43 lines
5.2 KiB
Plaintext
43 lines
5.2 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"\n# A demo for multi-output regression\n\nThe demo is adopted from scikit-learn:\n\nhttps://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py\n\nSee :doc:`/tutorials/multioutput` for more information.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>The feature is experimental. For the `multi_output_tree` strategy, many features are\n missing.</p></div>\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"collapsed": false
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"import argparse\nfrom typing import Dict, List, Tuple\n\nimport numpy as np\nfrom matplotlib import pyplot as plt\n\nimport xgboost as xgb\n\n\ndef plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:\n s = 25\n plt.scatter(y[:, 0], y[:, 1], c=\"navy\", s=s, edgecolor=\"black\", label=\"data\")\n plt.scatter(\n y_predt[:, 0], y_predt[:, 1], c=\"cornflowerblue\", s=s, edgecolor=\"black\"\n )\n plt.xlim([-1, 2])\n plt.ylim([-1, 2])\n plt.show()\n\n\ndef gen_circle() -> Tuple[np.ndarray, np.ndarray]:\n \"Generate a sample dataset that y is a 2 dim circle.\"\n rng = np.random.RandomState(1994)\n X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)\n y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\n y[::5, :] += 0.5 - rng.rand(20, 2)\n y = y - y.min()\n y = y / y.max()\n return X, y\n\n\ndef rmse_model(plot_result: bool, strategy: str) -> None:\n \"\"\"Draw a circle with 2-dim coordinate as target variables.\"\"\"\n X, y = gen_circle()\n # Train a regressor on it\n reg = xgb.XGBRegressor(\n tree_method=\"hist\",\n n_estimators=128,\n n_jobs=16,\n max_depth=8,\n multi_strategy=strategy,\n subsample=0.6,\n )\n reg.fit(X, y, eval_set=[(X, y)])\n\n y_predt = reg.predict(X)\n if plot_result:\n plot_predt(y, y_predt, \"multi\")\n\n\ndef custom_rmse_model(plot_result: bool, strategy: str) -> None:\n \"\"\"Train using Python implementation of Squared Error.\"\"\"\n\n # As the experimental support status, custom objective doesn't support matrix as\n # gradient and hessian, which will be changed in future release.\n def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n \"\"\"Compute the gradient squared error.\"\"\"\n y = dtrain.get_label().reshape(predt.shape)\n return (predt - y).reshape(y.size)\n\n def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n \"\"\"Compute the hessian for squared error.\"\"\"\n return np.ones(predt.shape).reshape(predt.size)\n\n def squared_log(\n predt: np.ndarray, dtrain: xgb.DMatrix\n ) -> Tuple[np.ndarray, np.ndarray]:\n grad = gradient(predt, dtrain)\n hess = hessian(predt, dtrain)\n return grad, hess\n\n def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n y = dtrain.get_label().reshape(predt.shape)\n v = np.sqrt(np.sum(np.power(y - predt, 2)))\n return \"PyRMSE\", v\n\n X, y = gen_circle()\n Xy = xgb.DMatrix(X, y)\n results: Dict[str, Dict[str, List[float]]] = {}\n # Make sure the `num_target` is passed to XGBoost when custom objective is used.\n # When builtin objective is used, XGBoost can figure out the number of targets\n # automatically.\n booster = xgb.train(\n {\n \"tree_method\": \"hist\",\n \"num_target\": y.shape[1],\n \"multi_strategy\": strategy,\n },\n dtrain=Xy,\n num_boost_round=128,\n obj=squared_log,\n evals=[(Xy, \"Train\")],\n evals_result=results,\n custom_metric=rmse,\n )\n\n y_predt = booster.inplace_predict(X)\n if plot_result:\n plot_predt(y, y_predt, \"multi\")\n\n\nif __name__ == \"__main__\":\n parser = argparse.ArgumentParser()\n parser.add_argument(\"--plot\", choices=[0, 1], type=int, default=1)\n args = parser.parse_args()\n\n # Train with builtin RMSE objective\n # - One model per output.\n rmse_model(args.plot == 1, \"one_output_per_tree\")\n # - One model for all outputs, this is still working in progress, many features are\n # missing.\n rmse_model(args.plot == 1, \"multi_output_tree\")\n\n # Train with custom objective.\n # - One model per output.\n custom_rmse_model(args.plot == 1, \"one_output_per_tree\")\n # - One model for all outputs, this is still working in progress, many features are\n # missing.\n custom_rmse_model(args.plot == 1, \"multi_output_tree\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.18"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 0
|
|
} |