{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# A demo for multi-output regression\n",
    "\n",
    "The demo is adapted from scikit-learn:\n",
    "\n",
    "https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py\n",
    "\n",
    "See the [multiple outputs tutorial](https://xgboost.readthedocs.io/en/stable/tutorials/multioutput.html) for more information.\n",
    "\n",
    "> **Note**\n",
    ">\n",
    "> The feature is experimental. For the `multi_output_tree` strategy, many features are\n",
    "> missing.\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [],
   "source": [
    "import argparse\n",
    "from typing import Dict, List, Tuple\n",
    "\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "import xgboost as xgb\n",
    "\n",
    "\n",
    "def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:\n",
    "    \"\"\"Scatter the true targets and the predictions on the same axes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    y :\n",
    "        True targets; columns 0 and 1 are used as the plot coordinates.\n",
    "    y_predt :\n",
    "        Predicted targets, indexed the same way as ``y``.\n",
    "    name :\n",
    "        Title shown above the figure.\n",
    "\n",
    "    \"\"\"\n",
    "    s = 25\n",
    "    plt.scatter(y[:, 0], y[:, 1], c=\"navy\", s=s, edgecolor=\"black\", label=\"data\")\n",
    "    plt.scatter(\n",
    "        y_predt[:, 0],\n",
    "        y_predt[:, 1],\n",
    "        c=\"cornflowerblue\",\n",
    "        s=s,\n",
    "        edgecolor=\"black\",\n",
    "        label=\"prediction\",\n",
    "    )\n",
    "    plt.xlim([-1, 2])\n",
    "    plt.ylim([-1, 2])\n",
    "    plt.title(name)\n",
    "    # Both series carry a label; the legend is what makes them distinguishable.\n",
    "    plt.legend()\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "def gen_circle(n_samples: int = 100) -> Tuple[np.ndarray, np.ndarray]:\n",
    "    \"\"\"Generate a sample dataset where y is a 2-dim circle.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    n_samples :\n",
    "        Number of rows to generate. The default reproduces the original 100-sample\n",
    "        dataset.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    ``X`` with shape ``(n_samples, 1)`` sorted in ascending order, and ``y`` with\n",
    "    shape ``(n_samples, 2)`` rescaled into ``[0, 1]``.\n",
    "\n",
    "    \"\"\"\n",
    "    rng = np.random.RandomState(1994)\n",
    "    X = np.sort(200 * rng.rand(n_samples, 1) - 100, axis=0)\n",
    "    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\n",
    "    # Add noise to every 5th sample. The noise shape is taken from the slice itself\n",
    "    # so that it stays valid for any `n_samples`.\n",
    "    y[::5, :] += 0.5 - rng.rand(*y[::5, :].shape)\n",
    "    y = y - y.min()\n",
    "    y = y / y.max()\n",
    "    return X, y\n",
    "\n",
    "\n",
    "def rmse_model(plot_result: bool, strategy: str) -> None:\n",
    "    \"\"\"Draw a circle with 2-dim coordinate as target variables.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    plot_result :\n",
    "        Whether to plot the predictions against the training targets.\n",
    "    strategy :\n",
    "        Value passed to XGBoost as the `multi_strategy` parameter.\n",
    "\n",
    "    \"\"\"\n",
    "    X, y = gen_circle()\n",
    "    # Train a regressor on it\n",
    "    reg = xgb.XGBRegressor(\n",
    "        tree_method=\"hist\",\n",
    "        n_estimators=128,\n",
    "        n_jobs=16,\n",
    "        max_depth=8,\n",
    "        multi_strategy=strategy,\n",
    "        subsample=0.6,\n",
    "    )\n",
    "    reg.fit(X, y, eval_set=[(X, y)])\n",
    "\n",
    "    y_predt = reg.predict(X)\n",
    "    if plot_result:\n",
    "        plot_predt(y, y_predt, f\"Builtin objective ({strategy})\")\n",
    "\n",
    "\n",
    "def custom_rmse_model(plot_result: bool, strategy: str) -> None:\n",
    "    \"\"\"Train using Python implementation of Squared Error.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    plot_result :\n",
    "        Whether to plot the predictions against the training targets.\n",
    "    strategy :\n",
    "        Value passed to XGBoost as the `multi_strategy` parameter.\n",
    "\n",
    "    \"\"\"\n",
    "\n",
    "    # Because multi-output support is experimental, the custom objective doesn't\n",
    "    # accept a matrix as gradient and hessian (hence the flattening below); this\n",
    "    # will be changed in a future release.\n",
    "    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
    "        \"\"\"Compute the gradient of squared error.\"\"\"\n",
    "        y = dtrain.get_label().reshape(predt.shape)\n",
    "        return (predt - y).reshape(y.size)\n",
    "\n",
    "    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
    "        \"\"\"Compute the hessian of squared error.\"\"\"\n",
    "        return np.ones(predt.shape).reshape(predt.size)\n",
    "\n",
    "    def squared_error(\n",
    "        predt: np.ndarray, dtrain: xgb.DMatrix\n",
    "    ) -> Tuple[np.ndarray, np.ndarray]:\n",
    "        \"\"\"Squared error objective: return the flattened gradient and hessian.\"\"\"\n",
    "        grad = gradient(predt, dtrain)\n",
    "        hess = hessian(predt, dtrain)\n",
    "        return grad, hess\n",
    "\n",
    "    def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n",
    "        \"\"\"Root mean squared error over all samples and targets.\"\"\"\n",
    "        y = dtrain.get_label().reshape(predt.shape)\n",
    "        # Use the mean (not the sum) of the squared residuals, otherwise the value\n",
    "        # grows with the dataset size and is not an RMSE.\n",
    "        v = np.sqrt(np.mean(np.power(y - predt, 2)))\n",
    "        return \"PyRMSE\", float(v)\n",
    "\n",
    "    X, y = gen_circle()\n",
    "    Xy = xgb.DMatrix(X, y)\n",
    "    results: Dict[str, Dict[str, List[float]]] = {}\n",
    "    # Make sure the `num_target` is passed to XGBoost when custom objective is used.\n",
    "    # When builtin objective is used, XGBoost can figure out the number of targets\n",
    "    # automatically.\n",
    "    booster = xgb.train(\n",
    "        {\n",
    "            \"tree_method\": \"hist\",\n",
    "            \"num_target\": y.shape[1],\n",
    "            \"multi_strategy\": strategy,\n",
    "        },\n",
    "        dtrain=Xy,\n",
    "        num_boost_round=128,\n",
    "        obj=squared_error,\n",
    "        evals=[(Xy, \"Train\")],\n",
    "        evals_result=results,\n",
    "        custom_metric=rmse,\n",
    "    )\n",
    "\n",
    "    y_predt = booster.inplace_predict(X)\n",
    "    if plot_result:\n",
    "        plot_predt(y, y_predt, f\"Custom objective ({strategy})\")\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    parser = argparse.ArgumentParser()\n",
    "    parser.add_argument(\"--plot\", choices=[0, 1], type=int, default=1)\n",
    "    # Inside a Jupyter kernel `sys.argv` carries the kernel's own arguments (e.g.\n",
    "    # `-f <connection file>`); `parse_args` would abort on them, so ignore anything\n",
    "    # this demo does not define.\n",
    "    args, _ = parser.parse_known_args()\n",
    "\n",
    "    # Train with builtin RMSE objective\n",
    "    # - One model per output.\n",
    "    rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
    "    # - One model for all outputs, this is still a work in progress, many features\n",
    "    #   are missing.\n",
    "    rmse_model(args.plot == 1, \"multi_output_tree\")\n",
    "\n",
    "    # Train with custom objective.\n",
    "    # - One model per output.\n",
    "    custom_rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
    "    # - One model for all outputs, this is still a work in progress, many features\n",
    "    #   are missing.\n",
    "    custom_rmse_model(args.plot == 1, \"multi_output_tree\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}