{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "\n",
    "# A demo for multi-output regression\n",
    "\n",
    "The demo is adopted from scikit-learn:\n",
    "\n",
    "https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py\n",
    "\n",
    "See :doc:`/tutorials/multioutput` for more information.\n",
    "\n",
    "<div class=\"alert alert-info\"><h4>Note</h4><p>The feature is experimental. For the `multi_output_tree` strategy, many features are\n",
    "    missing.</p></div>\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false,
    "jupyter": {
     "outputs_hidden": false
    }
   },
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'xgboost'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pyplot \u001b[38;5;28;01mas\u001b[39;00m plt\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mxgboost\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mxgb\u001b[39;00m\n\u001b[1;32m     10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mplot_predt\u001b[39m(y: np\u001b[38;5;241m.\u001b[39mndarray, y_predt: np\u001b[38;5;241m.\u001b[39mndarray, name: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m     11\u001b[0m     s \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m25\u001b[39m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'xgboost'"
     ]
    }
   ],
   "source": [
    "import argparse\n",
    "from typing import Dict, List, Tuple\n",
    "\n",
    "import numpy as np\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "import xgboost as xgb\n",
    "\n",
    "\n",
    "def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:\n",
    "    s = 25\n",
    "    plt.scatter(y[:, 0], y[:, 1], c=\"navy\", s=s, edgecolor=\"black\", label=\"data\")\n",
    "    plt.scatter(\n",
    "        y_predt[:, 0], y_predt[:, 1], c=\"cornflowerblue\", s=s, edgecolor=\"black\"\n",
    "    )\n",
    "    plt.xlim([-1, 2])\n",
    "    plt.ylim([-1, 2])\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "def gen_circle() -> Tuple[np.ndarray, np.ndarray]:\n",
    "    \"Generate a sample dataset that y is a 2 dim circle.\"\n",
    "    rng = np.random.RandomState(1994)\n",
    "    X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)\n",
    "    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\n",
    "    y[::5, :] += 0.5 - rng.rand(20, 2)\n",
    "    y = y - y.min()\n",
    "    y = y / y.max()\n",
    "    return X, y\n",
    "\n",
    "\n",
    "def rmse_model(plot_result: bool, strategy: str) -> None:\n",
    "    \"\"\"Draw a circle with 2-dim coordinate as target variables.\"\"\"\n",
    "    X, y = gen_circle()\n",
    "    # Train a regressor on it\n",
    "    reg = xgb.XGBRegressor(\n",
    "        tree_method=\"hist\",\n",
    "        n_estimators=128,\n",
    "        n_jobs=16,\n",
    "        max_depth=8,\n",
    "        multi_strategy=strategy,\n",
    "        subsample=0.6,\n",
    "    )\n",
    "    reg.fit(X, y, eval_set=[(X, y)])\n",
    "\n",
    "    y_predt = reg.predict(X)\n",
    "    if plot_result:\n",
    "        plot_predt(y, y_predt, \"multi\")\n",
    "\n",
    "\n",
    "def custom_rmse_model(plot_result: bool, strategy: str) -> None:\n",
    "    \"\"\"Train using Python implementation of Squared Error.\"\"\"\n",
    "\n",
    "    # As the experimental support status, custom objective doesn't support matrix as\n",
    "    # gradient and hessian, which will be changed in future release.\n",
    "    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
    "        \"\"\"Compute the gradient squared error.\"\"\"\n",
    "        y = dtrain.get_label().reshape(predt.shape)\n",
    "        return (predt - y).reshape(y.size)\n",
    "\n",
    "    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
    "        \"\"\"Compute the hessian for squared error.\"\"\"\n",
    "        return np.ones(predt.shape).reshape(predt.size)\n",
    "\n",
    "    def squared_log(\n",
    "        predt: np.ndarray, dtrain: xgb.DMatrix\n",
    "    ) -> Tuple[np.ndarray, np.ndarray]:\n",
    "        grad = gradient(predt, dtrain)\n",
    "        hess = hessian(predt, dtrain)\n",
    "        return grad, hess\n",
    "\n",
    "    def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n",
    "        y = dtrain.get_label().reshape(predt.shape)\n",
    "        v = np.sqrt(np.sum(np.power(y - predt, 2)))\n",
    "        return \"PyRMSE\", v\n",
    "\n",
    "    X, y = gen_circle()\n",
    "    Xy = xgb.DMatrix(X, y)\n",
    "    results: Dict[str, Dict[str, List[float]]] = {}\n",
    "    # Make sure the `num_target` is passed to XGBoost when custom objective is used.\n",
    "    # When builtin objective is used, XGBoost can figure out the number of targets\n",
    "    # automatically.\n",
    "    booster = xgb.train(\n",
    "        {\n",
    "            \"tree_method\": \"hist\",\n",
    "            \"num_target\": y.shape[1],\n",
    "            \"multi_strategy\": strategy,\n",
    "        },\n",
    "        dtrain=Xy,\n",
    "        num_boost_round=128,\n",
    "        obj=squared_log,\n",
    "        evals=[(Xy, \"Train\")],\n",
    "        evals_result=results,\n",
    "        custom_metric=rmse,\n",
    "    )\n",
    "\n",
    "    y_predt = booster.inplace_predict(X)\n",
    "    if plot_result:\n",
    "        plot_predt(y, y_predt, \"multi\")\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    parser = argparse.ArgumentParser()\n",
    "    parser.add_argument(\"--plot\", choices=[0, 1], type=int, default=1)\n",
    "    args = parser.parse_args()\n",
    "\n",
    "    # Train with builtin RMSE objective\n",
    "    # - One model per output.\n",
    "    rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
    "    # - One model for all outputs, this is still working in progress, many features are\n",
    "    # missing.\n",
    "    rmse_model(args.plot == 1, \"multi_output_tree\")\n",
    "\n",
    "    # Train with custom objective.\n",
    "    # - One model per output.\n",
    "    custom_rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
    "    # - One model for all outputs, this is still working in progress, many features are\n",
    "    # missing.\n",
    "    custom_rmse_model(args.plot == 1, \"multi_output_tree\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}