coal_materials/multioutput_regression.ipynb

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"# A demo for multi-output regression\n",
"\n",
"The demo is adopted from scikit-learn:\n",
"\n",
"https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py\n",
"\n",
"See :doc:`/tutorials/multioutput` for more information.\n",
"\n",
"<div class=\"alert alert-info\"><h4>Note</h4><p>The feature is experimental. For the `multi_output_tree` strategy, many features are\n",
" missing.</p></div>\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'xgboost'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pyplot \u001b[38;5;28;01mas\u001b[39;00m plt\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mxgboost\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mxgb\u001b[39;00m\n\u001b[1;32m 10\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mplot_predt\u001b[39m(y: np\u001b[38;5;241m.\u001b[39mndarray, y_predt: np\u001b[38;5;241m.\u001b[39mndarray, name: \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 11\u001b[0m s \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m25\u001b[39m\n",
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'xgboost'"
]
}
],
"source": [
"import argparse\n",
"from typing import Dict, List, Tuple\n",
"\n",
"import numpy as np\n",
"from matplotlib import pyplot as plt\n",
"\n",
"import xgboost as xgb\n",
"\n",
"\n",
"def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:\n",
" s = 25\n",
" plt.scatter(y[:, 0], y[:, 1], c=\"navy\", s=s, edgecolor=\"black\", label=\"data\")\n",
" plt.scatter(\n",
" y_predt[:, 0], y_predt[:, 1], c=\"cornflowerblue\", s=s, edgecolor=\"black\"\n",
" )\n",
" plt.xlim([-1, 2])\n",
" plt.ylim([-1, 2])\n",
" plt.show()\n",
"\n",
"\n",
"def gen_circle() -> Tuple[np.ndarray, np.ndarray]:\n",
" \"Generate a sample dataset that y is a 2 dim circle.\"\n",
" rng = np.random.RandomState(1994)\n",
" X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)\n",
" y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\n",
" y[::5, :] += 0.5 - rng.rand(20, 2)\n",
" y = y - y.min()\n",
" y = y / y.max()\n",
" return X, y\n",
"\n",
"\n",
"def rmse_model(plot_result: bool, strategy: str) -> None:\n",
" \"\"\"Draw a circle with 2-dim coordinate as target variables.\"\"\"\n",
" X, y = gen_circle()\n",
" # Train a regressor on it\n",
" reg = xgb.XGBRegressor(\n",
" tree_method=\"hist\",\n",
" n_estimators=128,\n",
" n_jobs=16,\n",
" max_depth=8,\n",
" multi_strategy=strategy,\n",
" subsample=0.6,\n",
" )\n",
" reg.fit(X, y, eval_set=[(X, y)])\n",
"\n",
" y_predt = reg.predict(X)\n",
" if plot_result:\n",
" plot_predt(y, y_predt, \"multi\")\n",
"\n",
"\n",
"def custom_rmse_model(plot_result: bool, strategy: str) -> None:\n",
" \"\"\"Train using Python implementation of Squared Error.\"\"\"\n",
"\n",
" # As the experimental support status, custom objective doesn't support matrix as\n",
" # gradient and hessian, which will be changed in future release.\n",
" def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
" \"\"\"Compute the gradient squared error.\"\"\"\n",
" y = dtrain.get_label().reshape(predt.shape)\n",
" return (predt - y).reshape(y.size)\n",
"\n",
" def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
" \"\"\"Compute the hessian for squared error.\"\"\"\n",
" return np.ones(predt.shape).reshape(predt.size)\n",
"\n",
" def squared_log(\n",
" predt: np.ndarray, dtrain: xgb.DMatrix\n",
" ) -> Tuple[np.ndarray, np.ndarray]:\n",
" grad = gradient(predt, dtrain)\n",
" hess = hessian(predt, dtrain)\n",
" return grad, hess\n",
"\n",
" def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n",
" y = dtrain.get_label().reshape(predt.shape)\n",
" v = np.sqrt(np.sum(np.power(y - predt, 2)))\n",
" return \"PyRMSE\", v\n",
"\n",
" X, y = gen_circle()\n",
" Xy = xgb.DMatrix(X, y)\n",
" results: Dict[str, Dict[str, List[float]]] = {}\n",
" # Make sure the `num_target` is passed to XGBoost when custom objective is used.\n",
" # When builtin objective is used, XGBoost can figure out the number of targets\n",
" # automatically.\n",
" booster = xgb.train(\n",
" {\n",
" \"tree_method\": \"hist\",\n",
" \"num_target\": y.shape[1],\n",
" \"multi_strategy\": strategy,\n",
" },\n",
" dtrain=Xy,\n",
" num_boost_round=128,\n",
" obj=squared_log,\n",
" evals=[(Xy, \"Train\")],\n",
" evals_result=results,\n",
" custom_metric=rmse,\n",
" )\n",
"\n",
" y_predt = booster.inplace_predict(X)\n",
" if plot_result:\n",
" plot_predt(y, y_predt, \"multi\")\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" parser = argparse.ArgumentParser()\n",
" parser.add_argument(\"--plot\", choices=[0, 1], type=int, default=1)\n",
" args = parser.parse_args()\n",
"\n",
" # Train with builtin RMSE objective\n",
" # - One model per output.\n",
" rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
" # - One model for all outputs, this is still working in progress, many features are\n",
" # missing.\n",
" rmse_model(args.plot == 1, \"multi_output_tree\")\n",
"\n",
" # Train with custom objective.\n",
" # - One model per output.\n",
" custom_rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
" # - One model for all outputs, this is still working in progress, many features are\n",
" # missing.\n",
" custom_rmse_model(args.plot == 1, \"multi_output_tree\")"
]
},
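{
"cell_type": "markdown",
"metadata": {},
"source": [
"A small follow-up sketch, not part of the original demo: assuming the cell\n",
"above has run successfully (so `gen_circle` and `xgb` are in scope), it fits\n",
"the builtin squared-error objective with both strategies and prints the\n",
"prediction shapes, which should be `(n_samples, n_targets)` in both cases.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A minimal sketch; assumes the demo cell above executed successfully.\n",
"X, y = gen_circle()\n",
"for strategy in (\"one_output_per_tree\", \"multi_output_tree\"):\n",
"    reg = xgb.XGBRegressor(\n",
"        tree_method=\"hist\",\n",
"        n_estimators=16,\n",
"        multi_strategy=strategy,\n",
"    )\n",
"    reg.fit(X, y)\n",
"    # Both strategies emit one prediction column per target, here (100, 2).\n",
"    print(strategy, reg.predict(X).shape)"
]
},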
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 4
}