coal_materials/.ipynb_checkpoints/multioutput_regression-chec...

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# A demo for multi-output regression\n\nThe demo is adapted from scikit-learn:\n\nhttps://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_regression_multioutput.html#sphx-glr-auto-examples-ensemble-plot-random-forest-regression-multioutput-py\n\nSee the [multiple outputs tutorial](https://xgboost.readthedocs.io/en/stable/tutorials/multioutput.html) for more information.\n\n<div class=\"alert alert-info\"><h4>Note</h4><p>The feature is experimental. For the <code>multi_output_tree</code> strategy, many features are missing.</p></div>\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import argparse\n",
        "from typing import Dict, List, Tuple\n",
        "\n",
        "import numpy as np\n",
        "from matplotlib import pyplot as plt\n",
        "\n",
        "import xgboost as xgb\n",
        "\n",
        "\n",
        "def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:\n",
        "    \"\"\"Scatter the true 2-dim targets against the predicted ones.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    y :\n",
        "        True targets; columns 0 and 1 are used as plot coordinates.\n",
        "    y_predt :\n",
        "        Predicted targets; columns 0 and 1 are used as plot coordinates.\n",
        "    name :\n",
        "        Title of the figure.\n",
        "    \"\"\"\n",
        "    s = 25\n",
        "    plt.scatter(y[:, 0], y[:, 1], c=\"navy\", s=s, edgecolor=\"black\", label=\"data\")\n",
        "    plt.scatter(\n",
        "        y_predt[:, 0],\n",
        "        y_predt[:, 1],\n",
        "        c=\"cornflowerblue\",\n",
        "        s=s,\n",
        "        edgecolor=\"black\",\n",
        "        label=\"prediction\",\n",
        "    )\n",
        "    plt.xlim([-1, 2])\n",
        "    plt.ylim([-1, 2])\n",
        "    plt.title(name)\n",
        "    # Without a legend the scatter labels are never rendered.\n",
        "    plt.legend()\n",
        "    plt.show()\n",
        "\n",
        "\n",
        "def gen_circle() -> Tuple[np.ndarray, np.ndarray]:\n",
        "    \"\"\"Generate a sample dataset whose 2-dim target ``y`` lies on a circle.\n",
        "\n",
        "    Returns\n",
        "    -------\n",
        "    X, y :\n",
        "        ``X`` is a sorted (100, 1) feature column. ``y`` is the (100, 2) target\n",
        "        with noise added to every 5th row, shifted and scaled into [0, 1].\n",
        "    \"\"\"\n",
        "    rng = np.random.RandomState(1994)\n",
        "    X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)\n",
        "    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T\n",
        "    # Perturb every 5th sample (20 rows out of 100).\n",
        "    y[::5, :] += 0.5 - rng.rand(20, 2)\n",
        "    y = y - y.min()\n",
        "    y = y / y.max()\n",
        "    return X, y\n",
        "\n",
        "\n",
        "def rmse_model(plot_result: bool, strategy: str) -> None:\n",
        "    \"\"\"Draw a circle with 2-dim coordinates as target variables.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    plot_result :\n",
        "        Whether to plot the prediction against the data.\n",
        "    strategy :\n",
        "        Value passed to XGBoost as ``multi_strategy``.\n",
        "    \"\"\"\n",
        "    X, y = gen_circle()\n",
        "    # Train a regressor on it\n",
        "    reg = xgb.XGBRegressor(\n",
        "        tree_method=\"hist\",\n",
        "        n_estimators=128,\n",
        "        n_jobs=16,\n",
        "        max_depth=8,\n",
        "        multi_strategy=strategy,\n",
        "        subsample=0.6,\n",
        "    )\n",
        "    reg.fit(X, y, eval_set=[(X, y)])\n",
        "\n",
        "    y_predt = reg.predict(X)\n",
        "    if plot_result:\n",
        "        plot_predt(y, y_predt, f\"Builtin objective: {strategy}\")\n",
        "\n",
        "\n",
        "def custom_rmse_model(plot_result: bool, strategy: str) -> None:\n",
        "    \"\"\"Train using a Python implementation of squared error.\n",
        "\n",
        "    Parameters\n",
        "    ----------\n",
        "    plot_result :\n",
        "        Whether to plot the prediction against the data.\n",
        "    strategy :\n",
        "        Value passed to XGBoost as ``multi_strategy``.\n",
        "    \"\"\"\n",
        "\n",
        "    # Because multi-output support is experimental, a custom objective doesn't\n",
        "    # support a matrix as gradient and hessian, so both are flattened here. This\n",
        "    # will be changed in a future release.\n",
        "    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
        "        \"\"\"Compute the gradient of squared error.\"\"\"\n",
        "        y = dtrain.get_label().reshape(predt.shape)\n",
        "        return (predt - y).reshape(y.size)\n",
        "\n",
        "    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:\n",
        "        \"\"\"Compute the hessian of squared error.\"\"\"\n",
        "        return np.ones(predt.shape).reshape(predt.size)\n",
        "\n",
        "    def squared_error(\n",
        "        predt: np.ndarray, dtrain: xgb.DMatrix\n",
        "    ) -> Tuple[np.ndarray, np.ndarray]:\n",
        "        \"\"\"Custom objective: gradient and hessian of squared error.\"\"\"\n",
        "        grad = gradient(predt, dtrain)\n",
        "        hess = hessian(predt, dtrain)\n",
        "        return grad, hess\n",
        "\n",
        "    def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:\n",
        "        \"\"\"Root mean squared error over all samples and targets.\"\"\"\n",
        "        y = dtrain.get_label().reshape(predt.shape)\n",
        "        # Take the mean, not the sum, of the squared errors so the value is a\n",
        "        # real RMSE and does not grow with the number of samples.\n",
        "        v = float(np.sqrt(np.mean(np.power(y - predt, 2))))\n",
        "        return \"PyRMSE\", v\n",
        "\n",
        "    X, y = gen_circle()\n",
        "    Xy = xgb.DMatrix(X, y)\n",
        "    results: Dict[str, Dict[str, List[float]]] = {}\n",
        "    # Make sure the `num_target` is passed to XGBoost when custom objective is used.\n",
        "    # When builtin objective is used, XGBoost can figure out the number of targets\n",
        "    # automatically.\n",
        "    booster = xgb.train(\n",
        "        {\n",
        "            \"tree_method\": \"hist\",\n",
        "            \"num_target\": y.shape[1],\n",
        "            \"multi_strategy\": strategy,\n",
        "        },\n",
        "        dtrain=Xy,\n",
        "        num_boost_round=128,\n",
        "        obj=squared_error,\n",
        "        evals=[(Xy, \"Train\")],\n",
        "        evals_result=results,\n",
        "        custom_metric=rmse,\n",
        "    )\n",
        "\n",
        "    y_predt = booster.inplace_predict(X)\n",
        "    if plot_result:\n",
        "        plot_predt(y, y_predt, f\"Custom objective: {strategy}\")\n",
        "\n",
        "\n",
        "if __name__ == \"__main__\":\n",
        "    parser = argparse.ArgumentParser()\n",
        "    parser.add_argument(\"--plot\", choices=[0, 1], type=int, default=1)\n",
        "    # Inside a Jupyter kernel ``sys.argv`` carries the kernel's own options (e.g.\n",
        "    # ``-f <connection file>``). ``parse_args`` would exit on them, so unknown\n",
        "    # arguments are ignored instead.\n",
        "    args, _ = parser.parse_known_args()\n",
        "\n",
        "    # Train with builtin RMSE objective\n",
        "    # - One model per output.\n",
        "    rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
        "    # - One model for all outputs, this is still a work in progress, many features\n",
        "    # are missing.\n",
        "    rmse_model(args.plot == 1, \"multi_output_tree\")\n",
        "\n",
        "    # Train with custom objective.\n",
        "    # - One model per output.\n",
        "    custom_rmse_model(args.plot == 1, \"one_output_per_tree\")\n",
        "    # - One model for all outputs, this is still a work in progress, many features\n",
        "    # are missing.\n",
        "    custom_rmse_model(args.plot == 1, \"multi_output_tree\")"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.8.18"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}