7.7 KiB
7.7 KiB
A demo for multi-output regression¶
The demo is adapted from scikit-learn:
See :doc:`/tutorials/multioutput`
for more information.
Note
The feature is experimental. For the `multi_output_tree` strategy, many features are missing.
In [1]:
import argparse
from typing import Dict, List, Tuple

import numpy as np
from matplotlib import pyplot as plt

import xgboost as xgb


def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None:
    """Scatter-plot the true 2-dim targets against the predicted ones.

    Parameters
    ----------
    y :
        Ground truth; columns 0 and 1 are used as the plot coordinates.
    y_predt :
        Predictions, indexed the same way as ``y``.
    name :
        Title of the figure, used to tell the different models apart.
    """
    s = 25
    plt.scatter(y[:, 0], y[:, 1], c="navy", s=s, edgecolor="black", label="data")
    plt.scatter(
        y_predt[:, 0],
        y_predt[:, 1],
        c="cornflowerblue",
        s=s,
        edgecolor="black",
        label="prediction",
    )
    plt.xlim([-1, 2])
    plt.ylim([-1, 2])
    plt.title(name)
    # Both scatter series carry a label; without a legend they are never shown.
    plt.legend()
    plt.show()


def gen_circle() -> Tuple[np.ndarray, np.ndarray]:
    """Generate a sample dataset where y is a 2-dim circle.

    Returns
    -------
    X, y :
        ``X`` is a sorted ``(100, 1)`` feature column and ``y`` the matching
        2-column target, shifted and scaled by its global min and max.
    """
    rng = np.random.RandomState(1994)
    X = np.sort(200 * rng.rand(100, 1) - 100, axis=0)
    y = np.array([np.pi * np.sin(X).ravel(), np.pi * np.cos(X).ravel()]).T
    # Perturb every 5th sample.  The noise shape is derived from the slice so it
    # stays valid if the number of samples changes.
    noisy = y[::5, :]
    noisy += 0.5 - rng.rand(*noisy.shape)
    y = y - y.min()
    y = y / y.max()
    return X, y


def rmse_model(plot_result: bool, strategy: str) -> None:
    """Draw a circle with 2-dim coordinate as target variables.

    Parameters
    ----------
    plot_result :
        Whether to display the prediction with matplotlib.
    strategy :
        Value forwarded to the ``multi_strategy`` parameter of the regressor.
    """
    X, y = gen_circle()
    # Train a regressor on it
    reg = xgb.XGBRegressor(
        tree_method="hist",
        n_estimators=128,
        n_jobs=16,
        max_depth=8,
        multi_strategy=strategy,
        subsample=0.6,
    )
    reg.fit(X, y, eval_set=[(X, y)])

    y_predt = reg.predict(X)
    if plot_result:
        plot_predt(y, y_predt, f"builtin objective: {strategy}")


def custom_rmse_model(plot_result: bool, strategy: str) -> None:
    """Train using Python implementation of Squared Error.

    Parameters
    ----------
    plot_result :
        Whether to display the prediction with matplotlib.
    strategy :
        Value forwarded to the ``multi_strategy`` training parameter.
    """

    # As the experimental support status, custom objective doesn't support matrix as
    # gradient and hessian, which will be changed in future release.
    # NOTE(review): the flattened return shape below follows that statement;
    # confirm against the installed XGBoost version.
    def gradient(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
        """Compute the gradient squared error."""
        y = dtrain.get_label().reshape(predt.shape)
        return (predt - y).reshape(y.size)

    def hessian(predt: np.ndarray, dtrain: xgb.DMatrix) -> np.ndarray:
        """Compute the hessian for squared error."""
        return np.ones(predt.shape).reshape(predt.size)

    def squared_error(
        predt: np.ndarray, dtrain: xgb.DMatrix
    ) -> Tuple[np.ndarray, np.ndarray]:
        """Custom objective: gradient and hessian of the squared error."""
        grad = gradient(predt, dtrain)
        hess = hessian(predt, dtrain)
        return grad, hess

    def rmse(predt: np.ndarray, dtrain: xgb.DMatrix) -> Tuple[str, float]:
        """Custom metric: root *mean* squared error over all target entries."""
        y = dtrain.get_label().reshape(predt.shape)
        # Average (not sum) the squared residuals so the value really is an RMSE
        # and does not grow with the number of samples.
        v = float(np.sqrt(np.mean(np.power(y - predt, 2))))
        return "PyRMSE", v

    X, y = gen_circle()
    Xy = xgb.DMatrix(X, y)
    results: Dict[str, Dict[str, List[float]]] = {}
    # Make sure the `num_target` is passed to XGBoost when custom objective is used.
    # When builtin objective is used, XGBoost can figure out the number of targets
    # automatically.
    booster = xgb.train(
        {
            "tree_method": "hist",
            "num_target": y.shape[1],
            "multi_strategy": strategy,
        },
        dtrain=Xy,
        num_boost_round=128,
        obj=squared_error,
        evals=[(Xy, "Train")],
        evals_result=results,
        custom_metric=rmse,
    )

    y_predt = booster.inplace_predict(X)
    if plot_result:
        plot_predt(y, y_predt, f"custom objective: {strategy}")


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--plot", choices=[0, 1], type=int, default=1)
    args = parser.parse_args()
    do_plot = args.plot == 1

    # Train with builtin RMSE objective
    # - One model per output.
    rmse_model(do_plot, "one_output_per_tree")
    # - One model for all outputs, this is still working in progress, many features are
    #   missing.
    rmse_model(do_plot, "multi_output_tree")

    # Train with custom objective.
    # - One model per output.
    custom_rmse_model(do_plot, "one_output_per_tree")
    # - One model for all outputs, this is still working in progress, many features are
    #   missing.
    custom_rmse_model(do_plot, "multi_output_tree")
--------------------------------------------------------------------------- ModuleNotFoundError Traceback (most recent call last) Cell In[1], line 7 4 import numpy as np 5 from matplotlib import pyplot as plt ----> 7 import xgboost as xgb 10 def plot_predt(y: np.ndarray, y_predt: np.ndarray, name: str) -> None: 11 s = 25 ModuleNotFoundError: No module named 'xgboost'
In [ ]: