In [29]:
import pandas as pd
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import train_test_split
import numpy as np
In [30]:
data = pd.read_excel('/mnt/tanzk/mxx/rejie2.xlsx')
data.head()
Out[30]:
|   | A | V | FC | C | H | N | S | O | H/C | O/C | N/C | Rt | Hr | dp | T | Tar |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 25.56 | 45.32 | 54.68 | 69.13 | 4.91 | 1.13 | 0.81 | 24.02 | 0.852307 | 0.260596 | 0.014011 | 30.0 | 10.0 | 0.2 | 600 | 3.974958 |
| 1 | 15.26 | 45.11 | 54.89 | 63.80 | 5.11 | 1.56 | 0.88 | 28.65 | 0.961129 | 0.336795 | 0.020958 | 30.0 | 10.0 | 0.2 | 600 | 4.629865 |
| 2 | 9.92 | 35.16 | 64.84 | 80.25 | 4.66 | 0.91 | 0.56 | 13.62 | 0.696822 | 0.127290 | 0.009720 | 20.0 | 20.0 | 6.0 | 400 | 6.452928 |
| 3 | 9.92 | 35.16 | 64.84 | 80.25 | 4.66 | 0.91 | 0.56 | 13.62 | 0.696822 | 0.127290 | 0.009720 | 20.0 | 20.0 | 6.0 | 450 | 8.724672 |
| 4 | 9.92 | 35.16 | 64.84 | 80.25 | 4.66 | 0.91 | 0.56 | 13.62 | 0.696822 | 0.127290 | 0.009720 | 20.0 | 20.0 | 6.0 | 500 | 10.075968 |
In [31]:
out_cols = ['Tar']
In [32]:
feature_cols = [x for x in data.columns if x not in out_cols]
In [33]:
train_data = data.reset_index(drop=True)
In [34]:
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [35]:
# Define the hyperparameter search space
# An earlier, narrower space that was tried first:
# space = {
#     'eta': hp.loguniform('eta', -5, 0),                          # learning rate
#     'max_depth': hp.choice('max_depth', range(5, 30)),           # maximum tree depth
#     'min_child_weight': hp.uniform('min_child_weight', 0, 10),   # minimum sum of instance weights in a child
#     'gamma': hp.loguniform('gamma', -5, 0),                      # minimum loss reduction required to split a leaf
#     'subsample': hp.uniform('subsample', 0.5, 1),                # row sampling ratio of the training set
#     'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1),  # feature sampling ratio per tree
# }
space = {
    'eta': hp.loguniform('eta', -4, 0.5),                          # learning rate, range [e^-4, e^0.5] ≈ [0.018, 1.65]
    'max_depth': hp.choice('max_depth', range(5, 50)),             # maximum tree depth, candidates 5..49
    'min_child_weight': hp.uniform('min_child_weight', 0, 20),     # minimum sum of instance weights in a child
    'gamma': hp.loguniform('gamma', -3, 0.7),                      # minimum loss reduction required to split a leaf
    'subsample': hp.uniform('subsample', 0.5, 0.9),                # row sampling ratio of the training set
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 0.9),  # feature sampling ratio per tree
}
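To sanity-check that these distributions cover the intended ranges before running the search, hyperopt can draw random configurations from the space. A minimal sketch:

In [ ]:

from hyperopt.pyll.stochastic import sample

# Draw one random configuration from the space defined above
print(sample(space))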
In [36]:
# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
In [37]:
# Objective function used to evaluate model performance
def objective(params):
    # Instantiate an XGBoost regressor with the candidate parameters
    gbr = xgb.XGBRegressor(**params)
    # Fit on the training split
    gbr.fit(X_train, y_train)
    # Predict on the held-out split and score by MAE
    y_pred = gbr.predict(X_test)
    mae = mean_absolute_error(y_test, y_pred)
    return {'loss': mae, 'status': STATUS_OK}
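One caveat: this objective scores every candidate against the same single held-out split, which the TPE search can gradually overfit. A sketch of an alternative, assuming the same data, that scores candidates by 5-fold cross-validated MAE instead (slower, but more robust):

In [ ]:

from sklearn.model_selection import cross_val_score

def objective_cv(params):
    gbr = xgb.XGBRegressor(**params)
    # cross_val_score returns negated errors, so flip the sign for the loss
    scores = cross_val_score(gbr, train_data[feature_cols],
                             train_data[out_cols].values.ravel(),
                             scoring='neg_mean_absolute_error', cv=5)
    return {'loss': -scores.mean(), 'status': STATUS_OK}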
In [38]:
# Create a Trials object to record the search history
trials = Trials()
# Run the hyperparameter optimization with fmin
best_params = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=100, trials=trials)
100%|██████████| 100/100 [00:06<00:00, 15.37trial/s, best loss: 0.927005504531766]
In [40]:
print(best_params)
{'colsample_bytree': 0.7591178772740766, 'eta': 0.29006097172943296, 'gamma': 0.32020608660889016, 'max_depth': 21, 'min_child_weight': 5.912424330716954, 'subsample': 0.8011115810485918}
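Note that for `hp.choice` parameters, `fmin` returns the index of the chosen candidate rather than the value itself: `'max_depth': 21` above means `range(5, 50)[21]`, i.e. an actual depth of 26. `hyperopt.space_eval` maps the raw result back onto the search space:

In [ ]:

from hyperopt import space_eval

# Recover the actual parameter values from the raw fmin result
print(space_eval(space, best_params))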
In [41]:
from sklearn.model_selection import KFold, train_test_split

kf = KFold(n_splits=10, shuffle=True, random_state=42)
In [42]:
num_boost_round = 1000
In [43]:
import matplotlib.pyplot as plt
In [44]:
plt.rcParams["font.sans-serif"] = ["WenQuanYi Micro Hei"] # 设置字体 plt.rcParams["axes.unicode_minus"] = False # 正常显示负号
In [45]:
eva_list = list()
eva_cols = ['MSE', 'RMSE', 'MAE', 'MAPE', 'R2']
for (train_index, test_index) in kf.split(train_data):
    train = train_data.loc[train_index]
    valid = train_data.loc[test_index]
    X_train, Y_train = train[feature_cols], train[out_cols]
    X_valid, Y_valid = valid[feature_cols], valid[out_cols]
    dtrain = xgb.DMatrix(X_train, Y_train)
    dvalid = xgb.DMatrix(X_valid, Y_valid)
    watchlist = [(dvalid, 'eval')]
    gb_model = xgb.train(best_params, dtrain, num_boost_round, evals=watchlist,
                         early_stopping_rounds=100, verbose_eval=False)
    y_pred = gb_model.predict(xgb.DMatrix(X_valid))
    y_true = Y_valid.values
    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(MSE)
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    R_2 = r2_score(y_true, y_pred)
    print('MSE:', round(MSE, 4), end=', ')
    print('RMSE:', round(RMSE, 4), end=', ')
    print('MAE:', round(MAE, 4), end=', ')
    print('MAPE:', round(MAPE*100, 2), '%', end=', ')
    print('R_2:', round(R_2, 4))  # a negative R² means the fit is worse than predicting the mean
    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
MSE: 2.336, RMSE: 1.5284, MAE: 1.0563, MAPE: 17.41 %, R_2: 0.7993
MSE: 2.1045, RMSE: 1.4507, MAE: 0.9808, MAPE: 14.64 %, R_2: 0.7211
MSE: 1.7837, RMSE: 1.3356, MAE: 0.9841, MAPE: 12.31 %, R_2: 0.7866
MSE: 3.7098, RMSE: 1.9261, MAE: 1.0768, MAPE: 21.11 %, R_2: 0.7009
MSE: 1.5407, RMSE: 1.2412, MAE: 0.7912, MAPE: 13.12 %, R_2: 0.8225
MSE: 2.5428, RMSE: 1.5946, MAE: 1.0797, MAPE: 15.42 %, R_2: 0.5668
MSE: 1.6981, RMSE: 1.3031, MAE: 0.9132, MAPE: 13.13 %, R_2: 0.7728
MSE: 1.6532, RMSE: 1.2858, MAE: 0.8018, MAPE: 9.68 %, R_2: 0.7663
MSE: 2.2342, RMSE: 1.4947, MAE: 1.1072, MAPE: 16.14 %, R_2: 0.7479
MSE: 1.6337, RMSE: 1.2782, MAE: 0.9285, MAPE: 12.27 %, R_2: 0.7293
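One caveat with the loop above: `xgb.train` with early stopping records `best_iteration` on the booster, but depending on the XGBoost version, `Booster.predict` may use all trees grown rather than stopping at the best round. To be explicit, the prediction can be pinned to the best iteration; a sketch (XGBoost ≥ 1.4):

In [ ]:

# Predict using only the trees up to and including the best iteration
y_pred = gb_model.predict(xgb.DMatrix(X_valid),
                          iteration_range=(0, gb_model.best_iteration + 1))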
In [46]:
eva_df = pd.DataFrame.from_records(eva_list, columns=eva_cols)
In [47]:
eva_df.mean()
Out[47]:
MSE     2.123660
RMSE    1.443832
MAE     0.971950
MAPE    0.145207
R2      0.741365
dtype: float64
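The means above hide fold-to-fold spread (R² per fold ranged from 0.57 to 0.82), so it is worth reporting the standard deviation alongside them:

In [ ]:

# Mean and standard deviation of each metric across the 10 folds
eva_df.agg(['mean', 'std'])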
In [48]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.7591178772740766, 'eta': 0.29006097172943296,
               'gamma': 0.32020608660889016, 'max_depth': 21,
               'min_child_weight': 5.912424330716954, 'subsample': 0.8011115810485918}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
gbr.fit(X_train, y_train)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
Out[48]:
(1.861425600429374, 1.3643407200656932, 0.9369589057201204, 0.14918013653153422, 0.8400893043071764)
In [49]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.8182688124328266, 'eta': 0.39669872117044186,
               'gamma': 0.67893237292294242, 'max_depth': 23,
               'min_child_weight': 7.274037788798998, 'subsample': 0.6957233806783182}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
gbr.fit(X_train, y_train)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
Out[49]:
(1.78639546955746, 1.3365610609162082, 0.9437850044721565, 0.1579769923858331, 0.846534966396966)
In [50]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.7756001484050402, 'eta': 0.31927224345318256,
               'gamma': 0.5049174573053737, 'max_depth': 42,
               'min_child_weight': 6.449650970113468, 'subsample': 0.7873416063207794}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
gbr.fit(X_train, y_train)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
Out[50]:
(1.5784385533129173, 1.256359245324727, 0.8012875783803378, 0.12426418160928662, 0.8644000560052363)
In [52]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.8997619766112827, 'eta': 0.48900376453173927,
               'gamma': 0.09568323449358279, 'max_depth': 29,
               'min_child_weight': 2.0607020689885673, 'subsample': 0.5621662915587151}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
# gbr.fit(X_train, y_train)
gbr.fit(X_train, y_train, early_stopping_rounds=50,
        eval_set=[(X_test, y_test)], verbose=True)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
[0]    validation_0-rmse:2.94022
[1]    validation_0-rmse:2.32601
[2]    validation_0-rmse:2.15546
...
[44]   validation_0-rmse:1.28136   (best)
...
[93]   validation_0-rmse:1.35153
[94]   validation_0-rmse:1.34330
/root/miniconda3/envs/python38/lib/python3.8/site-packages/xgboost/sklearn.py:889: UserWarning: `early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or `set_params` instead.
  warnings.warn(
Out[52]:
(1.6418860457242266, 1.2813610130342763, 0.8241156918341648, 0.13793177033015197, 0.8589494311459256)
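As the warning says, newer versions of the scikit-learn wrapper expect `early_stopping_rounds` in the constructor rather than in `fit`. The equivalent non-deprecated call, as a sketch:

In [ ]:

# Pass early_stopping_rounds to the constructor instead of fit (XGBoost >= 1.6)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500, early_stopping_rounds=50)
gbr.fit(X_train, y_train, eval_set=[(X_test, y_test)], verbose=True)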
In [53]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.7756001484050402, 'eta': 0.31927224345318256,
               'gamma': 0.5049174573053737, 'max_depth': 42,
               'min_child_weight': 6.449650970113468, 'subsample': 0.7873416063207794}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
# gbr.fit(X_train, y_train)
gbr.fit(X_train, y_train, early_stopping_rounds=50,
        eval_set=[(X_test, y_test)], verbose=True)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
[0]    validation_0-rmse:3.03131
[1]    validation_0-rmse:2.62574
[2]    validation_0-rmse:2.41405
...
[46]   validation_0-rmse:1.22211   (best)
...
[95]   validation_0-rmse:1.25212
[96]   validation_0-rmse:1.25266
/root/miniconda3/envs/python38/lib/python3.8/site-packages/xgboost/sklearn.py:889: UserWarning: `early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or `set_params` instead.
  warnings.warn(
Out[53]:
(1.4935549430378676, 1.2221108554619207, 0.8199210432780543, 0.1256990216917289, 0.8716922073374574)
In [54]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.8182688124328266, 'eta': 0.39669872117044186,
               'gamma': 0.67893237292294242, 'max_depth': 23,
               'min_child_weight': 7.274037788798998, 'subsample': 0.6957233806783182}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
gbr.fit(X_train, y_train, early_stopping_rounds=50,
        eval_set=[(X_test, y_test)], verbose=True)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
[0]    validation_0-rmse:2.94796
[1]    validation_0-rmse:2.47933
[2]    validation_0-rmse:2.32980
...
[41]   validation_0-rmse:1.26017   (best)
...
[89]   validation_0-rmse:1.30625
[90]   validation_0-rmse:1.30712
/root/miniconda3/envs/python38/lib/python3.8/site-packages/xgboost/sklearn.py:889: UserWarning: `early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or `set_params` instead.
  warnings.warn(
Out[54]:
(1.5880184359288727, 1.2601660350639803, 0.9411508613501586, 0.1599000195341022, 0.8635770708193553)
In [55]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, r2_score
from sklearn.model_selection import train_test_split

# Extract best parameters
best_params = {'colsample_bytree': 0.7591178772740766, 'eta': 0.29006097172943296,
               'gamma': 0.32020608660889016, 'max_depth': 21,
               'min_child_weight': 5.912424330716954, 'subsample': 0.8011115810485918}

# Re-train the model with the best parameters and evaluate
X_train, X_test, y_train, y_test = train_test_split(train_data[feature_cols], train_data[out_cols],
                                                    test_size=0.1, random_state=42)
gbr = xgb.XGBRegressor(**best_params, n_estimators=1500)
gbr.fit(X_train, y_train, early_stopping_rounds=50,
        eval_set=[(X_test, y_test)], verbose=True)

# Predict and evaluate
y_pred = gbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
mse, rmse, mae, mape, r2
[0]    validation_0-rmse:2.99087
[1]    validation_0-rmse:2.66375
[2]    validation_0-rmse:2.40788
...
[62]   validation_0-rmse:1.32711   (best)
...
[111]  validation_0-rmse:1.36646
[112]  validation_0-rmse:1.38288
/root/miniconda3/envs/python38/lib/python3.8/site-packages/xgboost/sklearn.py:889: UserWarning: `early_stopping_rounds` in `fit` method is deprecated for better compatibility with scikit-learn, use `early_stopping_rounds` in constructor or `set_params` instead.
  warnings.warn(
Out[55]:
(1.7612323261291336, 1.3271142852554687, 0.9281123720415317, 0.14669084489239353, 0.8486966728710329)
In [ ]: