20 KiB
20 KiB
In [1]:
import pandas as pd
In [2]:
data_0102 = pd.read_excel('./data/20240102/20240102.xlsx', header=[0,1,2]) data_0102
Out[2]:
Unnamed: 0_level_0 | 氢 | 碳 | 氮 | 氧 | 弹筒发热量 | 挥发分 | 固定炭 | |
---|---|---|---|---|---|---|---|---|
化验编号 | Had | Cad | Nad | Oad | Qb,ad | Vad | Fcad | |
Unnamed: 0_level_2 | (%) | (%) | (%) | (%) | MJ/kg | (%) | (%) | |
0 | 2720110529 | 3.93 | 70.18 | 0.81 | 25.079 | 27.820 | 32.06 | 55.68 |
1 | 2720096883 | 3.78 | 68.93 | 0.77 | 26.512 | 27.404 | 29.96 | 54.71 |
2 | 2720109084 | 3.48 | 69.60 | 0.76 | 26.148 | 27.578 | 29.31 | 55.99 |
3 | 2720084708 | 3.47 | 66.71 | 0.76 | 29.055 | 26.338 | 28.58 | 53.87 |
4 | 2720062721 | 3.87 | 68.78 | 0.80 | 26.542 | 27.280 | 29.97 | 54.78 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
223 | 2720030490 | 4.12 | 68.85 | 0.97 | 26.055 | 27.864 | 32.94 | 51.89 |
224 | 2720028633 | 3.97 | 67.04 | 0.94 | 28.043 | 27.368 | 31.88 | 51.38 |
225 | 2720028634 | 4.12 | 68.42 | 0.96 | 26.493 | 27.886 | 33.16 | 52.00 |
226 | 2720017683 | 3.88 | 67.42 | 0.94 | 27.760 | 26.616 | 31.65 | 50.56 |
227 | 2720017678 | 3.81 | 66.74 | 0.92 | 28.530 | 26.688 | 31.02 | 50.82 |
228 rows × 8 columns
In [3]:
cols = [''.join([y for y in x if 'Unnamed' not in y]) for x in data_0102.columns] cols
Out[3]:
['化验编号', '氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg', '挥发分Vad(%)', '固定炭Fcad(%)']
In [4]:
data_0102.columns = cols
In [5]:
import xgboost as xgb
In [6]:
feature_cols = cols[1:6] feature_cols
Out[6]:
['氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg']
In [7]:
train_data = data_0102.copy() train_data.head()
Out[7]:
化验编号 | 氢Had(%) | 碳Cad(%) | 氮Nad(%) | 氧Oad(%) | 弹筒发热量Qb,adMJ/kg | 挥发分Vad(%) | 固定炭Fcad(%) | |
---|---|---|---|---|---|---|---|---|
0 | 2720110529 | 3.93 | 70.18 | 0.81 | 25.079 | 27.820 | 32.06 | 55.68 |
1 | 2720096883 | 3.78 | 68.93 | 0.77 | 26.512 | 27.404 | 29.96 | 54.71 |
2 | 2720109084 | 3.48 | 69.60 | 0.76 | 26.148 | 27.578 | 29.31 | 55.99 |
3 | 2720084708 | 3.47 | 66.71 | 0.76 | 29.055 | 26.338 | 28.58 | 53.87 |
4 | 2720062721 | 3.87 | 68.78 | 0.80 | 26.542 | 27.280 | 29.97 | 54.78 |
In [8]:
import numpy as np
In [9]:
train_data.reset_index(drop=True, inplace=True)
In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [25]:
params_xgb = {"objective": 'reg:squarederror', "subsample": 1, "max_depth": 15, "eta": 0.3, "gamma": 0, "lambda": 1, "alpha": 0, "colsample_bytree": 0.9,} num_boost_round = 1000
In [26]:
from sklearn.model_selection import KFold, train_test_split kf = KFold(n_splits=10, shuffle=True, random_state=42)
In [27]:
eva_list = list() for (train_index, test_index) in kf.split(train_data): train = train_data.loc[train_index] valid = train_data.loc[test_index] X_train, Y_train = train[feature_cols], np.log1p(train['挥发分Vad(%)']) X_valid, Y_valid = valid[feature_cols], np.log1p(valid['挥发分Vad(%)']) dtrain = xgb.DMatrix(X_train, Y_train) dvalid = xgb.DMatrix(X_valid, Y_valid) watchlist = [(dvalid, 'eval')] gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=50, verbose_eval=False) y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid))) y_true = np.expm1(Y_valid.values) MSE = mean_squared_error(y_true, y_pred) RMSE = np.sqrt(mean_squared_error(y_true, y_pred)) MAE = mean_absolute_error(y_true, y_pred) MAPE = mean_absolute_percentage_error(y_true, y_pred) R_2 = r2_score(y_true, y_pred) print('MSE:', round(MSE, 4), end=', ') print('RMSE:', round(RMSE, 4), end=', ') print('MAE:', round(MAE, 4), end=', ') print('MAPE:', round(MAPE*100, 2), '%', end=', ') print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差 eva_list.append([MSE, RMSE, MAE, MAPE, R_2]) data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']) data_df.mean()
MSE: 0.475, RMSE: 0.6892, MAE: 0.5507, MAPE: 1.86 %, R_2: 0.9046 MSE: 1.1415, RMSE: 1.0684, MAE: 0.9133, MAPE: 3.06 %, R_2: 0.6923 MSE: 0.7247, RMSE: 0.8513, MAE: 0.6606, MAPE: 2.32 %, R_2: 0.9247 MSE: 1.3652, RMSE: 1.1684, MAE: 0.9609, MAPE: 3.24 %, R_2: 0.6698 MSE: 0.4552, RMSE: 0.6747, MAE: 0.5732, MAPE: 1.94 %, R_2: 0.903 MSE: 0.6357, RMSE: 0.7973, MAE: 0.6374, MAPE: 2.2 %, R_2: 0.8771 MSE: 0.9972, RMSE: 0.9986, MAE: 0.752, MAPE: 2.47 %, R_2: 0.8141 MSE: 1.5218, RMSE: 1.2336, MAE: 1.0569, MAPE: 3.45 %, R_2: 0.2363 MSE: 0.6891, RMSE: 0.8301, MAE: 0.6825, MAPE: 2.22 %, R_2: 0.9005 MSE: 1.6864, RMSE: 1.2986, MAE: 1.0004, MAPE: 3.51 %, R_2: 0.6893
Out[27]:
MSE 0.969172 RMSE 0.961023 MAE 0.778783 MAPE 0.026288 R_2 0.761188 dtype: float64
In [28]:
eva_list = list() for (train_index, test_index) in kf.split(train_data): train = train_data.loc[train_index] valid = train_data.loc[test_index] X_train, Y_train = train[feature_cols], np.log1p(train['固定炭Fcad(%)']) X_valid, Y_valid = valid[feature_cols], np.log1p(valid['固定炭Fcad(%)']) dtrain = xgb.DMatrix(X_train, Y_train) dvalid = xgb.DMatrix(X_valid, Y_valid) watchlist = [(dvalid, 'eval')] gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=50, verbose_eval=False) y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid))) y_true = np.expm1(Y_valid.values) MSE = mean_squared_error(y_true, y_pred) RMSE = np.sqrt(mean_squared_error(y_true, y_pred)) MAE = mean_absolute_error(y_true, y_pred) MAPE = mean_absolute_percentage_error(y_true, y_pred) R_2 = r2_score(y_true, y_pred) print('MSE:', round(MSE, 4), end=', ') print('RMSE:', round(RMSE, 4), end=', ') print('MAE:', round(MAE, 4), end=', ') print('MAPE:', round(MAPE*100, 2), '%', end=', ') print('R2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差 eva_list.append([MSE, RMSE, MAE, MAPE, R_2]) data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2']) data_df.mean()
MSE: 0.9821, RMSE: 0.991, MAE: 0.7698, MAPE: 1.44 %, R2: 0.9652 MSE: 1.2674, RMSE: 1.1258, MAE: 0.8756, MAPE: 1.64 %, R2: 0.9174 MSE: 0.9137, RMSE: 0.9559, MAE: 0.757, MAPE: 1.46 %, R2: 0.9864 MSE: 1.6012, RMSE: 1.2654, MAE: 1.0173, MAPE: 1.89 %, R2: 0.9292 MSE: 1.4694, RMSE: 1.2122, MAE: 0.8524, MAPE: 1.59 %, R2: 0.9142 MSE: 0.7552, RMSE: 0.869, MAE: 0.7202, MAPE: 1.39 %, R2: 0.9779 MSE: 0.5474, RMSE: 0.7398, MAE: 0.5467, MAPE: 1.0 %, R2: 0.9783 MSE: 1.2779, RMSE: 1.1305, MAE: 0.9452, MAPE: 1.73 %, R2: 0.853 MSE: 1.1908, RMSE: 1.0912, MAE: 0.9004, MAPE: 1.72 %, R2: 0.9597 MSE: 3.9312, RMSE: 1.9827, MAE: 1.2707, MAPE: 2.65 %, R2: 0.8775
Out[28]:
MSE 1.393623 RMSE 1.136351 MAE 0.865538 MAPE 0.016509 R2 0.935872 dtype: float64
In [ ]:
In [ ]: