22 KiB
22 KiB
In [1]:
import pandas as pd
In [6]:
# Load the assay workbook stored under ./data/20240102. The first three
# spreadsheet rows are read as a three-level column header.
data_0102 = pd.read_excel(
    './data/20240102/20240102.xlsx',
    header=[0, 1, 2],
)
data_0102
Out[6]:
Unnamed: 0_level_0 | 氢 | 碳 | 氮 | 氧 | 弹筒发热量 | 挥发分 | 固定炭 | |
---|---|---|---|---|---|---|---|---|
化验编号 | Had | Cad | Nad | Oad | Qb,ad | Vad | Fcad | |
Unnamed: 0_level_2 | (%) | (%) | (%) | (%) | MJ/kg | (%) | (%) | |
0 | 2720110529 | 3.93 | 70.18 | 0.81 | 25.079 | 27.820 | 32.06 | 55.68 |
1 | 2720096883 | 3.78 | 68.93 | 0.77 | 26.512 | 27.404 | 29.96 | 54.71 |
2 | 2720109084 | 3.48 | 69.60 | 0.76 | 26.148 | 27.578 | 29.31 | 55.99 |
3 | 2720084708 | 3.47 | 66.71 | 0.76 | 29.055 | 26.338 | 28.58 | 53.87 |
4 | 2720062721 | 3.87 | 68.78 | 0.80 | 26.542 | 27.280 | 29.97 | 54.78 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
223 | 2720030490 | 4.12 | 68.85 | 0.97 | 26.055 | 27.864 | 32.94 | 51.89 |
224 | 2720028633 | 3.97 | 67.04 | 0.94 | 28.043 | 27.368 | 31.88 | 51.38 |
225 | 2720028634 | 4.12 | 68.42 | 0.96 | 26.493 | 27.886 | 33.16 | 52.00 |
226 | 2720017683 | 3.88 | 67.42 | 0.94 | 27.760 | 26.616 | 31.65 | 50.56 |
227 | 2720017678 | 3.81 | 66.74 | 0.92 | 28.530 | 26.688 | 31.02 | 50.82 |
228 rows × 8 columns
In [10]:
# Flatten the three-level header into one label per column: concatenate the
# level names and skip the 'Unnamed: ...' placeholders that appear where a
# header cell was empty.
cols = [
    ''.join(level for level in header if 'Unnamed' not in level)
    for header in data_0102.columns
]
cols
Out[10]:
['化验编号', '氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg', '挥发分Vad(%)', '固定炭Fcad(%)']
In [11]:
# Replace the three-level header with the flattened single-level labels in `cols`.
data_0102.columns = cols
In [27]:
import xgboost as xgb
In [16]:
# Model inputs: the five columns at positions 1-5 of `cols`. Position 0 and
# the columns from position 6 onward are left out of the feature set.
feature_cols = cols[1:6]
feature_cols
Out[16]:
['氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg']
In [44]:
# Work on a copy so that later changes to the training frame do not touch the
# loaded frame, then preview the first rows.
train_data = data_0102.copy()
train_data.head()
Out[44]:
化验编号 | 氢Had(%) | 碳Cad(%) | 氮Nad(%) | 氧Oad(%) | 弹筒发热量Qb,adMJ/kg | 挥发分Vad(%) | 固定炭Fcad(%) | |
---|---|---|---|---|---|---|---|---|
0 | 2720110529 | 3.93 | 70.18 | 0.81 | 25.079 | 27.820 | 32.06 | 55.68 |
1 | 2720096883 | 3.78 | 68.93 | 0.77 | 26.512 | 27.404 | 29.96 | 54.71 |
2 | 2720109084 | 3.48 | 69.60 | 0.76 | 26.148 | 27.578 | 29.31 | 55.99 |
3 | 2720084708 | 3.47 | 66.71 | 0.76 | 29.055 | 26.338 | 28.58 | 53.87 |
4 | 2720062721 | 3.87 | 68.78 | 0.80 | 26.542 | 27.280 | 29.97 | 54.78 |
In [19]:
import seaborn as sns
In [21]:
import numpy as np
In [23]:
# Give the training frame a fresh 0..n-1 index and discard the old one.
# Rebinding the name (instead of inplace=True) keeps the cell free of hidden
# in-place mutation and safe to re-run.
train_data = train_data.reset_index(drop=True)
In [24]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [40]:
# Booster hyper-parameters passed to xgb.train by the cross-validation cells.
# - "objective" is the booster key for the loss; the former "obj" key is not a
#   booster parameter, and 'reg:linear' is the deprecated name of
#   'reg:squarederror'.
# - "verbosity": 0 replaces the removed "silent": True flag.
params_xgb = {
    "verbosity": 0,
    "objective": "reg:squarederror",
    "subsample": 1,
    "max_depth": 15,
    "eta": 0.3,
    "gamma": 0,
    "lambda": 1,
    "alpha": 0,
    "colsample_bytree": 0.9,
}
# Upper bound on boosting rounds.
num_boost_round = 1000

# Both spellings are used in this notebook (`param_xgb` / `num_round` here,
# `params_xgb` / `num_boost_round` in the training cells). Define all four so
# a fresh-kernel Run All does not hit a NameError.
param_xgb = params_xgb
num_round = num_boost_round
In [41]:
from sklearn.model_selection import KFold, train_test_split

# Shuffled 10-fold splitter; the fixed random_state keeps fold membership the
# same on every run.
kf = KFold(
    shuffle=True,
    n_splits=10,
    random_state=42,
)
In [43]:
# 10-fold cross-validation of an XGBoost regressor for the '挥发分Vad(%)' column.
# The target is fitted on a log1p scale; predictions are mapped back with
# expm1 before scoring, so every metric is in the column's original units.
target_col = '挥发分Vad(%)'
eva_list = []
for train_index, test_index in kf.split(train_data):
    # kf.split yields positional indices, so select rows by position (.iloc)
    # rather than by label.
    train = train_data.iloc[train_index]
    valid = train_data.iloc[test_index]
    X_train, Y_train = train[feature_cols], np.log1p(train[target_col])
    X_valid, Y_valid = valid[feature_cols], np.log1p(valid[target_col])

    dtrain = xgb.DMatrix(X_train, Y_train)
    dvalid = xgb.DMatrix(X_valid, Y_valid)
    # NOTE(review): early stopping monitors the same fold that is scored
    # below, so the reported metrics may be optimistic. Consider an inner
    # hold-out split of `train` for the watchlist.
    watchlist = [(dvalid, 'eval')]
    # Use the names defined by the hyper-parameter cell (`param_xgb`,
    # `num_round`) so this cell runs on a fresh kernel.
    gb_model = xgb.train(
        param_xgb,
        dtrain,
        num_round,
        evals=watchlist,
        early_stopping_rounds=50,
        verbose_eval=False,
    )

    y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))
    # Score against the untransformed column values; no log1p/expm1 round trip.
    y_true = valid[target_col].to_numpy()

    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(MSE)
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    R_2 = r2_score(y_true, y_pred)
    print('MSE:', round(MSE, 4), end=', ')
    print('RMSE:', round(RMSE, 4), end=', ')
    print('MAE:', round(MAE, 4), end=', ')
    print('MAPE:', round(MAPE*100, 2), '%', end=', ')
    print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean
    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])

# One row per fold; the displayed value is the per-metric mean over the folds.
data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])
data_df.mean()
MSE: 1.9436, RMSE: 1.3941, MAE: 1.1861, MAPE: 3.94 %, R_2: 0.6095 MSE: 1.8735, RMSE: 1.3688, MAE: 1.132, MAPE: 3.77 %, R_2: 0.495 MSE: 2.0587, RMSE: 1.4348, MAE: 1.0706, MAPE: 4.08 %, R_2: 0.7862 MSE: 1.9298, RMSE: 1.3892, MAE: 1.1469, MAPE: 3.84 %, R_2: 0.5332 MSE: 1.4583, RMSE: 1.2076, MAE: 1.097, MAPE: 3.67 %, R_2: 0.6894 MSE: 2.0822, RMSE: 1.443, MAE: 1.1645, MAPE: 3.88 %, R_2: 0.5975 MSE: 1.3521, RMSE: 1.1628, MAE: 0.9905, MAPE: 3.37 %, R_2: 0.7479 MSE: 1.4057, RMSE: 1.1856, MAE: 0.9998, MAPE: 3.3 %, R_2: 0.2946 MSE: 2.2274, RMSE: 1.4925, MAE: 1.2638, MAPE: 4.19 %, R_2: 0.6785 MSE: 1.4866, RMSE: 1.2193, MAE: 1.0797, MAPE: 3.67 %, R_2: 0.7261
Out[43]:
MSE 1.781792 RMSE 1.329760 MAE 1.113084 MAPE 0.037719 R_2 0.615796 dtype: float64
In [48]:
# 10-fold cross-validation of an XGBoost regressor for the '固定炭Fcad(%)' column.
# The target is fitted on a log1p scale; predictions are mapped back with
# expm1 before scoring, so every metric is in the column's original units.
target_col = '固定炭Fcad(%)'
eva_list = []
for train_index, test_index in kf.split(train_data):
    # kf.split yields positional indices, so select rows by position (.iloc)
    # rather than by label.
    train = train_data.iloc[train_index]
    valid = train_data.iloc[test_index]
    X_train, Y_train = train[feature_cols], np.log1p(train[target_col])
    X_valid, Y_valid = valid[feature_cols], np.log1p(valid[target_col])

    dtrain = xgb.DMatrix(X_train, Y_train)
    dvalid = xgb.DMatrix(X_valid, Y_valid)
    # NOTE(review): early stopping monitors the same fold that is scored
    # below, so the reported metrics may be optimistic. Consider an inner
    # hold-out split of `train` for the watchlist.
    watchlist = [(dvalid, 'eval')]
    # Use the names defined by the hyper-parameter cell (`param_xgb`,
    # `num_round`) so this cell runs on a fresh kernel.
    gb_model = xgb.train(
        param_xgb,
        dtrain,
        num_round,
        evals=watchlist,
        early_stopping_rounds=50,
        verbose_eval=False,
    )

    y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))
    # Score against the untransformed column values; no log1p/expm1 round trip.
    y_true = valid[target_col].to_numpy()

    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(MSE)
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    R_2 = r2_score(y_true, y_pred)
    print('MSE:', round(MSE, 4), end=', ')
    print('RMSE:', round(RMSE, 4), end=', ')
    print('MAE:', round(MAE, 4), end=', ')
    print('MAPE:', round(MAPE*100, 2), '%', end=', ')
    print('R2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean
    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])

# One row per fold; the displayed value is the per-metric mean over the folds.
data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])
data_df.mean()
MSE: 4.6724, RMSE: 2.1616, MAE: 1.7297, MAPE: 3.42 %, R2: 0.8346 MSE: 3.0512, RMSE: 1.7468, MAE: 1.4485, MAPE: 2.62 %, R2: 0.8011 MSE: 7.6672, RMSE: 2.769, MAE: 1.951, MAPE: 4.56 %, R2: 0.8856 MSE: 4.0334, RMSE: 2.0083, MAE: 1.487, MAPE: 2.77 %, R2: 0.8216 MSE: 2.6382, RMSE: 1.6243, MAE: 1.1551, MAPE: 2.12 %, R2: 0.846 MSE: 5.8097, RMSE: 2.4103, MAE: 1.8683, MAPE: 3.8 %, R2: 0.83 MSE: 2.3446, RMSE: 1.5312, MAE: 1.1294, MAPE: 2.28 %, R2: 0.9069 MSE: 3.0069, RMSE: 1.734, MAE: 1.3782, MAPE: 2.46 %, R2: 0.6541 MSE: 4.1652, RMSE: 2.0409, MAE: 1.5685, MAPE: 3.2 %, R2: 0.859 MSE: 4.2023, RMSE: 2.05, MAE: 1.6284, MAPE: 3.2 %, R2: 0.869
Out[48]:
MSE 4.159107 RMSE 2.007631 MAE 1.534427 MAPE 0.030424 R2 0.830794 dtype: float64
In [67]:
Out[67]:
MSE | RMSE | MAE | MAPE | R2 | |
---|---|---|---|---|---|
十折交叉验证均值 | 4.159107 | 2.007631 | 1.534427 | 0.030424 | 0.830794 |
In [ ]: