606 KiB
606 KiB
In [1]:
import pandas as pd
In [2]:
data = pd.read_excel('./data/20240123/煤沥青数据.xlsx') data.head()
Out[2]:
碳源 | 共碳化物质 | 共碳化物/煤沥青 | 加热次数 | 是否有碳化过程 | 模板剂种类 | 模板剂比例 | KOH与煤沥青比例 | 活化温度 | 升温速率 | 活化时间 | 混合方式 | 比表面积 | 总孔体积 | 微孔体积 | 平均孔径 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 1.0 | 500 | 5 | 2.0 | 溶剂 | 908.07 | 0.40 | 0.34 | 1.75 |
1 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 0.5 | 600 | 5 | 2.0 | 溶剂 | 953.95 | 0.66 | 0.35 | 2.76 |
2 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 1.0 | 600 | 5 | 2.0 | 溶剂 | 1388.62 | 0.61 | 0.53 | 1.77 |
3 | 煤沥青 | 无 | 0.0 | 1 | 否 | 自制氧化钙 | 1.0 | 2.0 | 600 | 5 | 2.0 | 溶剂 | 1444.63 | 0.59 | 0.55 | 1.62 |
4 | 煤沥青 | 无 | 0.0 | 2 | 是 | 自制碱式碳酸镁 | 1.0 | 1.0 | 600 | 5 | 2.0 | 溶剂 | 1020.99 | 0.45 | 0.35 | 1.77 |
In [3]:
data.shape
Out[3]:
(149, 16)
In [4]:
data.columns
Out[4]:
Index(['碳源', '共碳化物质', '共碳化物/煤沥青', '加热次数', '是否有碳化过程', '模板剂种类', '模板剂比例', 'KOH与煤沥青比例', '活化温度', '升温速率', '活化时间', '混合方式', '比表面积', '总孔体积', '微孔体积', '平均孔径'], dtype='object')
In [5]:
data.drop(columns=['碳源'], inplace=True)
In [6]:
object_cols = ['共碳化物质', '是否有碳化过程', '模板剂种类', '混合方式']
In [7]:
data = pd.get_dummies(data, columns=object_cols)
In [8]:
out_cols = ['比表面积', '总孔体积', '微孔体积', '平均孔径'] feature_cols = [x for x in data.columns if x not in out_cols]
In [9]:
train_data = data.reset_index(drop=True)
In [10]:
train_data.shape
Out[10]:
(149, 40)
In [11]:
import xgboost as xgb from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [12]:
from sklearn.model_selection import KFold, train_test_split kf = KFold(n_splits=5, shuffle=True, random_state=666)
In [13]:
import numpy as np
In [14]:
params_xgb = {"objective": 'reg:squarederror', "subsample": 0.9, "max_depth": 20, "eta": 0.01, "colsample_bytree": 0.9,} num_boost_round = 1000
In [15]:
import matplotlib.pyplot as plt
In [16]:
plt.rcParams["font.sans-serif"] = ["SimHei"] # 设置字体 plt.rcParams["axes.unicode_minus"] = False # 正常显示负号
In [18]:
eva_total = list() index_list = list() eva_cols = ['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'] for col in out_cols: eva_list = list() train_data = train_data[~train_data[col].isna()].reset_index(drop=True) cur_test = list() cur_real = list() for (train_index, test_index) in kf.split(train_data): train = train_data.loc[train_index] valid = train_data.loc[test_index] X_train, Y_train = train[feature_cols], train[col] X_valid, Y_valid = valid[feature_cols], valid[col] dtrain = xgb.DMatrix(X_train, Y_train) dvalid = xgb.DMatrix(X_valid, Y_valid) watchlist = [(dvalid, 'eval')] gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=100, verbose_eval=False) y_pred = gb_model.predict(xgb.DMatrix(X_valid)) y_true = Y_valid.values MSE = mean_squared_error(y_true, y_pred) RMSE = np.sqrt(mean_squared_error(y_true, y_pred)) MAE = mean_absolute_error(y_true, y_pred) MAPE = mean_absolute_percentage_error(y_true, y_pred) R_2 = r2_score(y_true, y_pred) cur_test.extend(y_pred[:7]) cur_real.extend(y_true[:7]) print('MSE:', round(MSE, 4), end=', ') print('RMSE:', round(RMSE, 4), end=', ') print('MAE:', round(MAE, 4), end=', ') print('MAPE:', round(MAPE*100, 2), '%', end=', ') print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差 eva_list.append([MSE, RMSE, MAE, MAPE, R_2]) plt.figure(figsize=(12, 8)) plt.plot(range(len(cur_test)), cur_real, 'o-', label='real') plt.plot(range(len(cur_test)), cur_test, '*-', label='pred') plt.legend(loc='best') plt.title(f'{col}') plt.show() eva_total.append(np.mean(eva_list, axis=0)) index_list.append(f"{col}")
MSE: 146052.1781, RMSE: 382.1677, MAE: 335.8072, MAPE: 27.62 %, R_2: 0.3237 MSE: 457536.2053, RMSE: 676.4142, MAE: 524.2504, MAPE: 436.7 %, R_2: 0.4597 MSE: 426986.1964, RMSE: 653.4418, MAE: 517.7005, MAPE: 28.25 %, R_2: 0.1735 MSE: 276509.2691, RMSE: 525.8415, MAE: 387.3172, MAPE: 32.43 %, R_2: 0.4786 MSE: 300204.7099, RMSE: 547.9094, MAE: 395.1222, MAPE: 314.87 %, R_2: 0.3381 MSE: 243884.6623, RMSE: 493.8468, MAE: 382.9586, MAPE: 1077.01 %, R_2: 0.6543 MSE: 380516.2705, RMSE: 616.86, MAE: 528.3397, MAPE: 42.43 %, R_2: 0.294 MSE: 457352.6686, RMSE: 676.2785, MAE: 515.0433, MAPE: 547.78 %, R_2: 0.5355 MSE: 275148.3579, RMSE: 524.5459, MAE: 464.9701, MAPE: 48.3 %, R_2: 0.3033 MSE: 215299.6743, RMSE: 464.004, MAE: 385.4702, MAPE: 20.69 %, R_2: 0.4055
MSE: 0.0565, RMSE: 0.2377, MAE: 0.1623, MAPE: 30.73 %, R_2: 0.7346 MSE: 0.1443, RMSE: 0.3798, MAE: 0.2874, MAPE: 154.87 %, R_2: 0.801 MSE: 0.3168, RMSE: 0.5628, MAE: 0.4358, MAPE: 43.0 %, R_2: 0.4067 MSE: 0.1148, RMSE: 0.3389, MAE: 0.2794, MAPE: 29.47 %, R_2: 0.597 MSE: 0.1082, RMSE: 0.329, MAE: 0.2451, MAPE: 125.28 %, R_2: 0.3208 MSE: 0.0987, RMSE: 0.3141, MAE: 0.2595, MAPE: 338.62 %, R_2: 0.6563 MSE: 0.1457, RMSE: 0.3817, MAE: 0.2933, MAPE: 40.45 %, R_2: 0.3009 MSE: 0.1538, RMSE: 0.3922, MAE: 0.3011, MAPE: 441.83 %, R_2: 0.4244 MSE: 0.1302, RMSE: 0.3609, MAE: 0.2923, MAPE: 46.21 %, R_2: 0.1553 MSE: 0.0737, RMSE: 0.2715, MAE: 0.2209, MAPE: 22.16 %, R_2: 0.6708
MSE: 0.0334, RMSE: 0.1829, MAE: 0.1388, MAPE: 28.81 %, R_2: -0.1911 MSE: 0.0604, RMSE: 0.2457, MAE: 0.1958, MAPE: 1278.96 %, R_2: 0.6126 MSE: 0.0646, RMSE: 0.2542, MAE: 0.1992, MAPE: 41.7 %, R_2: 0.3841 MSE: 0.0459, RMSE: 0.2142, MAE: 0.153, MAPE: 38.47 %, R_2: 0.563 MSE: 0.0213, RMSE: 0.1459, MAE: 0.1258, MAPE: 216.58 %, R_2: 0.6774 MSE: 0.0332, RMSE: 0.1822, MAE: 0.1545, MAPE: 1355.85 %, R_2: 0.7458 MSE: 0.0534, RMSE: 0.231, MAE: 0.1976, MAPE: 63.72 %, R_2: 0.19 MSE: 0.0217, RMSE: 0.1474, MAE: 0.1131, MAPE: 1044.2 %, R_2: 0.7267 MSE: 0.09, RMSE: 0.3, MAE: 0.261, MAPE: 76.62 %, R_2: 0.058 MSE: 0.0616, RMSE: 0.2482, MAE: 0.1984, MAPE: 44.34 %, R_2: 0.3462
MSE: 0.7362, RMSE: 0.858, MAE: 0.5955, MAPE: 23.28 %, R_2: 0.6539 MSE: 0.6157, RMSE: 0.7847, MAE: 0.4994, MAPE: 15.91 %, R_2: 0.3943 MSE: 0.2767, RMSE: 0.526, MAE: 0.4052, MAPE: 18.6 %, R_2: 0.6936 MSE: 0.5095, RMSE: 0.7138, MAE: 0.4774, MAPE: 16.58 %, R_2: 0.7721 MSE: 2.0145, RMSE: 1.4193, MAE: 0.9327, MAPE: 24.89 %, R_2: 0.0073 MSE: 1.4449, RMSE: 1.202, MAE: 0.5396, MAPE: 13.41 %, R_2: 0.2873 MSE: 0.4101, RMSE: 0.6404, MAE: 0.4024, MAPE: 16.32 %, R_2: -1.8128 MSE: 1.4384, RMSE: 1.1993, MAE: 0.8923, MAPE: 39.29 %, R_2: -0.313 MSE: 0.3805, RMSE: 0.6168, MAE: 0.4343, MAPE: 16.0 %, R_2: 0.6971 MSE: 0.0937, RMSE: 0.3061, MAE: 0.2214, MAPE: 9.25 %, R_2: 0.9435
In [19]:
pd.DataFrame.from_records(eva_total, index=index_list, columns=eva_cols)
Out[19]:
MSE | RMSE | MAE | MAPE | R2 | |
---|---|---|---|---|---|
比表面积 | 317949.019239 | 556.130983 | 443.697934 | 2.576080 | 0.396622 |
总孔体积 | 0.134278 | 0.356866 | 0.277702 | 1.272616 | 0.506787 |
微孔体积 | 0.048545 | 0.215159 | 0.173718 | 4.189249 | 0.411268 |
平均孔径 | 0.792019 | 0.826652 | 0.540024 | 0.193539 | 0.232322 |
In [19]:
pd.DataFrame.from_records(eva_total, index=index_list, columns=eva_cols)
Out[19]:
MSE | RMSE | MAE | MAPE | R2 | |
---|---|---|---|---|---|
比表面积 | 315103.064636 | 547.014045 | 398.015647 | 0.976955 | 0.466536 |
总孔体积 | 0.127985 | 0.353757 | 0.268668 | 1.179238 | 0.470801 |
微孔体积 | 0.044655 | 0.207595 | 0.164963 | 4.548583 | 0.536350 |
平均孔径 | 0.801410 | 0.828470 | 0.533512 | 0.184272 | 0.302607 |
In [ ]: