coal_materials/.ipynb_checkpoints/20240123_煤炭-checkpoint.ipynb

392 KiB
Raw Permalink Blame History

In [1]:
import pandas as pd
In [2]:
# Read the coal / activated-carbon dataset. header=[1] takes the column names
# from the second spreadsheet row (row index 1).
excel_path = './data/20240123/煤炭数据.xlsx'
data = pd.read_excel(excel_path, header=[1])
data.head()
Out[2]:
灰分(d) 挥发分(daf 活化剂种类 活化剂比例 混合方式 活化温度 活化时间 升温速率 比表面积 总孔体积 微孔体积 Unnamed: 11
0 11.25 17.06 KOH 3.0 研磨 800 1.0 5.0 2784.0 1.0830 0.853 刘宇昊\n煤基活性炭的制备及其电化学性能研究 学位论文
1 8.53 13.46 KOH 3.0 研磨 800 1.0 5.0 2934.0 1.2290 1.074 NaN
2 18.08 13.85 KOH 3.0 研磨 800 1.0 5.0 3059.0 1.3044 1.011 NaN
3 11.42 12.31 KOH 3.0 研磨 800 1.0 5.0 2365.0 0.8030 0.605 NaN
4 11.60 8.49 KOH 3.0 研磨 800 1.0 5.0 2988.0 1.2820 0.944 NaN
In [3]:
# Remove the trailing 'Unnamed: 11' column, which only carries the free-text
# literature citation for each group of rows and is not a model input.
# Dropping by name (and tolerating its absence) keeps this cell idempotent:
# a positional, in-place drop of `data.columns[-1]` would delete one more real
# column (starting with the target '微孔体积') every time the cell is re-run.
data = data.drop(columns=['Unnamed: 11'], errors='ignore')
In [4]:
# Display the column names that remain after the preceding drop.
data.columns
Out[4]:
Index(['灰分(d)', '挥发分(daf', '活化剂种类', '活化剂比例', '混合方式', '活化温度', '活化时间', '升温速率',
       '比表面积', '总孔体积', '微孔体积'],
      dtype='object')
In [5]:
# Text-valued (categorical) columns; these are the ones passed to one-hot encoding.
object_cols = [
    '活化剂种类',
    '混合方式',
]
In [6]:
# One-hot encode the columns listed in `object_cols`; the result replaces `data`.
# NOTE(review): because `data` is rebound, re-running only this cell will fail
# once the original categorical columns are gone — re-run from the load cell.
data = pd.get_dummies(data, columns=object_cols)
In [7]:
# Regression targets; every other column of `data` is used as a model input.
out_cols = ['比表面积', '总孔体积', '微孔体积']
# Boolean-mask the column index so the original column order is preserved.
feature_cols = data.columns[~data.columns.isin(out_cols)].tolist()
In [8]:
# Modelling frame: same rows as `data`, with the old index discarded in favour
# of a fresh 0..n-1 index (drop=True means it is not kept as a column).
train_data = data.reset_index(drop=True)
In [9]:
# Sanity check: (rows, columns) of the modelling frame.
train_data.shape
Out[9]:
(174, 12)
In [10]:
import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [11]:
from sklearn.model_selection import KFold, train_test_split
# 6-fold cross-validation splitter; rows are shuffled with a fixed seed so the
# fold membership is the same on every run.
kf = KFold(n_splits=6, shuffle=True, random_state=42)
In [12]:
import numpy as np
In [13]:
# Booster settings handed to xgb.train for every target / fold.
params_xgb = dict(
    objective='reg:squarederror',  # plain squared-error regression
    subsample=0.8,                 # row sampling ratio per tree
    max_depth=20,
    eta=0.01,                      # learning rate
    colsample_bytree=0.9,          # column sampling ratio per tree
)
# Upper bound on boosting rounds passed to xgb.train.
num_boost_round = 1000
In [14]:
import matplotlib.pyplot as plt
In [15]:
# Matplotlib text setup for the plots below, whose titles are Chinese column names.
plt.rcParams.update({
    "font.sans-serif": ["SimHei"],  # set the sans-serif font
    "axes.unicode_minus": False,    # make minus signs display correctly
})
In [16]:
# Cross-validated evaluation: for each target column, train one XGBoost
# regressor per fold of `kf`, print the fold metrics, plot a sample of
# predicted vs. real values, and store the fold-averaged metrics.
eva_total = list()   # one mean-metric vector per target, ordered as `eva_cols`
index_list = list()  # target names, used as row labels of the summary table
eva_cols = ['MSE', 'RMSE', 'MAE', 'MAPE', 'R2']
n_plot_per_fold = 7  # only the first few validation points of each fold are plotted
for col in out_cols:
    eva_list = list()
    # Keep only the rows where THIS target is present, in a per-target frame.
    # `train_data` itself must not be overwritten here: doing so would carry the
    # rows removed for one target over to every later target and silently
    # shrink their training sets.
    target_data = train_data[train_data[col].notna()].reset_index(drop=True)
    cur_test = list()
    cur_real = list()
    for (train_index, test_index) in kf.split(target_data):
        train = target_data.loc[train_index]
        valid = target_data.loc[test_index]
        X_train, Y_train = train[feature_cols], train[col]
        X_valid, Y_valid = valid[feature_cols], valid[col]
        dtrain = xgb.DMatrix(X_train, Y_train)
        dvalid = xgb.DMatrix(X_valid, Y_valid)
        # NOTE(review): the held-out fold doubles as the early-stopping eval set,
        # so the metrics below are measured on data that influenced training
        # length and are likely optimistic — consider a separate inner split.
        watchlist = [(dvalid, 'eval')]
        gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,
                        early_stopping_rounds=100, verbose_eval=False)
        y_pred = gb_model.predict(xgb.DMatrix(X_valid))
        y_true = Y_valid.values
        MSE = mean_squared_error(y_true, y_pred)
        RMSE = np.sqrt(MSE)
        MAE = mean_absolute_error(y_true, y_pred)
        MAPE = mean_absolute_percentage_error(y_true, y_pred)
        R_2 = r2_score(y_true, y_pred)
        cur_test.extend(y_pred[:n_plot_per_fold])
        cur_real.extend(y_true[:n_plot_per_fold])
        print('MSE:', round(MSE, 4), end=', ')
        print('RMSE:', round(RMSE, 4), end=', ')
        print('MAE:', round(MAE, 4), end=', ')
        print('MAPE:', round(MAPE*100, 2), '%', end=', ')
        print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean
        # MAPE is stored as a fraction; only the printout above is in percent.
        eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
    # One figure per target: sampled real vs. predicted values across all folds.
    plt.figure(figsize=(12, 8))
    plt.plot(range(len(cur_test)), cur_real, 'o-', label='real')
    plt.plot(range(len(cur_test)), cur_test, '*-', label='pred')
    plt.legend(loc='best')
    plt.title(f'{col}')
    plt.show()
    # Average each metric over the folds for this target.
    eva_total.append(np.mean(eva_list, axis=0))
    index_list.append(f"{col}")
MSE: 159642.0815, RMSE: 399.5524, MAE: 274.5969, MAPE: 24.34 %, R_2: 0.7942
MSE: 193553.7041, RMSE: 439.9474, MAE: 328.694, MAPE: 35.35 %, R_2: 0.814
MSE: 220477.246, RMSE: 469.55, MAE: 306.6159, MAPE: 22.42 %, R_2: 0.7381
MSE: 290738.4856, RMSE: 539.2017, MAE: 385.6497, MAPE: 63.87 %, R_2: 0.6771
MSE: 198924.8773, RMSE: 446.01, MAE: 301.0048, MAPE: 31.69 %, R_2: 0.5602
MSE: 129420.5336, RMSE: 359.7507, MAE: 268.3835, MAPE: 38.83 %, R_2: 0.8213
No description has been provided for this image
MSE: 0.0829, RMSE: 0.2879, MAE: 0.193, MAPE: 55.19 %, R_2: 0.7327
MSE: 0.0702, RMSE: 0.2649, MAE: 0.1854, MAPE: 20.28 %, R_2: 0.8429
MSE: 0.0386, RMSE: 0.1964, MAE: 0.1393, MAPE: 19.43 %, R_2: 0.8072
MSE: 0.0497, RMSE: 0.2229, MAE: 0.1722, MAPE: 26.26 %, R_2: 0.8521
MSE: 0.0634, RMSE: 0.2519, MAE: 0.1608, MAPE: 37.87 %, R_2: 0.4996
MSE: 0.0488, RMSE: 0.2209, MAE: 0.1573, MAPE: 56.68 %, R_2: 0.8303
No description has been provided for this image
MSE: 0.0212, RMSE: 0.1456, MAE: 0.1027, MAPE: 19.78 %, R_2: 0.6947
MSE: 0.0632, RMSE: 0.2514, MAE: 0.1479, MAPE: 22.03 %, R_2: 0.6062
MSE: 0.0898, RMSE: 0.2997, MAE: 0.1758, MAPE: 27.82 %, R_2: 0.3597
MSE: 0.0234, RMSE: 0.153, MAE: 0.1014, MAPE: 21.71 %, R_2: 0.499
MSE: 0.0434, RMSE: 0.2083, MAE: 0.1318, MAPE: 46.08 %, R_2: 0.5236
MSE: 0.056, RMSE: 0.2366, MAE: 0.1601, MAPE: 23.9 %, R_2: 0.2317
No description has been provided for this image
In [17]:
# Summary table: one row per target column, one column per metric in `eva_cols`,
# each value being the mean over the cross-validation folds.
# MAPE is a fraction here (the per-fold printout multiplies it by 100).
pd.DataFrame.from_records(eva_total, index=index_list, columns=eva_cols)
Out[17]:
MSE RMSE MAE MAPE R2
比表面积 198792.821359 442.335351 310.824134 0.360828 0.734146
总孔体积 0.058931 0.240825 0.167995 0.359498 0.760793
微孔体积 0.049497 0.215764 0.136619 0.268889 0.485802
In [ ]: