coal_materials/20240123_煤炭.ipynb at master

407 KiB

Raw Permalink Blame History

In [1]:

import pandas as pd

In [2]:

data = pd.read_excel('./data/20240123/煤炭数据.xlsx', header=[1])
data.head()

Out[2]:

	灰分(d)	挥发分(daf）	活化剂种类	活化剂比例	混合方式	活化温度	活化时间	升温速率	比表面积	总孔体积	微孔体积	Unnamed: 11
0	11.25	17.06	KOH	3.0	研磨	800	1.0	5.0	2784.0	1.0830	0.853	刘宇昊\n煤基活性炭的制备及其电化学性能研究学位论文
1	8.53	13.46	KOH	3.0	研磨	800	1.0	5.0	2934.0	1.2290	1.074	NaN
2	18.08	13.85	KOH	3.0	研磨	800	1.0	5.0	3059.0	1.3044	1.011	NaN
3	11.42	12.31	KOH	3.0	研磨	800	1.0	5.0	2365.0	0.8030	0.605	NaN
4	11.60	8.49	KOH	3.0	研磨	800	1.0	5.0	2988.0	1.2820	0.944	NaN

In [3]:

data.drop(columns=data.columns[-1], inplace=True)

In [4]:

data.columns

Out[4]:

Index(['灰分(d)', '挥发分(daf）', '活化剂种类', '活化剂比例', '混合方式', '活化温度', '活化时间', '升温速率',
       '比表面积', '总孔体积', '微孔体积'],
      dtype='object')

In [5]:

object_cols = ['活化剂种类', '混合方式']

In [6]:

data = pd.get_dummies(data, columns=object_cols)

In [7]:

out_cols = ['比表面积', '总孔体积', '微孔体积']
feature_cols = [x for x in data.columns if x not in out_cols]

In [8]:

train_data = data.reset_index(drop=True)

In [9]:

train_data.shape

Out[9]:

(174, 12)

In [10]:

import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

In [11]:

from sklearn.model_selection import KFold, train_test_split
kf = KFold(n_splits=6, shuffle=True, random_state=42)

In [12]:

import numpy as np

In [18]:

params_xgb = {"objective": 'reg:squarederror',
             "subsample": 0.8,
             "max_depth": 20,
             "eta": 0.01,
             "colsample_bytree": 0.9,}
num_boost_round = 1000

In [19]:

import matplotlib.pyplot as plt

In [20]:

plt.rcParams["font.sans-serif"] = ["SimHei"]  # 设置字体
plt.rcParams["axes.unicode_minus"] = False  # 正常显示负号

In [21]:

eva_total = list()
index_list = list()
eva_cols = ['MSE', 'RMSE', 'MAE', 'MAPE', 'R2']
for col in out_cols:
    eva_list = list()
    train_data = train_data[~train_data[col].isna()].reset_index(drop=True)
    cur_test = list()
    cur_real = list()
    for (train_index, test_index) in kf.split(train_data):
        train = train_data.loc[train_index]
        valid = train_data.loc[test_index]
        X_train, Y_train = train[feature_cols], train[col]
        X_valid, Y_valid = valid[feature_cols], valid[col]
        dtrain = xgb.DMatrix(X_train, Y_train)
        dvalid = xgb.DMatrix(X_valid, Y_valid)
        watchlist = [(dvalid, 'eval')]
        gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,
                        early_stopping_rounds=100, verbose_eval=False)
        y_pred = gb_model.predict(xgb.DMatrix(X_valid))
        y_true = Y_valid.values
        MSE = mean_squared_error(y_true, y_pred)
        RMSE = np.sqrt(mean_squared_error(y_true, y_pred))
        MAE = mean_absolute_error(y_true, y_pred)
        MAPE = mean_absolute_percentage_error(y_true, y_pred)
        R_2 = r2_score(y_true, y_pred)
        cur_test.extend(y_pred[:7])
        cur_real.extend(y_true[:7])
        print('MSE:', round(MSE, 4), end=', ')
        print('RMSE:', round(RMSE, 4), end=', ')
        print('MAE:', round(MAE, 4), end=', ')
        print('MAPE:', round(MAPE*100, 2), '%', end=', ')
        print('R_2:', round(R_2, 4))  #R方为负就说明拟合效果比平均值差
        eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
    plt.figure(figsize=(12, 8))
    plt.plot(range(len(cur_test)), cur_real, 'o-', label='real')
    plt.plot(range(len(cur_test)), cur_test, '*-', label='pred')
    plt.legend(loc='best')
    plt.title(f'{col}')
    plt.show()
    eva_total.append(np.mean(eva_list, axis=0))
    index_list.append(f"{col}")

MSE: 83933.6379, RMSE: 289.713, MAE: 205.8723, MAPE: 16.71 %, R_2: 0.8599
MSE: 151368.0568, RMSE: 389.0605, MAE: 331.2811, MAPE: 25.0 %, R_2: 0.8364
MSE: 179281.5189, RMSE: 423.4165, MAE: 293.9454, MAPE: 16.84 %, R_2: 0.7792
MSE: 230625.1215, RMSE: 480.2344, MAE: 288.9958, MAPE: 56.39 %, R_2: 0.5948
MSE: 212246.0972, RMSE: 460.7017, MAE: 312.8322, MAPE: 39.54 %, R_2: 0.6924
MSE: 231044.2089, RMSE: 480.6706, MAE: 359.0907, MAPE: 18.98 %, R_2: 0.6907

No description has been provided for this image

MSE: 0.0309, RMSE: 0.1758, MAE: 0.1127, MAPE: 14.96 %, R_2: 0.8353
MSE: 0.0477, RMSE: 0.2184, MAE: 0.1858, MAPE: 23.49 %, R_2: 0.8287
MSE: 0.0656, RMSE: 0.2561, MAE: 0.1692, MAPE: 17.14 %, R_2: 0.8098
MSE: 0.0338, RMSE: 0.184, MAE: 0.122, MAPE: 18.98 %, R_2: 0.7735
MSE: 0.0511, RMSE: 0.2261, MAE: 0.1652, MAPE: 36.1 %, R_2: 0.8148
MSE: 0.0684, RMSE: 0.2615, MAE: 0.192, MAPE: 18.13 %, R_2: 0.7924

MSE: 0.0185, RMSE: 0.1362, MAE: 0.0952, MAPE: 18.49 %, R_2: 0.7329
MSE: 0.0503, RMSE: 0.2242, MAE: 0.1338, MAPE: 21.21 %, R_2: 0.6868
MSE: 0.0768, RMSE: 0.2771, MAE: 0.1638, MAPE: 26.24 %, R_2: 0.4523
MSE: 0.0222, RMSE: 0.1489, MAE: 0.0975, MAPE: 21.11 %, R_2: 0.5256
MSE: 0.0395, RMSE: 0.1987, MAE: 0.1253, MAPE: 42.88 %, R_2: 0.5666
MSE: 0.0525, RMSE: 0.229, MAE: 0.1566, MAPE: 23.66 %, R_2: 0.2799

In [22]:

pd.DataFrame.from_records(eva_total, index=index_list, columns=eva_cols)

Out[22]:

	MSE	RMSE	MAE	MAPE	R2
比表面积	181416.440212	420.632794	298.669574	0.289103	0.742225
总孔体积	0.049592	0.220315	0.157799	0.214682	0.809092
微孔体积	0.043286	0.202355	0.128714	0.255982	0.540697

In [ ]:

407 KiB Raw Permalink Blame History Unescape Escape

407 KiB

Raw Permalink Blame History