coal_materials/20240123_煤沥青.ipynb at master

456 KiB

Raw Permalink Blame History

In [1]:

import pandas as pd

In [2]:

data = pd.read_excel('./data/20240123/煤沥青数据.xlsx')
data.head()

Out[2]:

	碳源	共碳化物质	加热次数	是否有碳化过程	模板剂种类	模板剂比例	KOH与煤沥青比例	活化温度	升温速率	活化时间	混合方式	比表面积	总孔体积	微孔体积	平均孔径
0	煤沥青	无	1	否	自制氧化钙	1.0	1.0	500	5	2.0	溶剂	908.07	0.40	0.34	1.75
1	煤沥青	无	1	否	自制氧化钙	1.0	0.5	600	5	2.0	溶剂	953.95	0.66	0.35	2.76
2	煤沥青	无	1	否	自制氧化钙	1.0	1.0	600	5	2.0	溶剂	1388.62	0.61	0.53	1.77
3	煤沥青	无	1	否	自制氧化钙	1.0	2.0	600	5	2.0	溶剂	1444.63	0.59	0.55	1.62
4	煤沥青	无	2	是	自制碱式碳酸镁	1.0	1.0	600	5	2.0	溶剂	1020.99	0.45	0.35	1.77

In [3]:

data.shape

Out[3]:

(149, 16)

In [4]:

data.columns

Out[4]:

Index(['碳源', '共碳化物质', '共碳化物/煤沥青', '加热次数', '是否有碳化过程', '模板剂种类', '模板剂比例',
       'KOH与煤沥青比例', '活化温度', '升温速率', '活化时间', '混合方式', '比表面积', '总孔体积', '微孔体积',
       '平均孔径'],
      dtype='object')

In [5]:

data.drop(columns=['碳源'], inplace=True)

In [6]:

object_cols = ['共碳化物质', '是否有碳化过程', '模板剂种类', '混合方式']

In [7]:

data = pd.get_dummies(data, columns=object_cols)

In [8]:

out_cols = ['比表面积', '总孔体积', '微孔体积', '平均孔径']
feature_cols = [x for x in data.columns if x not in out_cols]

In [9]:

train_data = data.reset_index(drop=True)

In [10]:

train_data.shape

Out[10]:

(149, 40)

In [11]:

import xgboost as xgb
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score

In [12]:

from sklearn.model_selection import KFold, train_test_split
kf = KFold(n_splits=5, shuffle=True, random_state=666)

In [13]:

import numpy as np

In [14]:

params_xgb = {"objective": 'reg:squarederror',
             "subsample": 0.9,
             "max_depth": 20,
             "eta": 0.01,
             "colsample_bytree": 0.9,}
num_boost_round = 1000

In [15]:

import matplotlib.pyplot as plt

In [16]:

plt.rcParams["font.sans-serif"] = ["SimHei"]  # 设置字体
plt.rcParams["axes.unicode_minus"] = False  # 正常显示负号

In [17]:

eva_total = list()
index_list = list()
eva_cols = ['MSE', 'RMSE', 'MAE', 'MAPE', 'R2']
for col in out_cols:
    eva_list = list()
    train_data = train_data[~train_data[col].isna()].reset_index(drop=True)
    cur_test = list()
    cur_real = list()
    for (train_index, test_index) in kf.split(train_data):
        train = train_data.loc[train_index]
        valid = train_data.loc[test_index]
        X_train, Y_train = train[feature_cols], train[col]
        X_valid, Y_valid = valid[feature_cols], valid[col]
        dtrain = xgb.DMatrix(X_train, Y_train)
        dvalid = xgb.DMatrix(X_valid, Y_valid)
        watchlist = [(dvalid, 'eval')]
        gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,
                        early_stopping_rounds=100, verbose_eval=False)
        y_pred = gb_model.predict(xgb.DMatrix(X_valid))
        y_true = Y_valid.values
        MSE = mean_squared_error(y_true, y_pred)
        RMSE = np.sqrt(mean_squared_error(y_true, y_pred))
        MAE = mean_absolute_error(y_true, y_pred)
        MAPE = mean_absolute_percentage_error(y_true, y_pred)
        R_2 = r2_score(y_true, y_pred)
        cur_test.extend(y_pred[:7])
        cur_real.extend(y_true[:7])
        print('MSE:', round(MSE, 4), end=', ')
        print('RMSE:', round(RMSE, 4), end=', ')
        print('MAE:', round(MAE, 4), end=', ')
        print('MAPE:', round(MAPE*100, 2), '%', end=', ')
        print('R_2:', round(R_2, 4))  #R方为负就说明拟合效果比平均值差
        eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
    plt.figure(figsize=(12, 8))
    plt.plot(range(len(cur_test)), cur_real, 'o-', label='real')
    plt.plot(range(len(cur_test)), cur_test, '*-', label='pred')
    plt.legend(loc='best')
    plt.title(f'{col}')
    plt.show()
    eva_total.append(np.mean(eva_list, axis=0))
    index_list.append(f"{col}")

MSE: 267691.4403, RMSE: 517.3891, MAE: 395.2788, MAPE: 94.67 %, R_2: 0.4467
MSE: 242169.4062, RMSE: 492.1071, MAE: 353.5184, MAPE: 153.84 %, R_2: 0.7103
MSE: 337963.1058, RMSE: 581.3459, MAE: 453.5923, MAPE: 368.53 %, R_2: 0.5508
MSE: 241296.272, RMSE: 491.2192, MAE: 378.0324, MAPE: 36.02 %, R_2: 0.5678
MSE: 393198.8331, RMSE: 627.0557, MAE: 494.652, MAPE: 424.8 %, R_2: 0.309

No description has been provided for this image

MSE: 0.1984, RMSE: 0.4454, MAE: 0.3543, MAPE: 72.07 %, R_2: 0.616
MSE: 0.1439, RMSE: 0.3794, MAE: 0.3062, MAPE: 224.83 %, R_2: 0.4173
MSE: 0.1073, RMSE: 0.3275, MAE: 0.2583, MAPE: 30.27 %, R_2: 0.6678
MSE: 0.1076, RMSE: 0.3281, MAE: 0.2422, MAPE: 39.55 %, R_2: 0.5426
MSE: 0.187, RMSE: 0.4324, MAE: 0.3131, MAPE: 389.39 %, R_2: 0.0647

MSE: 0.0303, RMSE: 0.1739, MAE: 0.1339, MAPE: 144.75 %, R_2: 0.6541
MSE: 0.0652, RMSE: 0.2554, MAE: 0.1954, MAPE: 55.86 %, R_2: 0.1165
MSE: 0.0546, RMSE: 0.2337, MAE: 0.1888, MAPE: 1337.75 %, R_2: 0.6439
MSE: 0.0312, RMSE: 0.1765, MAE: 0.1505, MAPE: 43.36 %, R_2: 0.5198
MSE: 0.0565, RMSE: 0.2377, MAE: 0.1762, MAPE: 496.94 %, R_2: 0.5316

MSE: 0.606, RMSE: 0.7785, MAE: 0.5382, MAPE: 19.71 %, R_2: 0.6204
MSE: 0.4154, RMSE: 0.6445, MAE: 0.4482, MAPE: 18.23 %, R_2: 0.7462
MSE: 1.7064, RMSE: 1.3063, MAE: 0.766, MAPE: 20.09 %, R_2: 0.1468
MSE: 0.7332, RMSE: 0.8563, MAE: 0.5696, MAPE: 25.1 %, R_2: -0.1811
MSE: 0.4071, RMSE: 0.638, MAE: 0.4065, MAPE: 13.74 %, R_2: 0.7213

In [18]:

pd.DataFrame.from_records(eva_total, index=index_list, columns=eva_cols)

Out[18]:

	MSE	RMSE	MAE	MAPE	R2
比表面积	296463.811484	541.823393	415.014783	2.155717	0.516912
总孔体积	0.148839	0.382557	0.294842	1.512200	0.461672
微孔体积	0.047554	0.215457	0.168962	4.157314	0.493163
平均孔径	0.773611	0.844710	0.545686	0.193752	0.410705

In [ ]:

456 KiB Raw Permalink Blame History

456 KiB

Raw Permalink Blame History