coal_materials/20240102.ipynb

20 KiB
Raw Permalink Blame History

In [1]:
import pandas as pd
In [2]:
data_0102 = pd.read_excel('./data/20240102/20240102.xlsx', header=[0,1,2])
data_0102
Out[2]:
Unnamed: 0_level_0 弹筒发热量 挥发分 固定炭
化验编号 Had Cad Nad Oad Qb,ad Vad Fcad
Unnamed: 0_level_2 (%) (%) (%) (%) MJ/kg (%) (%)
0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68
1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71
2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99
3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87
4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78
... ... ... ... ... ... ... ... ...
223 2720030490 4.12 68.85 0.97 26.055 27.864 32.94 51.89
224 2720028633 3.97 67.04 0.94 28.043 27.368 31.88 51.38
225 2720028634 4.12 68.42 0.96 26.493 27.886 33.16 52.00
226 2720017683 3.88 67.42 0.94 27.760 26.616 31.65 50.56
227 2720017678 3.81 66.74 0.92 28.530 26.688 31.02 50.82

228 rows × 8 columns

In [3]:
cols = [''.join([y for y in x if 'Unnamed' not in y]) for x in data_0102.columns]
cols
Out[3]:
['化验编号',
 '氢Had(%)',
 '碳Cad(%)',
 '氮Nad(%)',
 '氧Oad(%)',
 '弹筒发热量Qb,adMJ/kg',
 '挥发分Vad(%)',
 '固定炭Fcad(%)']
In [4]:
data_0102.columns = cols
In [5]:
import xgboost as xgb
In [6]:
feature_cols = cols[1:6]
feature_cols
Out[6]:
['氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg']
In [7]:
train_data = data_0102.copy()
train_data.head()
Out[7]:
化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg 挥发分Vad(%) 固定炭Fcad(%)
0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68
1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71
2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99
3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87
4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78
In [8]:
import numpy as np
In [9]:
train_data.reset_index(drop=True, inplace=True)
In [10]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [25]:
params_xgb = {"objective": 'reg:squarederror',
             "subsample": 1,
             "max_depth": 15,
             "eta": 0.3,
             "gamma": 0,
             "lambda": 1,
             "alpha": 0,
             "colsample_bytree": 0.9,}
num_boost_round = 1000
In [26]:
from sklearn.model_selection import KFold, train_test_split
kf = KFold(n_splits=10, shuffle=True, random_state=42)
In [27]:
eva_list = list()
for (train_index, test_index) in kf.split(train_data):
    train = train_data.loc[train_index]
    valid = train_data.loc[test_index]
    X_train, Y_train = train[feature_cols], np.log1p(train['挥发分Vad(%)'])
    X_valid, Y_valid = valid[feature_cols], np.log1p(valid['挥发分Vad(%)'])
    dtrain = xgb.DMatrix(X_train, Y_train)
    dvalid = xgb.DMatrix(X_valid, Y_valid)
    watchlist = [(dvalid, 'eval')]
    gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,
                    early_stopping_rounds=50, verbose_eval=False)
    y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))
    y_true = np.expm1(Y_valid.values)
    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(mean_squared_error(y_true, y_pred))
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    R_2 = r2_score(y_true, y_pred)
    print('MSE:', round(MSE, 4), end=', ')
    print('RMSE:', round(RMSE, 4), end=', ')
    print('MAE:', round(MAE, 4), end=', ')
    print('MAPE:', round(MAPE*100, 2), '%', end=', ')
    print('R_2:', round(R_2, 4))  #R方为负就说明拟合效果比平均值差
    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])
data_df.mean()
MSE: 0.475, RMSE: 0.6892, MAE: 0.5507, MAPE: 1.86 %, R_2: 0.9046
MSE: 1.1415, RMSE: 1.0684, MAE: 0.9133, MAPE: 3.06 %, R_2: 0.6923
MSE: 0.7247, RMSE: 0.8513, MAE: 0.6606, MAPE: 2.32 %, R_2: 0.9247
MSE: 1.3652, RMSE: 1.1684, MAE: 0.9609, MAPE: 3.24 %, R_2: 0.6698
MSE: 0.4552, RMSE: 0.6747, MAE: 0.5732, MAPE: 1.94 %, R_2: 0.903
MSE: 0.6357, RMSE: 0.7973, MAE: 0.6374, MAPE: 2.2 %, R_2: 0.8771
MSE: 0.9972, RMSE: 0.9986, MAE: 0.752, MAPE: 2.47 %, R_2: 0.8141
MSE: 1.5218, RMSE: 1.2336, MAE: 1.0569, MAPE: 3.45 %, R_2: 0.2363
MSE: 0.6891, RMSE: 0.8301, MAE: 0.6825, MAPE: 2.22 %, R_2: 0.9005
MSE: 1.6864, RMSE: 1.2986, MAE: 1.0004, MAPE: 3.51 %, R_2: 0.6893
Out[27]:
MSE     0.969172
RMSE    0.961023
MAE     0.778783
MAPE    0.026288
R_2     0.761188
dtype: float64
In [28]:
eva_list = list()
for (train_index, test_index) in kf.split(train_data):
    train = train_data.loc[train_index]
    valid = train_data.loc[test_index]
    X_train, Y_train = train[feature_cols], np.log1p(train['固定炭Fcad(%)'])
    X_valid, Y_valid = valid[feature_cols], np.log1p(valid['固定炭Fcad(%)'])
    dtrain = xgb.DMatrix(X_train, Y_train)
    dvalid = xgb.DMatrix(X_valid, Y_valid)
    watchlist = [(dvalid, 'eval')]
    gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,
                    early_stopping_rounds=50, verbose_eval=False)
    y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))
    y_true = np.expm1(Y_valid.values)
    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(mean_squared_error(y_true, y_pred))
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    R_2 = r2_score(y_true, y_pred)
    print('MSE:', round(MSE, 4), end=', ')
    print('RMSE:', round(RMSE, 4), end=', ')
    print('MAE:', round(MAE, 4), end=', ')
    print('MAPE:', round(MAPE*100, 2), '%', end=', ')
    print('R2:', round(R_2, 4))  #R方为负就说明拟合效果比平均值差
    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])
data_df.mean()
MSE: 0.9821, RMSE: 0.991, MAE: 0.7698, MAPE: 1.44 %, R2: 0.9652
MSE: 1.2674, RMSE: 1.1258, MAE: 0.8756, MAPE: 1.64 %, R2: 0.9174
MSE: 0.9137, RMSE: 0.9559, MAE: 0.757, MAPE: 1.46 %, R2: 0.9864
MSE: 1.6012, RMSE: 1.2654, MAE: 1.0173, MAPE: 1.89 %, R2: 0.9292
MSE: 1.4694, RMSE: 1.2122, MAE: 0.8524, MAPE: 1.59 %, R2: 0.9142
MSE: 0.7552, RMSE: 0.869, MAE: 0.7202, MAPE: 1.39 %, R2: 0.9779
MSE: 0.5474, RMSE: 0.7398, MAE: 0.5467, MAPE: 1.0 %, R2: 0.9783
MSE: 1.2779, RMSE: 1.1305, MAE: 0.9452, MAPE: 1.73 %, R2: 0.853
MSE: 1.1908, RMSE: 1.0912, MAE: 0.9004, MAPE: 1.72 %, R2: 0.9597
MSE: 3.9312, RMSE: 1.9827, MAE: 1.2707, MAPE: 2.65 %, R2: 0.8775
Out[28]:
MSE     1.393623
RMSE    1.136351
MAE     0.865538
MAPE    0.016509
R2      0.935872
dtype: float64
In [ ]:
 
In [ ]: