coal_materials/.ipynb_checkpoints/20240102-checkpoint.ipynb

654 lines
20 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "70ae2cb0-c6f0-4080-b894-2246c9d880e2",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "6a94278b-8f51-4edc-966b-4a32876a4536",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>Unnamed: 0_level_0</th>\n",
" <th>氢</th>\n",
" <th>碳</th>\n",
" <th>氮</th>\n",
" <th>氧</th>\n",
" <th>弹筒发热量</th>\n",
" <th>挥发分</th>\n",
" <th>固定炭</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>化验编号</th>\n",
" <th>Had</th>\n",
" <th>Cad</th>\n",
" <th>Nad</th>\n",
" <th>Oad</th>\n",
" <th>Qb,ad</th>\n",
" <th>Vad</th>\n",
" <th>Fcad</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>Unnamed: 0_level_2</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" <th>MJ/kg</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2720110529</td>\n",
" <td>3.93</td>\n",
" <td>70.18</td>\n",
" <td>0.81</td>\n",
" <td>25.079</td>\n",
" <td>27.820</td>\n",
" <td>32.06</td>\n",
" <td>55.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2720096883</td>\n",
" <td>3.78</td>\n",
" <td>68.93</td>\n",
" <td>0.77</td>\n",
" <td>26.512</td>\n",
" <td>27.404</td>\n",
" <td>29.96</td>\n",
" <td>54.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2720109084</td>\n",
" <td>3.48</td>\n",
" <td>69.60</td>\n",
" <td>0.76</td>\n",
" <td>26.148</td>\n",
" <td>27.578</td>\n",
" <td>29.31</td>\n",
" <td>55.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2720084708</td>\n",
" <td>3.47</td>\n",
" <td>66.71</td>\n",
" <td>0.76</td>\n",
" <td>29.055</td>\n",
" <td>26.338</td>\n",
" <td>28.58</td>\n",
" <td>53.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2720062721</td>\n",
" <td>3.87</td>\n",
" <td>68.78</td>\n",
" <td>0.80</td>\n",
" <td>26.542</td>\n",
" <td>27.280</td>\n",
" <td>29.97</td>\n",
" <td>54.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>2720030490</td>\n",
" <td>4.12</td>\n",
" <td>68.85</td>\n",
" <td>0.97</td>\n",
" <td>26.055</td>\n",
" <td>27.864</td>\n",
" <td>32.94</td>\n",
" <td>51.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224</th>\n",
" <td>2720028633</td>\n",
" <td>3.97</td>\n",
" <td>67.04</td>\n",
" <td>0.94</td>\n",
" <td>28.043</td>\n",
" <td>27.368</td>\n",
" <td>31.88</td>\n",
" <td>51.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>225</th>\n",
" <td>2720028634</td>\n",
" <td>4.12</td>\n",
" <td>68.42</td>\n",
" <td>0.96</td>\n",
" <td>26.493</td>\n",
" <td>27.886</td>\n",
" <td>33.16</td>\n",
" <td>52.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>226</th>\n",
" <td>2720017683</td>\n",
" <td>3.88</td>\n",
" <td>67.42</td>\n",
" <td>0.94</td>\n",
" <td>27.760</td>\n",
" <td>26.616</td>\n",
" <td>31.65</td>\n",
" <td>50.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>227</th>\n",
" <td>2720017678</td>\n",
" <td>3.81</td>\n",
" <td>66.74</td>\n",
" <td>0.92</td>\n",
" <td>28.530</td>\n",
" <td>26.688</td>\n",
" <td>31.02</td>\n",
" <td>50.82</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>228 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0_level_0 氢 碳 氮 氧 弹筒发热量 挥发分 固定炭\n",
" 化验编号 Had Cad Nad Oad Qb,ad Vad Fcad\n",
" Unnamed: 0_level_2 (%) (%) (%) (%) MJ/kg (%) (%)\n",
"0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68\n",
"1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71\n",
"2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99\n",
"3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87\n",
"4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78\n",
".. ... ... ... ... ... ... ... ...\n",
"223 2720030490 4.12 68.85 0.97 26.055 27.864 32.94 51.89\n",
"224 2720028633 3.97 67.04 0.94 28.043 27.368 31.88 51.38\n",
"225 2720028634 4.12 68.42 0.96 26.493 27.886 33.16 52.00\n",
"226 2720017683 3.88 67.42 0.94 27.760 26.616 31.65 50.56\n",
"227 2720017678 3.81 66.74 0.92 28.530 26.688 31.02 50.82\n",
"\n",
"[228 rows x 8 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The sheet is read with its first three rows as the header, so the columns\n",
"# load as a three-level MultiIndex.\n",
"data_0102 = pd.read_excel(\n",
"    './data/20240102/20240102.xlsx',\n",
"    header=[0, 1, 2],\n",
")\n",
"data_0102"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "f72789a6-f3fa-4ab1-8b62-999413958608",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['化验编号',\n",
" '氢Had(%)',\n",
" '碳Cad(%)',\n",
" '氮Nad(%)',\n",
" '氧Oad(%)',\n",
" '弹筒发热量Qb,adMJ/kg',\n",
" '挥发分Vad(%)',\n",
" '固定炭Fcad(%)']"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collapse every three-level column label into a single string, skipping any\n",
"# level whose text contains 'Unnamed'.\n",
"cols = [\n",
"    ''.join(level for level in column if 'Unnamed' not in level)\n",
"    for column in data_0102.columns\n",
"]\n",
"cols"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "6ffb1989-3f45-4d1c-84c9-59b1045b7d9e",
"metadata": {},
"outputs": [],
"source": [
"data_0102.columns = cols"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9c708cc0-9f1b-4669-a350-6d24cb720794",
"metadata": {},
"outputs": [],
"source": [
"import xgboost as xgb"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "103349e1-aa4a-427a-a489-9ab28787088b",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Model inputs: positions 1-5 of cols. The slice skips the first column and the\n",
"# last two columns, which later cells use as regression targets.\n",
"feature_cols = cols[1:6]\n",
"feature_cols"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "839e45dc-e9c8-4956-950b-035687469c81",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>化验编号</th>\n",
" <th>氢Had(%)</th>\n",
" <th>碳Cad(%)</th>\n",
" <th>氮Nad(%)</th>\n",
" <th>氧Oad(%)</th>\n",
" <th>弹筒发热量Qb,adMJ/kg</th>\n",
" <th>挥发分Vad(%)</th>\n",
" <th>固定炭Fcad(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2720110529</td>\n",
" <td>3.93</td>\n",
" <td>70.18</td>\n",
" <td>0.81</td>\n",
" <td>25.079</td>\n",
" <td>27.820</td>\n",
" <td>32.06</td>\n",
" <td>55.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2720096883</td>\n",
" <td>3.78</td>\n",
" <td>68.93</td>\n",
" <td>0.77</td>\n",
" <td>26.512</td>\n",
" <td>27.404</td>\n",
" <td>29.96</td>\n",
" <td>54.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2720109084</td>\n",
" <td>3.48</td>\n",
" <td>69.60</td>\n",
" <td>0.76</td>\n",
" <td>26.148</td>\n",
" <td>27.578</td>\n",
" <td>29.31</td>\n",
" <td>55.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2720084708</td>\n",
" <td>3.47</td>\n",
" <td>66.71</td>\n",
" <td>0.76</td>\n",
" <td>29.055</td>\n",
" <td>26.338</td>\n",
" <td>28.58</td>\n",
" <td>53.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2720062721</td>\n",
" <td>3.87</td>\n",
" <td>68.78</td>\n",
" <td>0.80</td>\n",
" <td>26.542</td>\n",
" <td>27.280</td>\n",
" <td>29.97</td>\n",
" <td>54.78</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg 挥发分Vad(%) \\\n",
"0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 \n",
"1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 \n",
"2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 \n",
"3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 \n",
"4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 \n",
"\n",
" 固定炭Fcad(%) \n",
"0 55.68 \n",
"1 54.71 \n",
"2 55.99 \n",
"3 53.87 \n",
"4 54.78 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Work on a copy so later in-place changes to train_data leave data_0102 untouched.\n",
"train_data = data_0102.copy()\n",
"train_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "54cd27a6-1a8a-47c0-93d9-c948960a7842",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "bba14f71-9d69-4c82-b6bc-b9b74c725b25",
"metadata": {},
"outputs": [],
"source": [
"train_data.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "e3a9ad55-0132-430f-ac57-c2e7f8e8590a",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "013c6a58-65f6-48e9-8d7f-b56c87de5b11",
"metadata": {},
"outputs": [],
"source": [
"params_xgb = {\"objective\": 'reg:squarederror',\n",
" \"subsample\": 1,\n",
" \"max_depth\": 15,\n",
" \"eta\": 0.3,\n",
" \"gamma\": 0,\n",
" \"lambda\": 1,\n",
" \"alpha\": 0,\n",
" \"colsample_bytree\": 0.9,}\n",
"num_boost_round = 1000"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "086f1901-8388-47e9-ae7c-1b2709bc1e22",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import KFold, train_test_split\n",
"kf = KFold(n_splits=10, shuffle=True, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "fb7b06af-84bc-483c-b086-7826d7befc9c",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE: 0.475, RMSE: 0.6892, MAE: 0.5507, MAPE: 1.86 %, R_2: 0.9046\n",
"MSE: 1.1415, RMSE: 1.0684, MAE: 0.9133, MAPE: 3.06 %, R_2: 0.6923\n",
"MSE: 0.7247, RMSE: 0.8513, MAE: 0.6606, MAPE: 2.32 %, R_2: 0.9247\n",
"MSE: 1.3652, RMSE: 1.1684, MAE: 0.9609, MAPE: 3.24 %, R_2: 0.6698\n",
"MSE: 0.4552, RMSE: 0.6747, MAE: 0.5732, MAPE: 1.94 %, R_2: 0.903\n",
"MSE: 0.6357, RMSE: 0.7973, MAE: 0.6374, MAPE: 2.2 %, R_2: 0.8771\n",
"MSE: 0.9972, RMSE: 0.9986, MAE: 0.752, MAPE: 2.47 %, R_2: 0.8141\n",
"MSE: 1.5218, RMSE: 1.2336, MAE: 1.0569, MAPE: 3.45 %, R_2: 0.2363\n",
"MSE: 0.6891, RMSE: 0.8301, MAE: 0.6825, MAPE: 2.22 %, R_2: 0.9005\n",
"MSE: 1.6864, RMSE: 1.2986, MAE: 1.0004, MAPE: 3.51 %, R_2: 0.6893\n"
]
},
{
"data": {
"text/plain": [
"MSE 0.969172\n",
"RMSE 0.961023\n",
"MAE 0.778783\n",
"MAPE 0.026288\n",
"R_2 0.761188\n",
"dtype: float64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 10-fold cross-validation of an XGBoost regressor for the '挥发分Vad(%)' target.\n",
"# The booster is fitted on log1p(target); predictions are mapped back with expm1\n",
"# and every metric is computed on the original scale.\n",
"# NOTE(review): each validation fold both drives early stopping and is scored, so\n",
"# the reported metrics are optimistically biased. Use a separate early-stopping\n",
"# split carved from the training fold for an unbiased estimate.\n",
"target_col = '挥发分Vad(%)'\n",
"eva_list = []\n",
"for train_index, test_index in kf.split(train_data):\n",
"    # KFold.split yields positional indices, so select rows with .iloc rather\n",
"    # than relying on the index labels happening to be 0..n-1.\n",
"    train = train_data.iloc[train_index]\n",
"    valid = train_data.iloc[test_index]\n",
"    X_train, Y_train = train[feature_cols], np.log1p(train[target_col])\n",
"    X_valid, Y_valid = valid[feature_cols], np.log1p(valid[target_col])\n",
"    dtrain = xgb.DMatrix(X_train, Y_train)\n",
"    dvalid = xgb.DMatrix(X_valid, Y_valid)\n",
"    watchlist = [(dvalid, 'eval')]\n",
"    gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
"                         early_stopping_rounds=50, verbose_eval=False)\n",
"    # Predict with the trees up to the best early-stopping round only; depending\n",
"    # on the xgboost version, the default may also include the rounds trained\n",
"    # after the optimum.\n",
"    best_rounds = (0, gb_model.best_iteration + 1)\n",
"    y_pred = np.expm1(gb_model.predict(dvalid, iteration_range=best_rounds))\n",
"    # Score against the untransformed target instead of a log1p/expm1 round trip.\n",
"    y_true = valid[target_col].values\n",
"    MSE = mean_squared_error(y_true, y_pred)\n",
"    RMSE = np.sqrt(MSE)\n",
"    MAE = mean_absolute_error(y_true, y_pred)\n",
"    MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
"    # A negative R^2 means the fit is worse than always predicting the mean.\n",
"    R_2 = r2_score(y_true, y_pred)\n",
"    print(f'MSE: {round(MSE, 4)}, RMSE: {round(RMSE, 4)}, MAE: {round(MAE, 4)}, '\n",
"          f'MAPE: {round(MAPE * 100, 2)} %, R_2: {round(R_2, 4)}')\n",
"    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n",
"# One row per fold; the cell displays the mean of each metric across folds.\n",
"data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n",
"data_df.mean()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "90841cb7-4f28-4a33-93ac-93df69f1a5a1",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE: 0.9821, RMSE: 0.991, MAE: 0.7698, MAPE: 1.44 %, R2: 0.9652\n",
"MSE: 1.2674, RMSE: 1.1258, MAE: 0.8756, MAPE: 1.64 %, R2: 0.9174\n",
"MSE: 0.9137, RMSE: 0.9559, MAE: 0.757, MAPE: 1.46 %, R2: 0.9864\n",
"MSE: 1.6012, RMSE: 1.2654, MAE: 1.0173, MAPE: 1.89 %, R2: 0.9292\n",
"MSE: 1.4694, RMSE: 1.2122, MAE: 0.8524, MAPE: 1.59 %, R2: 0.9142\n",
"MSE: 0.7552, RMSE: 0.869, MAE: 0.7202, MAPE: 1.39 %, R2: 0.9779\n",
"MSE: 0.5474, RMSE: 0.7398, MAE: 0.5467, MAPE: 1.0 %, R2: 0.9783\n",
"MSE: 1.2779, RMSE: 1.1305, MAE: 0.9452, MAPE: 1.73 %, R2: 0.853\n",
"MSE: 1.1908, RMSE: 1.0912, MAE: 0.9004, MAPE: 1.72 %, R2: 0.9597\n",
"MSE: 3.9312, RMSE: 1.9827, MAE: 1.2707, MAPE: 2.65 %, R2: 0.8775\n"
]
},
{
"data": {
"text/plain": [
"MSE 1.393623\n",
"RMSE 1.136351\n",
"MAE 0.865538\n",
"MAPE 0.016509\n",
"R2 0.935872\n",
"dtype: float64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 10-fold cross-validation of an XGBoost regressor for the '固定炭Fcad(%)' target.\n",
"# The booster is fitted on log1p(target); predictions are mapped back with expm1\n",
"# and every metric is computed on the original scale.\n",
"# NOTE(review): each validation fold both drives early stopping and is scored, so\n",
"# the reported metrics are optimistically biased. Use a separate early-stopping\n",
"# split carved from the training fold for an unbiased estimate.\n",
"target_col = '固定炭Fcad(%)'\n",
"eva_list = []\n",
"for train_index, test_index in kf.split(train_data):\n",
"    # KFold.split yields positional indices, so select rows with .iloc rather\n",
"    # than relying on the index labels happening to be 0..n-1.\n",
"    train = train_data.iloc[train_index]\n",
"    valid = train_data.iloc[test_index]\n",
"    X_train, Y_train = train[feature_cols], np.log1p(train[target_col])\n",
"    X_valid, Y_valid = valid[feature_cols], np.log1p(valid[target_col])\n",
"    dtrain = xgb.DMatrix(X_train, Y_train)\n",
"    dvalid = xgb.DMatrix(X_valid, Y_valid)\n",
"    watchlist = [(dvalid, 'eval')]\n",
"    gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
"                         early_stopping_rounds=50, verbose_eval=False)\n",
"    # Predict with the trees up to the best early-stopping round only; depending\n",
"    # on the xgboost version, the default may also include the rounds trained\n",
"    # after the optimum.\n",
"    best_rounds = (0, gb_model.best_iteration + 1)\n",
"    y_pred = np.expm1(gb_model.predict(dvalid, iteration_range=best_rounds))\n",
"    # Score against the untransformed target instead of a log1p/expm1 round trip.\n",
"    y_true = valid[target_col].values\n",
"    MSE = mean_squared_error(y_true, y_pred)\n",
"    RMSE = np.sqrt(MSE)\n",
"    MAE = mean_absolute_error(y_true, y_pred)\n",
"    MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
"    # A negative R^2 means the fit is worse than always predicting the mean.\n",
"    R_2 = r2_score(y_true, y_pred)\n",
"    print(f'MSE: {round(MSE, 4)}, RMSE: {round(RMSE, 4)}, MAE: {round(MAE, 4)}, '\n",
"          f'MAPE: {round(MAPE * 100, 2)} %, R2: {round(R_2, 4)}')\n",
"    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n",
"# One row per fold; the cell displays the mean of each metric across folds.\n",
"data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])\n",
"data_df.mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa67bc97-1258-44bb-9dae-14ace1661ff6",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec6e136b-ed49-4469-bb8f-b86c4910bc05",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}