{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "70ae2cb0-c6f0-4080-b894-2246c9d880e2", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "6a94278b-8f51-4edc-966b-4a32876a4536", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0_level_0弹筒发热量挥发分固定炭
化验编号HadCadNadOadQb,adVadFcad
Unnamed: 0_level_2(%)(%)(%)(%)MJ/kg(%)(%)
027201105293.9370.180.8125.07927.82032.0655.68
127200968833.7868.930.7726.51227.40429.9654.71
227201090843.4869.600.7626.14827.57829.3155.99
327200847083.4766.710.7629.05526.33828.5853.87
427200627213.8768.780.8026.54227.28029.9754.78
...........................
22327200304904.1268.850.9726.05527.86432.9451.89
22427200286333.9767.040.9428.04327.36831.8851.38
22527200286344.1268.420.9626.49327.88633.1652.00
22627200176833.8867.420.9427.76026.61631.6550.56
22727200176783.8166.740.9228.53026.68831.0250.82
\n", "

228 rows × 8 columns

\n", "
" ], "text/plain": [ " Unnamed: 0_level_0 氢 碳 氮 氧 弹筒发热量 挥发分 固定炭\n", " 化验编号 Had Cad Nad Oad Qb,ad Vad Fcad\n", " Unnamed: 0_level_2 (%) (%) (%) (%) MJ/kg (%) (%)\n", "0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68\n", "1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71\n", "2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99\n", "3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87\n", "4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78\n", ".. ... ... ... ... ... ... ... ...\n", "223 2720030490 4.12 68.85 0.97 26.055 27.864 32.94 51.89\n", "224 2720028633 3.97 67.04 0.94 28.043 27.368 31.88 51.38\n", "225 2720028634 4.12 68.42 0.96 26.493 27.886 33.16 52.00\n", "226 2720017683 3.88 67.42 0.94 27.760 26.616 31.65 50.56\n", "227 2720017678 3.81 66.74 0.92 28.530 26.688 31.02 50.82\n", "\n", "[228 rows x 8 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_0102 = pd.read_excel('./data/20240102/20240102.xlsx', header=[0,1,2])\n", "data_0102" ] }, { "cell_type": "code", "execution_count": 3, "id": "f72789a6-f3fa-4ab1-8b62-999413958608", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['化验编号',\n", " '氢Had(%)',\n", " '碳Cad(%)',\n", " '氮Nad(%)',\n", " '氧Oad(%)',\n", " '弹筒发热量Qb,adMJ/kg',\n", " '挥发分Vad(%)',\n", " '固定炭Fcad(%)']" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols = [''.join([y for y in x if 'Unnamed' not in y]) for x in data_0102.columns]\n", "cols" ] }, { "cell_type": "code", "execution_count": 4, "id": "6ffb1989-3f45-4d1c-84c9-59b1045b7d9e", "metadata": {}, "outputs": [], "source": [ "data_0102.columns = cols" ] }, { "cell_type": "code", "execution_count": 5, "id": "9c708cc0-9f1b-4669-a350-6d24cb720794", "metadata": {}, "outputs": [], "source": [ "import xgboost as xgb" ] }, { "cell_type": "code", "execution_count": 6, "id": "103349e1-aa4a-427a-a489-9ab28787088b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg']" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature_cols = cols[1:6]\n", "feature_cols" ] }, { "cell_type": "code", "execution_count": 7, "id": "839e45dc-e9c8-4956-950b-035687469c81", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
化验编号氢Had(%)碳Cad(%)氮Nad(%)氧Oad(%)弹筒发热量Qb,adMJ/kg挥发分Vad(%)固定炭Fcad(%)
027201105293.9370.180.8125.07927.82032.0655.68
127200968833.7868.930.7726.51227.40429.9654.71
227201090843.4869.600.7626.14827.57829.3155.99
327200847083.4766.710.7629.05526.33828.5853.87
427200627213.8768.780.8026.54227.28029.9754.78
\n", "
" ], "text/plain": [ " 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg 挥发分Vad(%) \\\n", "0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 \n", "1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 \n", "2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 \n", "3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 \n", "4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 \n", "\n", " 固定炭Fcad(%) \n", "0 55.68 \n", "1 54.71 \n", "2 55.99 \n", "3 53.87 \n", "4 54.78 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data = data_0102.copy()\n", "train_data.head()" ] }, { "cell_type": "code", "execution_count": 8, "id": "54cd27a6-1a8a-47c0-93d9-c948960a7842", "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 9, "id": "bba14f71-9d69-4c82-b6bc-b9b74c725b25", "metadata": {}, "outputs": [], "source": [ "train_data.reset_index(drop=True, inplace=True)" ] }, { "cell_type": "code", "execution_count": 10, "id": "e3a9ad55-0132-430f-ac57-c2e7f8e8590a", "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" ] }, { "cell_type": "code", "execution_count": 25, "id": "013c6a58-65f6-48e9-8d7f-b56c87de5b11", "metadata": {}, "outputs": [], "source": [ "params_xgb = {\"objective\": 'reg:squarederror',\n", " \"subsample\": 1,\n", " \"max_depth\": 15,\n", " \"eta\": 0.3,\n", " \"gamma\": 0,\n", " \"lambda\": 1,\n", " \"alpha\": 0,\n", " \"colsample_bytree\": 0.9,}\n", "num_boost_round = 1000" ] }, { "cell_type": "code", "execution_count": 26, "id": "086f1901-8388-47e9-ae7c-1b2709bc1e22", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import KFold, train_test_split\n", "kf = KFold(n_splits=10, shuffle=True, random_state=42)" ] }, { "cell_type": "code", "execution_count": 27, "id": "fb7b06af-84bc-483c-b086-7826d7befc9c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 0.475, RMSE: 0.6892, MAE: 0.5507, MAPE: 1.86 %, R_2: 0.9046\n", "MSE: 1.1415, RMSE: 1.0684, MAE: 0.9133, MAPE: 3.06 %, R_2: 0.6923\n", "MSE: 0.7247, RMSE: 0.8513, MAE: 0.6606, MAPE: 2.32 %, R_2: 0.9247\n", "MSE: 1.3652, RMSE: 1.1684, MAE: 0.9609, MAPE: 3.24 %, R_2: 0.6698\n", "MSE: 0.4552, RMSE: 0.6747, MAE: 0.5732, MAPE: 1.94 %, R_2: 0.903\n", "MSE: 0.6357, RMSE: 0.7973, MAE: 0.6374, MAPE: 2.2 %, R_2: 0.8771\n", "MSE: 0.9972, RMSE: 0.9986, MAE: 0.752, MAPE: 2.47 %, R_2: 0.8141\n", "MSE: 1.5218, RMSE: 1.2336, MAE: 1.0569, MAPE: 3.45 %, R_2: 0.2363\n", "MSE: 0.6891, RMSE: 0.8301, MAE: 0.6825, MAPE: 2.22 %, R_2: 0.9005\n", "MSE: 1.6864, RMSE: 1.2986, MAE: 1.0004, MAPE: 3.51 %, R_2: 0.6893\n" ] }, { "data": { "text/plain": [ "MSE 0.969172\n", "RMSE 0.961023\n", "MAE 0.778783\n", "MAPE 0.026288\n", "R_2 0.761188\n", "dtype: float64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eva_list = list()\n", "for (train_index, test_index) in kf.split(train_data):\n", " train = train_data.loc[train_index]\n", " valid = train_data.loc[test_index]\n", " X_train, Y_train = train[feature_cols], np.log1p(train['挥发分Vad(%)'])\n", " X_valid, Y_valid = valid[feature_cols], np.log1p(valid['挥发分Vad(%)'])\n", " dtrain = xgb.DMatrix(X_train, Y_train)\n", " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", " watchlist = [(dvalid, 'eval')]\n", " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", " early_stopping_rounds=50, verbose_eval=False)\n", " y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))\n", " y_true = np.expm1(Y_valid.values)\n", " MSE = mean_squared_error(y_true, y_pred)\n", " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", " MAE = mean_absolute_error(y_true, y_pred)\n", " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", " R_2 = r2_score(y_true, y_pred)\n", " print('MSE:', round(MSE, 4), end=', ')\n", " print('RMSE:', round(RMSE, 4), end=', ')\n", " print('MAE:', round(MAE, 4), end=', ')\n", " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", " print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n", " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n", "data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n", "data_df.mean()" ] }, { "cell_type": "code", "execution_count": 28, "id": "90841cb7-4f28-4a33-93ac-93df69f1a5a1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 0.9821, RMSE: 0.991, MAE: 0.7698, MAPE: 1.44 %, R2: 0.9652\n", "MSE: 1.2674, RMSE: 1.1258, MAE: 0.8756, MAPE: 1.64 %, R2: 0.9174\n", "MSE: 0.9137, RMSE: 0.9559, MAE: 0.757, MAPE: 1.46 %, R2: 0.9864\n", "MSE: 1.6012, RMSE: 1.2654, MAE: 1.0173, MAPE: 1.89 %, R2: 0.9292\n", "MSE: 1.4694, RMSE: 1.2122, MAE: 0.8524, MAPE: 1.59 %, R2: 0.9142\n", "MSE: 0.7552, RMSE: 0.869, MAE: 0.7202, MAPE: 1.39 %, R2: 0.9779\n", "MSE: 0.5474, RMSE: 0.7398, MAE: 0.5467, MAPE: 1.0 %, R2: 0.9783\n", "MSE: 1.2779, RMSE: 1.1305, MAE: 0.9452, MAPE: 1.73 %, R2: 0.853\n", "MSE: 1.1908, RMSE: 1.0912, MAE: 0.9004, MAPE: 1.72 %, R2: 0.9597\n", "MSE: 3.9312, RMSE: 1.9827, MAE: 1.2707, MAPE: 2.65 %, R2: 0.8775\n" ] }, { "data": { "text/plain": [ "MSE 1.393623\n", "RMSE 1.136351\n", "MAE 0.865538\n", "MAPE 0.016509\n", "R2 0.935872\n", "dtype: float64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eva_list = list()\n", "for (train_index, test_index) in kf.split(train_data):\n", " train = train_data.loc[train_index]\n", " valid = train_data.loc[test_index]\n", " X_train, Y_train = train[feature_cols], np.log1p(train['固定炭Fcad(%)'])\n", " X_valid, Y_valid = valid[feature_cols], np.log1p(valid['固定炭Fcad(%)'])\n", " dtrain = xgb.DMatrix(X_train, Y_train)\n", " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", " watchlist = [(dvalid, 'eval')]\n", " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", " early_stopping_rounds=50, verbose_eval=False)\n", " y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))\n", " y_true = np.expm1(Y_valid.values)\n", " MSE = mean_squared_error(y_true, y_pred)\n", " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", " MAE = mean_absolute_error(y_true, y_pred)\n", " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", " R_2 = r2_score(y_true, y_pred)\n", " print('MSE:', round(MSE, 4), end=', ')\n", " print('RMSE:', round(RMSE, 4), end=', ')\n", " print('MAE:', round(MAE, 4), end=', ')\n", " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", " print('R2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n", " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n", "data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])\n", "data_df.mean()" ] }, { "cell_type": "code", "execution_count": null, "id": "aa67bc97-1258-44bb-9dae-14ace1661ff6", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "ec6e136b-ed49-4469-bb8f-b86c4910bc05", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }