{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "70ae2cb0-c6f0-4080-b894-2246c9d880e2", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 6, "id": "6a94278b-8f51-4edc-966b-4a32876a4536", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0_level_0弹筒发热量挥发分固定炭
化验编号HadCadNadOadQb,adVadFcad
Unnamed: 0_level_2(%)(%)(%)(%)MJ/kg(%)(%)
027201105293.9370.180.8125.07927.82032.0655.68
127200968833.7868.930.7726.51227.40429.9654.71
227201090843.4869.600.7626.14827.57829.3155.99
327200847083.4766.710.7629.05526.33828.5853.87
427200627213.8768.780.8026.54227.28029.9754.78
...........................
22327200304904.1268.850.9726.05527.86432.9451.89
22427200286333.9767.040.9428.04327.36831.8851.38
22527200286344.1268.420.9626.49327.88633.1652.00
22627200176833.8867.420.9427.76026.61631.6550.56
22727200176783.8166.740.9228.53026.68831.0250.82
\n", "

228 rows × 8 columns

\n", "
" ], "text/plain": [ " Unnamed: 0_level_0 氢 碳 氮 氧 弹筒发热量 挥发分 固定炭\n", " 化验编号 Had Cad Nad Oad Qb,ad Vad Fcad\n", " Unnamed: 0_level_2 (%) (%) (%) (%) MJ/kg (%) (%)\n", "0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68\n", "1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71\n", "2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99\n", "3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87\n", "4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78\n", ".. ... ... ... ... ... ... ... ...\n", "223 2720030490 4.12 68.85 0.97 26.055 27.864 32.94 51.89\n", "224 2720028633 3.97 67.04 0.94 28.043 27.368 31.88 51.38\n", "225 2720028634 4.12 68.42 0.96 26.493 27.886 33.16 52.00\n", "226 2720017683 3.88 67.42 0.94 27.760 26.616 31.65 50.56\n", "227 2720017678 3.81 66.74 0.92 28.530 26.688 31.02 50.82\n", "\n", "[228 rows x 8 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_0102 = pd.read_excel('./data/20240102/20240102.xlsx', header=[0,1,2])\n", "data_0102" ] }, { "cell_type": "code", "execution_count": 10, "id": "f72789a6-f3fa-4ab1-8b62-999413958608", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['化验编号',\n", " '氢Had(%)',\n", " '碳Cad(%)',\n", " '氮Nad(%)',\n", " '氧Oad(%)',\n", " '弹筒发热量Qb,adMJ/kg',\n", " '挥发分Vad(%)',\n", " '固定炭Fcad(%)']" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cols = [''.join([y for y in x if 'Unnamed' not in y]) for x in data_0102.columns]\n", "cols" ] }, { "cell_type": "code", "execution_count": 11, "id": "6ffb1989-3f45-4d1c-84c9-59b1045b7d9e", "metadata": {}, "outputs": [], "source": [ "data_0102.columns = cols" ] }, { "cell_type": "code", "execution_count": 27, "id": "9c708cc0-9f1b-4669-a350-6d24cb720794", "metadata": {}, "outputs": [], "source": [ "import xgboost as xgb" ] }, { "cell_type": "code", "execution_count": 16, "id": "103349e1-aa4a-427a-a489-9ab28787088b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['氢Had(%)', '碳Cad(%)', '氮Nad(%)', '氧Oad(%)', '弹筒发热量Qb,adMJ/kg']" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "feature_cols = cols[1:6]\n", "feature_cols" ] }, { "cell_type": "code", "execution_count": 44, "id": "839e45dc-e9c8-4956-950b-035687469c81", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
化验编号氢Had(%)碳Cad(%)氮Nad(%)氧Oad(%)弹筒发热量Qb,adMJ/kg挥发分Vad(%)固定炭Fcad(%)
027201105293.9370.180.8125.07927.82032.0655.68
127200968833.7868.930.7726.51227.40429.9654.71
227201090843.4869.600.7626.14827.57829.3155.99
327200847083.4766.710.7629.05526.33828.5853.87
427200627213.8768.780.8026.54227.28029.9754.78
\n", "
" ], "text/plain": [ " 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg 挥发分Vad(%) \\\n", "0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 \n", "1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 \n", "2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 \n", "3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 \n", "4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 \n", "\n", " 固定炭Fcad(%) \n", "0 55.68 \n", "1 54.71 \n", "2 55.99 \n", "3 53.87 \n", "4 54.78 " ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data = data_0102.copy()\n", "train_data.head()" ] }, { "cell_type": "code", "execution_count": 19, "id": "24233d12-9468-49b8-a371-0c6c508c387e", "metadata": {}, "outputs": [], "source": [ "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 21, "id": "54cd27a6-1a8a-47c0-93d9-c948960a7842", "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 23, "id": "bba14f71-9d69-4c82-b6bc-b9b74c725b25", "metadata": {}, "outputs": [], "source": [ "train_data.reset_index(drop=True, inplace=True)" ] }, { "cell_type": "code", "execution_count": 24, "id": "e3a9ad55-0132-430f-ac57-c2e7f8e8590a", "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" ] }, { "cell_type": "code", "execution_count": 40, "id": "013c6a58-65f6-48e9-8d7f-b56c87de5b11", "metadata": {}, "outputs": [], "source": [ "param_xgb = {\"silent\": True,\n", " \"obj\": 'reg:linear',\n", " \"subsample\": 1,\n", " \"max_depth\": 15,\n", " \"eta\": 0.3,\n", " \"gamma\": 0,\n", " \"lambda\": 1,\n", " \"alpha\": 0,\n", " \"colsample_bytree\": 0.9,}\n", "num_round = 1000" ] }, { "cell_type": "code", "execution_count": 41, "id": "086f1901-8388-47e9-ae7c-1b2709bc1e22", "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import KFold, train_test_split\n", "kf = KFold(n_splits=10, shuffle=True, random_state=42)" ] }, { "cell_type": "code", "execution_count": 43, "id": "fb7b06af-84bc-483c-b086-7826d7befc9c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 1.9436, RMSE: 1.3941, MAE: 1.1861, MAPE: 3.94 %, R_2: 0.6095\n", "MSE: 1.8735, RMSE: 1.3688, MAE: 1.132, MAPE: 3.77 %, R_2: 0.495\n", "MSE: 2.0587, RMSE: 1.4348, MAE: 1.0706, MAPE: 4.08 %, R_2: 0.7862\n", "MSE: 1.9298, RMSE: 1.3892, MAE: 1.1469, MAPE: 3.84 %, R_2: 0.5332\n", "MSE: 1.4583, RMSE: 1.2076, MAE: 1.097, MAPE: 3.67 %, R_2: 0.6894\n", "MSE: 2.0822, RMSE: 1.443, MAE: 1.1645, MAPE: 3.88 %, R_2: 0.5975\n", "MSE: 1.3521, RMSE: 1.1628, MAE: 0.9905, MAPE: 3.37 %, R_2: 0.7479\n", "MSE: 1.4057, RMSE: 1.1856, MAE: 0.9998, MAPE: 3.3 %, R_2: 0.2946\n", "MSE: 2.2274, RMSE: 1.4925, MAE: 1.2638, MAPE: 4.19 %, R_2: 0.6785\n", "MSE: 1.4866, RMSE: 1.2193, MAE: 1.0797, MAPE: 3.67 %, R_2: 0.7261\n" ] }, { "data": { "text/plain": [ "MSE 1.781792\n", "RMSE 1.329760\n", "MAE 1.113084\n", "MAPE 0.037719\n", "R_2 0.615796\n", "dtype: float64" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eva_list = list()\n", "for (train_index, test_index) in kf.split(train_data):\n", " train = train_data.loc[train_index]\n", " valid = train_data.loc[test_index]\n", " X_train, Y_train = train[feature_cols], np.log1p(train['挥发分Vad(%)'])\n", " X_valid, Y_valid = valid[feature_cols], np.log1p(valid['挥发分Vad(%)'])\n", " dtrain = xgb.DMatrix(X_train, Y_train)\n", " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", " watchlist = [(dvalid, 'eval')]\n", " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", " early_stopping_rounds=50, verbose_eval=False)\n", " y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))\n", " y_true = np.expm1(Y_valid.values)\n", " MSE = mean_squared_error(y_true, y_pred)\n", " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", " MAE = mean_absolute_error(y_true, y_pred)\n", " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", " R_2 = r2_score(y_true, y_pred)\n", " print('MSE:', round(MSE, 4), end=', ')\n", " print('RMSE:', round(RMSE, 4), end=', ')\n", " print('MAE:', round(MAE, 4), end=', ')\n", " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", " print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n", " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n", "data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n", "data_df.mean()" ] }, { "cell_type": "code", "execution_count": 48, "id": "90841cb7-4f28-4a33-93ac-93df69f1a5a1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 4.6724, RMSE: 2.1616, MAE: 1.7297, MAPE: 3.42 %, R2: 0.8346\n", "MSE: 3.0512, RMSE: 1.7468, MAE: 1.4485, MAPE: 2.62 %, R2: 0.8011\n", "MSE: 7.6672, RMSE: 2.769, MAE: 1.951, MAPE: 4.56 %, R2: 0.8856\n", "MSE: 4.0334, RMSE: 2.0083, MAE: 1.487, MAPE: 2.77 %, R2: 0.8216\n", "MSE: 2.6382, RMSE: 1.6243, MAE: 1.1551, MAPE: 2.12 %, R2: 0.846\n", "MSE: 5.8097, RMSE: 2.4103, MAE: 1.8683, MAPE: 3.8 %, R2: 0.83\n", "MSE: 2.3446, RMSE: 1.5312, MAE: 1.1294, MAPE: 2.28 %, R2: 0.9069\n", "MSE: 3.0069, RMSE: 1.734, MAE: 1.3782, MAPE: 2.46 %, R2: 0.6541\n", "MSE: 4.1652, RMSE: 2.0409, MAE: 1.5685, MAPE: 3.2 %, R2: 0.859\n", "MSE: 4.2023, RMSE: 2.05, MAE: 1.6284, MAPE: 3.2 %, R2: 0.869\n" ] }, { "data": { "text/plain": [ "MSE 4.159107\n", "RMSE 2.007631\n", "MAE 1.534427\n", "MAPE 0.030424\n", "R2 0.830794\n", "dtype: float64" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "eva_list = list()\n", "for (train_index, test_index) in kf.split(train_data):\n", " train = train_data.loc[train_index]\n", " valid = train_data.loc[test_index]\n", " X_train, Y_train = train[feature_cols], np.log1p(train['固定炭Fcad(%)'])\n", " X_valid, Y_valid = valid[feature_cols], np.log1p(valid['固定炭Fcad(%)'])\n", " dtrain = xgb.DMatrix(X_train, Y_train)\n", " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", " watchlist = [(dvalid, 'eval')]\n", " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", " early_stopping_rounds=50, verbose_eval=False)\n", " y_pred = np.expm1(gb_model.predict(xgb.DMatrix(X_valid)))\n", " y_true = np.expm1(Y_valid.values)\n", " MSE = mean_squared_error(y_true, y_pred)\n", " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", " MAE = mean_absolute_error(y_true, y_pred)\n", " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", " R_2 = r2_score(y_true, y_pred)\n", " print('MSE:', round(MSE, 4), end=', ')\n", " print('RMSE:', round(RMSE, 4), end=', ')\n", " print('MAE:', round(MAE, 4), end=', ')\n", " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", " print('R2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n", " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n", "data_df = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])\n", "data_df.mean()" ] }, { "cell_type": "code", "execution_count": 67, "id": "aa67bc97-1258-44bb-9dae-14ace1661ff6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MSERMSEMAEMAPER2
十折交叉验证均值4.1591072.0076311.5344270.0304240.830794
\n", "
" ], "text/plain": [ " MSE RMSE MAE MAPE R2\n", "十折交叉验证均值 4.159107 2.007631 1.534427 0.030424 0.830794" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "ec6e136b-ed49-4469-bb8f-b86c4910bc05", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }