{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "6b84fefd-5936-4da4-ab6b-5b944329ad1d", "metadata": {}, "outputs": [], "source": [
 "import os\n",
 "\n",
 "# Enumerate GPUs in PCI bus order. CUDA only recognises 'FASTEST_FIRST' and\n",
 "# 'PCI_BUS_ID' for this variable, so the value must be spelled exactly.\n",
 "os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
 "# Comma-separated device indices, written without embedded whitespace.\n",
 "os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'" ] },
{ "cell_type": "code", "execution_count": 2, "id": "9cf130e3-62ef-46e0-bbdc-b13d9d29318d", "metadata": {}, "outputs": [], "source": [
 "import matplotlib as mpl\n",
 "import matplotlib.pyplot as plt\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "from sklearn.model_selection import train_test_split\n",
 "\n",
 "# Use a font with CJK glyphs so the Chinese column names render in figures.\n",
 "mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n",
 "# Keep the minus sign renderable while the non-default font is active.\n",
 "mpl.rcParams[\"axes.unicode_minus\"] = False" ] },
{ "cell_type": "code", "execution_count": 3, "id": "752381a5-0aeb-4c54-bc48-f9c3f8fc5d17", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0_level_0弹筒发热量挥发分固定炭
化验编号HadCadNadOadQb,adVadFcad
Unnamed: 0_level_2(%)(%)(%)(%)MJ/kg(%)(%)
027201105293.9370.180.8125.07927.82032.0655.68
127200968833.7868.930.7726.51227.40429.9654.71
227201090843.4869.600.7626.14827.57829.3155.99
327200847083.4766.710.7629.05526.33828.5853.87
427200627213.8768.780.8026.54227.28029.9754.78
...........................
22327200304904.1268.850.9726.05527.86432.9451.89
22427200286333.9767.040.9428.04327.36831.8851.38
22527200286344.1268.420.9626.49327.88633.1652.00
22627200176833.8867.420.9427.76026.61631.6550.56
22727200176783.8166.740.9228.53026.68831.0250.82
\n", "

228 rows × 8 columns

\n", "
" ], "text/plain": [ " Unnamed: 0_level_0 氢 碳 氮 氧 弹筒发热量 挥发分 固定炭\n", " 化验编号 Had Cad Nad Oad Qb,ad Vad Fcad\n", " Unnamed: 0_level_2 (%) (%) (%) (%) MJ/kg (%) (%)\n", "0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68\n", "1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71\n", "2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99\n", "3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87\n", "4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78\n", ".. ... ... ... ... ... ... ... ...\n", "223 2720030490 4.12 68.85 0.97 26.055 27.864 32.94 51.89\n", "224 2720028633 3.97 67.04 0.94 28.043 27.368 31.88 51.38\n", "225 2720028634 4.12 68.42 0.96 26.493 27.886 33.16 52.00\n", "226 2720017683 3.88 67.42 0.94 27.760 26.616 31.65 50.56\n", "227 2720017678 3.81 66.74 0.92 28.530 26.688 31.02 50.82\n", "\n", "[228 rows x 8 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Load the assay sheet; its header occupies three rows (name / symbol / unit).\n",
 "data_0102 = pd.read_excel('./data/20240102/20240102.xlsx', header=[0,1,2])\n",
 "data_0102" ] },
{ "cell_type": "code", "execution_count": 4, "id": "972f1e9c-3ebc-45cf-8d1f-7611645e5238", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['化验编号',\n", " '氢Had(%)',\n", " '碳Cad(%)',\n", " '氮Nad(%)',\n", " '氧Oad(%)',\n", " '弹筒发热量Qb,adMJ/kg',\n", " '挥发分Vad(%)',\n", " '固定炭Fcad(%)']" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Collapse every three-level header tuple into a single label, skipping the\n",
 "# 'Unnamed: ...' placeholders that stand in for blank header cells.\n",
 "cols = [\n",
 "    ''.join(part for part in levels if 'Unnamed' not in part)\n",
 "    for levels in data_0102.columns\n",
 "]\n",
 "cols" ] },
{ "cell_type": "code", "execution_count": 5, "id": "c95f1106-b3a4-43c6-88ec-3cdebf91d79a", "metadata": {}, "outputs": [], "source": [
 "# Replace the MultiIndex header with the flattened labels.\n",
 "data_0102.columns = cols" ] },
{ "cell_type": "code", "execution_count": 6, "id": "2e96af0a-feda-4a1f-a13e-9c8861c6f4d4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
化验编号氢Had(%)碳Cad(%)氮Nad(%)氧Oad(%)弹筒发热量Qb,adMJ/kg挥发分Vad(%)固定炭Fcad(%)
027201105293.9370.180.8125.07927.82032.0655.68
127200968833.7868.930.7726.51227.40429.9654.71
227201090843.4869.600.7626.14827.57829.3155.99
327200847083.4766.710.7629.05526.33828.5853.87
427200627213.8768.780.8026.54227.28029.9754.78
...........................
22327200304904.1268.850.9726.05527.86432.9451.89
22427200286333.9767.040.9428.04327.36831.8851.38
22527200286344.1268.420.9626.49327.88633.1652.00
22627200176833.8867.420.9427.76026.61631.6550.56
22727200176783.8166.740.9228.53026.68831.0250.82
\n", "

228 rows × 8 columns

\n", "
" ], "text/plain": [ " 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg \\\n", "0 2720110529 3.93 70.18 0.81 25.079 27.820 \n", "1 2720096883 3.78 68.93 0.77 26.512 27.404 \n", "2 2720109084 3.48 69.60 0.76 26.148 27.578 \n", "3 2720084708 3.47 66.71 0.76 29.055 26.338 \n", "4 2720062721 3.87 68.78 0.80 26.542 27.280 \n", ".. ... ... ... ... ... ... \n", "223 2720030490 4.12 68.85 0.97 26.055 27.864 \n", "224 2720028633 3.97 67.04 0.94 28.043 27.368 \n", "225 2720028634 4.12 68.42 0.96 26.493 27.886 \n", "226 2720017683 3.88 67.42 0.94 27.760 26.616 \n", "227 2720017678 3.81 66.74 0.92 28.530 26.688 \n", "\n", " 挥发分Vad(%) 固定炭Fcad(%) \n", "0 32.06 55.68 \n", "1 29.96 54.71 \n", "2 29.31 55.99 \n", "3 28.58 53.87 \n", "4 29.97 54.78 \n", ".. ... ... \n", "223 32.94 51.89 \n", "224 31.88 51.38 \n", "225 33.16 52.00 \n", "226 31.65 50.56 \n", "227 31.02 50.82 \n", "\n", "[228 rows x 8 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data_0102" ] },
{ "cell_type": "code", "execution_count": 7, "id": "04b177a7-2f02-4e23-8ea9-29f34cf3eafc", "metadata": {}, "outputs": [], "source": [
 "# Target column for this run (volatile matter). The alternative target is kept\n",
 "# below for reference.\n",
 "out_cols = ['挥发分Vad(%)']\n",
 "# out_cols = ['固定炭Fcad(%)']" ] },
{ "cell_type": "code", "execution_count": 8, "id": "31169fbf-d78e-42f7-87f3-71ba3dd0979d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['挥发分Vad(%)']" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out_cols" ] },
{ "cell_type": "code", "execution_count": 9, "id": "feaedd50-f999-45bf-b465-3d359b0c0110", "metadata": {}, "outputs": [], "source": [
 "# Work on a copy so the loaded frame stays untouched.\n",
 "data = data_0102.copy()" ] },
{ "cell_type": "code", "execution_count": 10, "id": "a40bee0f-011a-4edb-80f8-4e2f40e755fd", "metadata": {}, "outputs": [], "source": [
 "# Drop rows without a target value and zero-fill the remaining gaps.\n",
 "# reset_index keeps the index contiguous (0..n-1): the cross-validation cells\n",
 "# select rows with KFold's positional indices, which only line up with the\n",
 "# index labels when no gaps are left behind by dropna.\n",
 "train_data = data.dropna(subset=out_cols).fillna(0).reset_index(drop=True)" ] },
{ "cell_type": "code", "execution_count": 11, "id": "535d37b6-b9de-4025-ac8f-62f5bdbe2451", "metadata": {}, "outputs": [ { "name": "stderr", 
"output_type": "stream", "text": [ "2024-01-05 17:02:16.953831: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n" ] } ], "source": [
 "import tensorflow as tf\n",
 "from tensorflow import keras\n",
 "from tensorflow.keras import layers\n",
 "import tensorflow.keras.backend as K" ] },
{ "cell_type": "code", "execution_count": 12, "id": "1c85d462-f248-4ffb-908f-eb4b20eab179", "metadata": {}, "outputs": [], "source": [
 "class TransformerBlock(layers.Layer):\n",
 "    \"\"\"Self-attention followed by a feed-forward net, each with dropout,\n",
 "    a residual connection and layer normalisation.\n",
 "\n",
 "    Args:\n",
 "        embed_dim: width of the block output; also passed as the attention key_dim.\n",
 "        num_heads: number of attention heads.\n",
 "        ff_dim: hidden width of the feed-forward net.\n",
 "        name: name given to the inner MultiHeadAttention layer (not to this block).\n",
 "        rate: dropout rate used after attention and after the feed-forward net.\n",
 "        **kwargs: forwarded to layers.Layer.\n",
 "    \"\"\"\n",
 "\n",
 "    def __init__(self, embed_dim, num_heads, ff_dim, name, rate=0.1, **kwargs):\n",
 "        super().__init__(**kwargs)\n",
 "        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, name=name)\n",
 "        self.ffn = keras.Sequential(\n",
 "            [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n",
 "        )\n",
 "        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n",
 "        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n",
 "        self.dropout1 = layers.Dropout(rate)\n",
 "        self.dropout2 = layers.Dropout(rate)\n",
 "\n",
 "    def call(self, inputs, training=None):\n",
 "        \"\"\"Apply the block to `inputs`; `training` switches the dropout layers.\"\"\"\n",
 "        # Self-attention: the same tensor is used as query and value.\n",
 "        attn_output = self.att(inputs, inputs)\n",
 "        attn_output = self.dropout1(attn_output, training=training)\n",
 "        out1 = self.layernorm1(inputs + attn_output)\n",
 "        ffn_output = self.ffn(out1)\n",
 "        ffn_output = self.dropout2(ffn_output, training=training)\n",
 "        return self.layernorm2(out1 + ffn_output)" ] },
{ "cell_type": "code", "execution_count": 13, "id": "790284a3-b9d3-4144-b481-38a7c3ecb4b9", "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras import Model" ] },
{ "cell_type": "code", "execution_count": 14, "id": "cd9a1ca1-d0ca-4cb5-9ef5-fd5d63576cd2", "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.initializers import Constant" ] },
{ "cell_type": "code", "execution_count": 15, "id": "9bc02f29-0fb7-420d-99a8-435eadc06e29", "metadata": {}, "outputs": [], "source": [
 "# Custom loss layer\n",
 "class CustomMultiLossLayer(layers.Layer):\n",
 "    \"\"\"Combine the losses of several outputs with one trainable log-variance each.\n",
 "\n",
 "    The layer is called with `[y_true_1..y_true_n, y_pred_1..y_pred_n]`. It\n",
 "    registers the combined loss through `add_loss` and returns the concatenated\n",
 "    inputs; that return value is not meant to be used.\n",
 "\n",
 "    Args:\n",
 "        nb_outputs: number of (y_true, y_pred) pairs.\n",
 "        **kwargs: forwarded to layers.Layer.\n",
 "    \"\"\"\n",
 "\n",
 "    def __init__(self, nb_outputs=2, **kwargs):\n",
 "        # Initialise the base layer first so attribute tracking is set up\n",
 "        # before any attribute is assigned.\n",
 "        super().__init__(**kwargs)\n",
 "        self.nb_outputs = nb_outputs\n",
 "        self.is_placeholder = True\n",
 "\n",
 "    def build(self, input_shape=None):\n",
 "        \"\"\"Create one scalar log-variance weight per output.\"\"\"\n",
 "        self.log_vars = [\n",
 "            self.add_weight(name='log_var' + str(i), shape=(1,),\n",
 "                            initializer=tf.initializers.he_normal(), trainable=True)\n",
 "            for i in range(self.nb_outputs)\n",
 "        ]\n",
 "        super().build(input_shape)\n",
 "\n",
 "    def multi_loss(self, ys_true, ys_pred):\n",
 "        \"\"\"Return the mean combined loss over all (y_true, y_pred) pairs.\"\"\"\n",
 "        assert len(ys_true) == self.nb_outputs and len(ys_pred) == self.nb_outputs\n",
 "        loss = 0\n",
 "        for y_true, y_pred, log_var in zip(ys_true, ys_pred, self.log_vars):\n",
 "            mse = (y_true - y_pred) ** 2.\n",
 "            pre = K.exp(-log_var[0])\n",
 "            # NOTE(review): abs(logsumexp(...)) is kept exactly as written; confirm\n",
 "            # it is intended rather than a plain sum of the weighted errors.\n",
 "            loss += tf.abs(tf.reduce_logsumexp(pre * mse + log_var[0], axis=-1))\n",
 "        return K.mean(loss)\n",
 "\n",
 "    def call(self, inputs):\n",
 "        \"\"\"Split `inputs` into targets and predictions and register the loss.\"\"\"\n",
 "        ys_true = inputs[:self.nb_outputs]\n",
 "        ys_pred = inputs[self.nb_outputs:]\n",
 "        loss = self.multi_loss(ys_true, ys_pred)\n",
 "        # The deprecated `inputs=` keyword of add_loss is no longer passed.\n",
 "        self.add_loss(loss)\n",
 "        # We won't actually use the output.\n",
 "        return K.concatenate(inputs, -1)" ] },
{ "cell_type": "code", "execution_count": 16, "id": "a190207e-5a59-4813-9660-758760cf1b73", "metadata": {}, "outputs": [], "source": [ "num_heads, ff_dim = 3, 16" ] },
{ "cell_type": "code", "execution_count": 50, "id": "80f32155-e71f-4615-8d0c-01dfd04988fe", "metadata": {}, "outputs": [], "source": [
 "def get_prediction_model(n_features=None):\n",
 "    \"\"\"Build the single-output regressor: Conv1D -> BiLSTM -> Dense -> Dropout -> Dense -> sigmoid.\n",
 "\n",
 "    Args:\n",
 "        n_features: number of input features per sample. Defaults to\n",
 "            len(feature_cols), read from the notebook namespace at call time.\n",
 "\n",
 "    Returns:\n",
 "        An uncompiled keras Model taking inputs of shape (1, n_features); the\n",
 "        output layer is named 'vad' and uses a sigmoid, so targets are expected\n",
 "        to be scaled to [0, 1].\n",
 "    \"\"\"\n",
 "    if n_features is None:\n",
 "        n_features = len(feature_cols)\n",
 "    inputs = layers.Input(shape=(1, n_features), name='input')\n",
 "    x = layers.Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)\n",
 "    # x = layers.Dropout(rate=0.1)(x)\n",
 "    lstm_out = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)\n",
 "    lstm_out = layers.Dense(128, activation='relu')(lstm_out)\n",
 "    # transformer_block = TransformerBlock(128, num_heads, ff_dim, name='first_attn')\n",
 "    # out = transformer_block(lstm_out)\n",
 "    # out = layers.GlobalAveragePooling1D()(out)\n",
 "    out = layers.Dropout(0.1)(lstm_out)\n",
 "    out = layers.Dense(64, activation='relu')(out)\n",
 "    bet = layers.Dense(1, activation='sigmoid', name='vad')(out)\n",
 "    model = Model(inputs=inputs, outputs=bet)\n",
 "    return model" ] },
{ "cell_type": "code", "execution_count": 19, "id": "372011ea-9876-41eb-a4e6-83ccd6c71559", "metadata": {}, "outputs": [], "source": [
 "# Public import path; tensorflow.python.* is a private namespace.\n",
 "from tensorflow.keras.utils import plot_model" ] },
{ "cell_type": "code", "execution_count": 20, "id": "1eebdab3-1f88-48a1-b5e0-bc8787528c1b", "metadata": {}, "outputs": [], "source": [
 "# Min-max scale every column to [0, 1].\n",
 "# The statistics are always computed from the un-scaled frame, so re-running\n",
 "# this cell yields the same `maxs` / `mins`; they are needed later to map the\n",
 "# predictions back to physical units.\n",
 "train_raw = data.dropna(subset=out_cols).fillna(0).reset_index(drop=True)\n",
 "maxs = train_raw.max()\n",
 "mins = train_raw.min()\n",
 "train_data = train_raw.copy()\n",
 "for col in train_raw.columns:\n",
 "    span = maxs[col] - mins[col]\n",
 "    if span == 0:\n",
 "        # Constant column: leave it unscaled instead of dividing by zero.\n",
 "        continue\n",
 "    train_data[col] = (train_raw[col] - mins[col]) / span" ] },
{ "cell_type": "code", "execution_count": 22, "id": "7f27bd56-4f6b-4242-9f79-c7d6b3ee2f13", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
化验编号氢Had(%)碳Cad(%)氮Nad(%)氧Oad(%)弹筒发热量Qb,adMJ/kg挥发分Vad(%)固定炭Fcad(%)
00.9965470.7739730.8354140.4565220.1714630.8112490.8477370.828147
10.8511180.6712330.7999430.3695650.2102540.7820380.6748970.794606
\n", "
" ], "text/plain": [ " 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg \\\n", "0 0.996547 0.773973 0.835414 0.456522 0.171463 0.811249 \n", "1 0.851118 0.671233 0.799943 0.369565 0.210254 0.782038 \n", "\n", " 挥发分Vad(%) 固定炭Fcad(%) \n", "0 0.847737 0.828147 \n", "1 0.674897 0.794606 " ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data.head(2)" ] },
{ "cell_type": "code", "execution_count": 23, "id": "baf45a3d-dc01-44fc-9f0b-456964ac2cdb", "metadata": {}, "outputs": [], "source": [
 "# Every column that is not the current target is used as an input.\n",
 "# NOTE(review): this includes the sample ID ('化验编号') and the other assay\n",
 "# target; confirm both are meant to be model inputs.\n",
 "# feature_cols = [x for x in train_data.columns if x not in out_cols and '第二次' not in x]\n",
 "feature_cols = [x for x in train_data.columns if x not in out_cols]\n",
 "use_cols = feature_cols + out_cols" ] },
{ "cell_type": "code", "execution_count": 24, "id": "f2d27538-d2bc-4202-b0cf-d3e0949b4686", "metadata": {}, "outputs": [], "source": [
 "# Cast the modelling columns to float32 in one pass (returns a new frame).\n",
 "use_data = train_data.astype({col: 'float32' for col in use_cols})" ] },
{ "cell_type": "code", "execution_count": 25, "id": "50daf170-efec-49e5-8f8e-9a45938cacfc", "metadata": {}, "outputs": [], "source": [
 "from sklearn.model_selection import KFold, train_test_split\n",
 "\n",
 "# Fixed seed so the six folds are identical on every run.\n",
 "kf = KFold(n_splits=6, shuffle=True, random_state=42)" ] },
{ "cell_type": "code", "execution_count": 26, "id": "0f863423-be12-478b-a08d-e3c6f5dfb8ee", "metadata": {}, "outputs": [], "source": [
 "from tensorflow.keras import optimizers\n",
 "from tensorflow.keras.utils import plot_model\n",
 "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error" ] },
{ "cell_type": "code", "execution_count": 27, "id": "2c89b32a-017c-4d05-ab78-8b9b8eb0dcbb", "metadata": {}, "outputs": [], "source": [
 "# Import from tensorflow.keras like the rest of the notebook; mixing in the\n",
 "# standalone `keras` package can pair callbacks with a different Keras build.\n",
 "from tensorflow.keras.callbacks import ReduceLROnPlateau\n",
 "reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')" ] },
{ "cell_type": "code", "execution_count": 51, "id": "ae24eea7-7dc1-4e33-9d41-3baff07ebb88", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model_2\"\n", "_________________________________________________________________\n", "Layer (type) Output Shape Param # \n", "=================================================================\n", "input (InputLayer) [(None, 1, 7)] 0 \n", "_________________________________________________________________\n", "conv1d_3 (Conv1D) (None, 1, 64) 512 \n", "_________________________________________________________________\n", "bidirectional_3 (Bidirection (None, 1, 128) 66048 \n", "_________________________________________________________________\n", "dense_5 (Dense) (None, 1, 128) 16512 \n", "_________________________________________________________________\n", "dropout_3 (Dropout) (None, 1, 128) 0 \n", "_________________________________________________________________\n", "dense_6 (Dense) (None, 1, 64) 8256 \n", "_________________________________________________________________\n", "vad (Dense) (None, 1, 1) 65 \n", "=================================================================\n", "Total params: 91,393\n", "Trainable params: 91,393\n", "Non-trainable params: 0\n", "_________________________________________________________________\n" ] } ], "source": [ "model = get_prediction_model()\n", "model.summary()" ] },
{ "cell_type": "code", "execution_count": 31, "id": "ca6ce434-80b6-4609-9596-9a5120680462", "metadata": {}, "outputs": [], "source": [
 "def print_eva(y_true, y_pred, tp):\n",
 "    \"\"\"Print and return the regression metrics for one target.\n",
 "\n",
 "    Args:\n",
 "        y_true: 1-D array of reference values.\n",
 "        y_pred: 1-D array of predictions, same length as y_true.\n",
 "        tp: label printed in front of the metrics.\n",
 "\n",
 "    Returns:\n",
 "        [MSE, RMSE, MAE, MAPE, R_2] as plain Python floats (MAPE is a fraction,\n",
 "        it is only shown as a percentage in the printed line).\n",
 "    \"\"\"\n",
 "    # Bind the scikit-learn metrics to local names: a later cell rebinds the\n",
 "    # global `mean_squared_error` to the Keras loss, which would otherwise be\n",
 "    # picked up here and return a tensor instead of a float.\n",
 "    from sklearn.metrics import mean_absolute_error as sk_mae\n",
 "    from sklearn.metrics import mean_absolute_percentage_error as sk_mape\n",
 "    from sklearn.metrics import mean_squared_error as sk_mse\n",
 "    from sklearn.metrics import r2_score as sk_r2\n",
 "\n",
 "    MSE = float(sk_mse(y_true, y_pred))\n",
 "    RMSE = float(np.sqrt(MSE))\n",
 "    MAE = float(sk_mae(y_true, y_pred))\n",
 "    MAPE = float(sk_mape(y_true, y_pred))\n",
 "    R_2 = float(sk_r2(y_true, y_pred))\n",
 "    print(f\"COL: {tp}, MSE: {format(MSE, '.2E')}\", end=',')\n",
 "    print(f'RMSE: {round(RMSE, 3)}', end=',')\n",
 "    print(f'MAPE: {round(MAPE * 100, 3)} %', end=',')\n",
 "    print(f'MAE: {round(MAE, 3)}', end=',')\n",
 "    print(f'R_2: {round(R_2, 3)}')\n",
 "    return [MSE, RMSE, MAE, MAPE, R_2]" ] },
{ "cell_type": "code", "execution_count": 32, "id": "503bbec7-2020-44c8-b622-05bb41082e43", "metadata": {}, "outputs": [], "source": [
 "# Caution: this rebinds the global name `mean_squared_error` from the\n",
 "# scikit-learn metric to the Keras loss used when compiling the models.\n",
 "from tensorflow.keras.losses import mean_squared_error" ] },
{ "cell_type": "code", "execution_count": 63, "id": "6308b1dc-8e2e-4bf9-9b28-3b81979bf7e0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "COL: 挥发分Vad, MSE: 2.49E-01,RMSE: 0.499,MAPE: 1.336 %,MAE: 0.398,R_2: 0.946\n", "COL: 挥发分Vad, MSE: 3.81E-01,RMSE: 0.617,MAPE: 1.597 %,MAE: 0.455,R_2: 0.954\n", "COL: 挥发分Vad, MSE: 5.71E-01,RMSE: 0.756,MAPE: 2.077 %,MAE: 0.621,R_2: 0.854\n", "WARNING:tensorflow:5 out of the last 45 calls to .predict_function at 0x7f00004145e0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n", "COL: 挥发分Vad, MSE: 3.24E-01,RMSE: 0.569,MAPE: 1.575 %,MAE: 0.46,R_2: 0.943\n", "WARNING:tensorflow:6 out of the last 47 calls to .predict_function at 0x7f0165b81e50> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. 
For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n", "COL: 挥发分Vad, MSE: 3.13E-01,RMSE: 0.56,MAPE: 1.548 %,MAE: 0.466,R_2: 0.929\n", "COL: 挥发分Vad, MSE: 4.94E-01,RMSE: 0.703,MAPE: 1.852 %,MAE: 0.539,R_2: 0.898\n" ] } ], "source": [
 "# 6-fold cross-validation for the volatile-matter target (Vad).\n",
 "vad_cols = ['挥发分Vad(%)']\n",
 "# Build the input list locally so this cell does not depend on whatever\n",
 "# `out_cols` currently holds in the kernel.\n",
 "vad_features = [c for c in use_data.columns if c not in vad_cols]\n",
 "assert len(vad_features) == len(feature_cols), 'input width must match get_prediction_model()'\n",
 "vad_data = use_data[vad_features + vad_cols].reset_index(drop=True)\n",
 "vad_eva_list = []\n",
 "for train_index, test_index in kf.split(vad_data):\n",
 "    # KFold yields positional indices, so rows are selected with .iloc.\n",
 "    train = vad_data.iloc[train_index]\n",
 "    valid = vad_data.iloc[test_index]\n",
 "    # The model expects (samples, 1, features).\n",
 "    X = np.expand_dims(train[vad_features].values, axis=1)\n",
 "    y = train[vad_cols[0]].values\n",
 "    X_valid = np.expand_dims(valid[vad_features].values, axis=1)\n",
 "    y_valid = valid[vad_cols[0]].values\n",
 "    prediction_model = get_prediction_model()\n",
 "    # The string name avoids relying on the global `mean_squared_error`, which\n",
 "    # different cells bind to either the scikit-learn metric or the Keras loss.\n",
 "    prediction_model.compile(optimizer='adam', loss='mse')\n",
 "    # NOTE(review): the held-out fold also drives ReduceLROnPlateau (val_loss),\n",
 "    # so the reported scores are not from fully unseen data.\n",
 "    hist = prediction_model.fit(X, y, epochs=120, batch_size=8, verbose=0,\n",
 "                                validation_data=(X_valid, y_valid),\n",
 "                                callbacks=[reduce_lr])\n",
 "    rst = prediction_model.predict(X_valid).squeeze(axis=1)\n",
 "    pred_rst = pd.DataFrame(rst, columns=vad_cols)\n",
 "    real_rst = valid[vad_cols].copy()\n",
 "    # Undo the min-max scaling so the metrics are in physical units.\n",
 "    for col in vad_cols:\n",
 "        span = maxs[col] - mins[col]\n",
 "        pred_rst[col] = pred_rst[col] * span + mins[col]\n",
 "        real_rst[col] = real_rst[col] * span + mins[col]\n",
 "    y_pred_vad = pred_rst[vad_cols].values.reshape(-1,)\n",
 "    y_true_vad = real_rst[vad_cols].values.reshape(-1,)\n",
 "    vad_eva_list.append(print_eva(y_true_vad, y_pred_vad, tp='挥发分Vad'))\n",
 "    # Release the per-fold graph state before the next model is built.\n",
 "    del prediction_model\n",
 "    K.clear_session()" ] },
{ "cell_type": "code", "execution_count": 65, "id": 
"f7132465-89e9-4193-829b-c6e7606cd266", "metadata": {}, "outputs": [], "source": [
 "# 6-fold cross-validation for the fixed-carbon target (Fcad).\n",
 "out_cols = ['固定炭Fcad(%)']\n",
 "# `feature_cols` / `use_cols` were built while Vad was the target, so they still\n",
 "# contain the Fcad column. Rebuild the inputs for this target so the label is\n",
 "# never fed to the model as a feature.\n",
 "fcad_features = [c for c in use_data.columns if c not in out_cols]\n",
 "assert len(fcad_features) == len(feature_cols), 'input width must match get_prediction_model()'\n",
 "fcad_data = use_data[fcad_features + out_cols].reset_index(drop=True)\n",
 "fcad_eva_list = []\n",
 "for train_index, test_index in kf.split(fcad_data):\n",
 "    # KFold yields positional indices, so rows are selected with .iloc.\n",
 "    train = fcad_data.iloc[train_index]\n",
 "    valid = fcad_data.iloc[test_index]\n",
 "    # The model expects (samples, 1, features).\n",
 "    X = np.expand_dims(train[fcad_features].values, axis=1)\n",
 "    y = train[out_cols[0]].values\n",
 "    X_valid = np.expand_dims(valid[fcad_features].values, axis=1)\n",
 "    y_valid = valid[out_cols[0]].values\n",
 "    prediction_model = get_prediction_model()\n",
 "    # The string name avoids relying on the global `mean_squared_error`, which\n",
 "    # different cells bind to either the scikit-learn metric or the Keras loss.\n",
 "    prediction_model.compile(optimizer='adam', loss='mse')\n",
 "    hist = prediction_model.fit(X, y, epochs=120, batch_size=8, verbose=0,\n",
 "                                validation_data=(X_valid, y_valid),\n",
 "                                callbacks=[reduce_lr])\n",
 "    rst = prediction_model.predict(X_valid).squeeze(axis=1)\n",
 "    pred_rst = pd.DataFrame(rst, columns=out_cols)\n",
 "    real_rst = valid[out_cols].copy()\n",
 "    # Undo the min-max scaling so the metrics are in physical units.\n",
 "    for col in out_cols:\n",
 "        span = maxs[col] - mins[col]\n",
 "        pred_rst[col] = pred_rst[col] * span + mins[col]\n",
 "        real_rst[col] = real_rst[col] * span + mins[col]\n",
 "    y_pred = pred_rst[out_cols].values.reshape(-1,)\n",
 "    y_true = real_rst[out_cols].values.reshape(-1,)\n",
 "    fcad_eva_list.append(print_eva(y_true, y_pred, tp='固定炭Fcad'))\n",
 "    # Release the per-fold graph state before the next model is built.\n",
 "    del prediction_model\n",
 "    K.clear_session()" ] },
{ "cell_type": "code", 
"execution_count": 66, "id": "27e0abf7-aa29-467f-bc5e-b66a1adf6165", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MSE 0.388723\n", "RMSE 0.617294\n", "MAE 0.489930\n", "MAPE 0.016641\n", "R_2 0.920706\n", "dtype: float64" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Mean of each metric over the six Vad folds (row order does not affect a\n",
 "# column mean, so no sorting is needed).\n",
 "vad_df = pd.DataFrame.from_records(vad_eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n",
 "vad_df.mean()" ] },
{ "cell_type": "code", "execution_count": 67, "id": "070cdb94-6e7b-4028-b6d5-ba8570c902ba", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "MSE 0.232791\n", "RMSE 0.480288\n", "MAE 0.369610\n", "MAPE 0.007189\n", "R_2 0.990404\n", "dtype: float64" ] }, "execution_count": 67, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Mean of each metric over the six Fcad folds.\n",
 "fcad_df = pd.DataFrame.from_records(fcad_eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n",
 "fcad_df.mean()" ] } ],
"metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.16" } }, "nbformat": 4, "nbformat_minor": 5 }