coal_materials/CBA_vad_fcad.ipynb

1332 lines
45 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "6b84fefd-5936-4da4-ab6b-5b944329ad1d",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ['CUDA_DEVICE_ORDER'] = 'PCB_BUS_ID'\n",
"os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9cf130e3-62ef-46e0-bbdc-b13d9d29318d",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"import matplotlib.pyplot as plt\n",
"#新增加的两行\n",
"from pylab import mpl\n",
"# 设置显示中文字体\n",
"mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n",
"\n",
"mpl.rcParams[\"axes.unicode_minus\"] = False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "752381a5-0aeb-4c54-bc48-f9c3f8fc5d17",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th>Unnamed: 0_level_0</th>\n",
" <th>氢</th>\n",
" <th>碳</th>\n",
" <th>氮</th>\n",
" <th>氧</th>\n",
" <th>弹筒发热量</th>\n",
" <th>挥发分</th>\n",
" <th>固定炭</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>化验编号</th>\n",
" <th>Had</th>\n",
" <th>Cad</th>\n",
" <th>Nad</th>\n",
" <th>Oad</th>\n",
" <th>Qb,ad</th>\n",
" <th>Vad</th>\n",
" <th>Fcad</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>Unnamed: 0_level_2</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" <th>MJ/kg</th>\n",
" <th>(%)</th>\n",
" <th>(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2720110529</td>\n",
" <td>3.93</td>\n",
" <td>70.18</td>\n",
" <td>0.81</td>\n",
" <td>25.079</td>\n",
" <td>27.820</td>\n",
" <td>32.06</td>\n",
" <td>55.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2720096883</td>\n",
" <td>3.78</td>\n",
" <td>68.93</td>\n",
" <td>0.77</td>\n",
" <td>26.512</td>\n",
" <td>27.404</td>\n",
" <td>29.96</td>\n",
" <td>54.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2720109084</td>\n",
" <td>3.48</td>\n",
" <td>69.60</td>\n",
" <td>0.76</td>\n",
" <td>26.148</td>\n",
" <td>27.578</td>\n",
" <td>29.31</td>\n",
" <td>55.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2720084708</td>\n",
" <td>3.47</td>\n",
" <td>66.71</td>\n",
" <td>0.76</td>\n",
" <td>29.055</td>\n",
" <td>26.338</td>\n",
" <td>28.58</td>\n",
" <td>53.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2720062721</td>\n",
" <td>3.87</td>\n",
" <td>68.78</td>\n",
" <td>0.80</td>\n",
" <td>26.542</td>\n",
" <td>27.280</td>\n",
" <td>29.97</td>\n",
" <td>54.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>2720030490</td>\n",
" <td>4.12</td>\n",
" <td>68.85</td>\n",
" <td>0.97</td>\n",
" <td>26.055</td>\n",
" <td>27.864</td>\n",
" <td>32.94</td>\n",
" <td>51.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224</th>\n",
" <td>2720028633</td>\n",
" <td>3.97</td>\n",
" <td>67.04</td>\n",
" <td>0.94</td>\n",
" <td>28.043</td>\n",
" <td>27.368</td>\n",
" <td>31.88</td>\n",
" <td>51.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>225</th>\n",
" <td>2720028634</td>\n",
" <td>4.12</td>\n",
" <td>68.42</td>\n",
" <td>0.96</td>\n",
" <td>26.493</td>\n",
" <td>27.886</td>\n",
" <td>33.16</td>\n",
" <td>52.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>226</th>\n",
" <td>2720017683</td>\n",
" <td>3.88</td>\n",
" <td>67.42</td>\n",
" <td>0.94</td>\n",
" <td>27.760</td>\n",
" <td>26.616</td>\n",
" <td>31.65</td>\n",
" <td>50.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>227</th>\n",
" <td>2720017678</td>\n",
" <td>3.81</td>\n",
" <td>66.74</td>\n",
" <td>0.92</td>\n",
" <td>28.530</td>\n",
" <td>26.688</td>\n",
" <td>31.02</td>\n",
" <td>50.82</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>228 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" Unnamed: 0_level_0 氢 碳 氮 氧 弹筒发热量 挥发分 固定炭\n",
" 化验编号 Had Cad Nad Oad Qb,ad Vad Fcad\n",
" Unnamed: 0_level_2 (%) (%) (%) (%) MJ/kg (%) (%)\n",
"0 2720110529 3.93 70.18 0.81 25.079 27.820 32.06 55.68\n",
"1 2720096883 3.78 68.93 0.77 26.512 27.404 29.96 54.71\n",
"2 2720109084 3.48 69.60 0.76 26.148 27.578 29.31 55.99\n",
"3 2720084708 3.47 66.71 0.76 29.055 26.338 28.58 53.87\n",
"4 2720062721 3.87 68.78 0.80 26.542 27.280 29.97 54.78\n",
".. ... ... ... ... ... ... ... ...\n",
"223 2720030490 4.12 68.85 0.97 26.055 27.864 32.94 51.89\n",
"224 2720028633 3.97 67.04 0.94 28.043 27.368 31.88 51.38\n",
"225 2720028634 4.12 68.42 0.96 26.493 27.886 33.16 52.00\n",
"226 2720017683 3.88 67.42 0.94 27.760 26.616 31.65 50.56\n",
"227 2720017678 3.81 66.74 0.92 28.530 26.688 31.02 50.82\n",
"\n",
"[228 rows x 8 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_0102 = pd.read_excel('./data/20240102/20240102.xlsx', header=[0,1,2])\n",
"data_0102"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "972f1e9c-3ebc-45cf-8d1f-7611645e5238",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['化验编号',\n",
" '氢Had(%)',\n",
" '碳Cad(%)',\n",
" '氮Nad(%)',\n",
" '氧Oad(%)',\n",
" '弹筒发热量Qb,adMJ/kg',\n",
" '挥发分Vad(%)',\n",
" '固定炭Fcad(%)']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cols = [''.join([y for y in x if 'Unnamed' not in y]) for x in data_0102.columns]\n",
"cols"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c95f1106-b3a4-43c6-88ec-3cdebf91d79a",
"metadata": {},
"outputs": [],
"source": [
"data_0102.columns = cols"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2e96af0a-feda-4a1f-a13e-9c8861c6f4d4",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>化验编号</th>\n",
" <th>氢Had(%)</th>\n",
" <th>碳Cad(%)</th>\n",
" <th>氮Nad(%)</th>\n",
" <th>氧Oad(%)</th>\n",
" <th>弹筒发热量Qb,adMJ/kg</th>\n",
" <th>挥发分Vad(%)</th>\n",
" <th>固定炭Fcad(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2720110529</td>\n",
" <td>3.93</td>\n",
" <td>70.18</td>\n",
" <td>0.81</td>\n",
" <td>25.079</td>\n",
" <td>27.820</td>\n",
" <td>32.06</td>\n",
" <td>55.68</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2720096883</td>\n",
" <td>3.78</td>\n",
" <td>68.93</td>\n",
" <td>0.77</td>\n",
" <td>26.512</td>\n",
" <td>27.404</td>\n",
" <td>29.96</td>\n",
" <td>54.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2720109084</td>\n",
" <td>3.48</td>\n",
" <td>69.60</td>\n",
" <td>0.76</td>\n",
" <td>26.148</td>\n",
" <td>27.578</td>\n",
" <td>29.31</td>\n",
" <td>55.99</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2720084708</td>\n",
" <td>3.47</td>\n",
" <td>66.71</td>\n",
" <td>0.76</td>\n",
" <td>29.055</td>\n",
" <td>26.338</td>\n",
" <td>28.58</td>\n",
" <td>53.87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2720062721</td>\n",
" <td>3.87</td>\n",
" <td>68.78</td>\n",
" <td>0.80</td>\n",
" <td>26.542</td>\n",
" <td>27.280</td>\n",
" <td>29.97</td>\n",
" <td>54.78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>223</th>\n",
" <td>2720030490</td>\n",
" <td>4.12</td>\n",
" <td>68.85</td>\n",
" <td>0.97</td>\n",
" <td>26.055</td>\n",
" <td>27.864</td>\n",
" <td>32.94</td>\n",
" <td>51.89</td>\n",
" </tr>\n",
" <tr>\n",
" <th>224</th>\n",
" <td>2720028633</td>\n",
" <td>3.97</td>\n",
" <td>67.04</td>\n",
" <td>0.94</td>\n",
" <td>28.043</td>\n",
" <td>27.368</td>\n",
" <td>31.88</td>\n",
" <td>51.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>225</th>\n",
" <td>2720028634</td>\n",
" <td>4.12</td>\n",
" <td>68.42</td>\n",
" <td>0.96</td>\n",
" <td>26.493</td>\n",
" <td>27.886</td>\n",
" <td>33.16</td>\n",
" <td>52.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>226</th>\n",
" <td>2720017683</td>\n",
" <td>3.88</td>\n",
" <td>67.42</td>\n",
" <td>0.94</td>\n",
" <td>27.760</td>\n",
" <td>26.616</td>\n",
" <td>31.65</td>\n",
" <td>50.56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>227</th>\n",
" <td>2720017678</td>\n",
" <td>3.81</td>\n",
" <td>66.74</td>\n",
" <td>0.92</td>\n",
" <td>28.530</td>\n",
" <td>26.688</td>\n",
" <td>31.02</td>\n",
" <td>50.82</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>228 rows × 8 columns</p>\n",
"</div>"
],
"text/plain": [
" 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg \\\n",
"0 2720110529 3.93 70.18 0.81 25.079 27.820 \n",
"1 2720096883 3.78 68.93 0.77 26.512 27.404 \n",
"2 2720109084 3.48 69.60 0.76 26.148 27.578 \n",
"3 2720084708 3.47 66.71 0.76 29.055 26.338 \n",
"4 2720062721 3.87 68.78 0.80 26.542 27.280 \n",
".. ... ... ... ... ... ... \n",
"223 2720030490 4.12 68.85 0.97 26.055 27.864 \n",
"224 2720028633 3.97 67.04 0.94 28.043 27.368 \n",
"225 2720028634 4.12 68.42 0.96 26.493 27.886 \n",
"226 2720017683 3.88 67.42 0.94 27.760 26.616 \n",
"227 2720017678 3.81 66.74 0.92 28.530 26.688 \n",
"\n",
" 挥发分Vad(%) 固定炭Fcad(%) \n",
"0 32.06 55.68 \n",
"1 29.96 54.71 \n",
"2 29.31 55.99 \n",
"3 28.58 53.87 \n",
"4 29.97 54.78 \n",
".. ... ... \n",
"223 32.94 51.89 \n",
"224 31.88 51.38 \n",
"225 33.16 52.00 \n",
"226 31.65 50.56 \n",
"227 31.02 50.82 \n",
"\n",
"[228 rows x 8 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_0102"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "04b177a7-2f02-4e23-8ea9-29f34cf3eafc",
"metadata": {},
"outputs": [],
"source": [
"# out_cols = ['挥发分Vad(%)']\n",
"# drop_cols = ['化验编号', '固定炭Fcad(%)']\n",
"out_cols = ['固定炭Fcad(%)']\n",
"drop_cols = ['挥发分Vad(%)', '化验编号']"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "31169fbf-d78e-42f7-87f3-71ba3dd0979d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['固定炭Fcad(%)']"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"out_cols"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "feaedd50-f999-45bf-b465-3d359b0c0110",
"metadata": {},
"outputs": [],
"source": [
"data = data_0102.copy()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "a40bee0f-011a-4edb-80f8-4e2f40e755fd",
"metadata": {},
"outputs": [],
"source": [
"train_data = data.dropna(subset=out_cols).fillna(0)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "535d37b6-b9de-4025-ac8f-62f5bdbe2451",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-01-08 18:09:21.597754: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"from tensorflow import keras\n",
"from tensorflow.keras import layers\n",
"import tensorflow.keras.backend as K"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "1c85d462-f248-4ffb-908f-eb4b20eab179",
"metadata": {},
"outputs": [],
"source": [
"class TransformerBlock(layers.Layer):\n",
" def __init__(self, embed_dim, num_heads, ff_dim, name, rate=0.1):\n",
" super().__init__()\n",
" self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, name=name)\n",
" self.ffn = keras.Sequential(\n",
" [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n",
" )\n",
" self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n",
" self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n",
" self.dropout1 = layers.Dropout(rate)\n",
" self.dropout2 = layers.Dropout(rate)\n",
"\n",
" def call(self, inputs, training):\n",
" attn_output = self.att(inputs, inputs)\n",
" attn_output = self.dropout1(attn_output, training=training)\n",
" out1 = self.layernorm1(inputs + attn_output)\n",
" ffn_output = self.ffn(out1)\n",
" ffn_output = self.dropout2(ffn_output, training=training)\n",
" return self.layernorm2(out1 + ffn_output)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "790284a3-b9d3-4144-b481-38a7c3ecb4b9",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras import Model"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "cd9a1ca1-d0ca-4cb5-9ef5-fd5d63576cd2",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.initializers import Constant"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "9bc02f29-0fb7-420d-99a8-435eadc06e29",
"metadata": {},
"outputs": [],
"source": [
"# Custom loss layer\n",
"class CustomMultiLossLayer(layers.Layer):\n",
" def __init__(self, nb_outputs=2, **kwargs):\n",
" self.nb_outputs = nb_outputs\n",
" self.is_placeholder = True\n",
" super(CustomMultiLossLayer, self).__init__(**kwargs)\n",
" \n",
" def build(self, input_shape=None):\n",
" # initialise log_vars\n",
" self.log_vars = []\n",
" for i in range(self.nb_outputs):\n",
" self.log_vars += [self.add_weight(name='log_var' + str(i), shape=(1,),\n",
" initializer=tf.initializers.he_normal(), trainable=True)]\n",
" super(CustomMultiLossLayer, self).build(input_shape)\n",
"\n",
" def multi_loss(self, ys_true, ys_pred):\n",
" assert len(ys_true) == self.nb_outputs and len(ys_pred) == self.nb_outputs\n",
" loss = 0\n",
" for y_true, y_pred, log_var in zip(ys_true, ys_pred, self.log_vars):\n",
" mse = (y_true - y_pred) ** 2.\n",
" pre = K.exp(-log_var[0])\n",
" loss += tf.abs(tf.reduce_logsumexp(pre * mse + log_var[0], axis=-1))\n",
" return K.mean(loss)\n",
"\n",
" def call(self, inputs):\n",
" ys_true = inputs[:self.nb_outputs]\n",
" ys_pred = inputs[self.nb_outputs:]\n",
" loss = self.multi_loss(ys_true, ys_pred)\n",
" self.add_loss(loss, inputs=inputs)\n",
" # We won't actually use the output.\n",
" return K.concatenate(inputs, -1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "a190207e-5a59-4813-9660-758760cf1b73",
"metadata": {},
"outputs": [],
"source": [
"num_heads, ff_dim = 3, 16"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "80f32155-e71f-4615-8d0c-01dfd04988fe",
"metadata": {},
"outputs": [],
"source": [
"def get_prediction_model():\n",
" inputs = layers.Input(shape=(1,len(feature_cols)), name='input')\n",
" x = layers.Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)\n",
" # x = layers.Dropout(rate=0.1)(x)\n",
" lstm_out = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)\n",
" lstm_out = layers.Dense(128, activation='relu')(lstm_out)\n",
" # transformer_block = TransformerBlock(128, num_heads, ff_dim, name='first_attn')\n",
" # out = transformer_block(lstm_out)\n",
" # out = layers.GlobalAveragePooling1D()(out)\n",
" out = layers.Dropout(0.1)(lstm_out)\n",
" out = layers.Dense(64, activation='relu')(out)\n",
" bet = layers.Dense(1, activation='sigmoid', name='vad')(out)\n",
" model = Model(inputs=inputs, outputs=bet)\n",
" return model"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "372011ea-9876-41eb-a4e6-83ccd6c71559",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.python.keras.utils.vis_utils import plot_model"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "1eebdab3-1f88-48a1-b5e0-bc8787528c1b",
"metadata": {},
"outputs": [],
"source": [
"maxs = train_data.max()\n",
"mins = train_data.min()\n",
"for col in train_data.columns:\n",
" if maxs[col] - mins[col] == 0:\n",
" continue\n",
" train_data[col] = (train_data[col] - mins[col]) / (maxs[col] - mins[col])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "7f27bd56-4f6b-4242-9f79-c7d6b3ee2f13",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>化验编号</th>\n",
" <th>氢Had(%)</th>\n",
" <th>碳Cad(%)</th>\n",
" <th>氮Nad(%)</th>\n",
" <th>氧Oad(%)</th>\n",
" <th>弹筒发热量Qb,adMJ/kg</th>\n",
" <th>挥发分Vad(%)</th>\n",
" <th>固定炭Fcad(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0.996547</td>\n",
" <td>0.773973</td>\n",
" <td>0.835414</td>\n",
" <td>0.456522</td>\n",
" <td>0.171463</td>\n",
" <td>0.811249</td>\n",
" <td>0.847737</td>\n",
" <td>0.828147</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.851118</td>\n",
" <td>0.671233</td>\n",
" <td>0.799943</td>\n",
" <td>0.369565</td>\n",
" <td>0.210254</td>\n",
" <td>0.782038</td>\n",
" <td>0.674897</td>\n",
" <td>0.794606</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 化验编号 氢Had(%) 碳Cad(%) 氮Nad(%) 氧Oad(%) 弹筒发热量Qb,adMJ/kg \\\n",
"0 0.996547 0.773973 0.835414 0.456522 0.171463 0.811249 \n",
"1 0.851118 0.671233 0.799943 0.369565 0.210254 0.782038 \n",
"\n",
" 挥发分Vad(%) 固定炭Fcad(%) \n",
"0 0.847737 0.828147 \n",
"1 0.674897 0.794606 "
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data.head(2)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "baf45a3d-dc01-44fc-9f0b-456964ac2cdb",
"metadata": {},
"outputs": [],
"source": [
"# feature_cols = [x for x in train_data.columns if x not in out_cols and '第二次' not in x]\n",
"feature_cols = [x for x in train_data.columns if x not in out_cols and x not in drop_cols]\n",
"use_cols = feature_cols + out_cols"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "f2d27538-d2bc-4202-b0cf-d3e0949b4686",
"metadata": {},
"outputs": [],
"source": [
"use_data = train_data.copy()\n",
"for col in use_cols:\n",
" use_data[col] = use_data[col].astype('float32')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "50daf170-efec-49e5-8f8e-9a45938cacfc",
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import KFold, train_test_split\n",
"kf = KFold(n_splits=6, shuffle=True, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "0f863423-be12-478b-a08d-e3c6f5dfb8ee",
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras import optimizers\n",
"from tensorflow.python.keras.utils.vis_utils import plot_model\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "2c89b32a-017c-4d05-ab78-8b9b8eb0dcbb",
"metadata": {},
"outputs": [],
"source": [
"from keras.callbacks import ReduceLROnPlateau\n",
"reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"id": "ae24eea7-7dc1-4e33-9d41-3baff07ebb88",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-01-08 18:09:35.590279: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
"2024-01-08 18:09:35.656503: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_INVALID_DEVICE: invalid device ordinal\n",
"2024-01-08 18:09:35.656548: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: zhaojh-yv621\n",
"2024-01-08 18:09:35.656557: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: zhaojh-yv621\n",
"2024-01-08 18:09:35.656758: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 520.61.5\n",
"2024-01-08 18:09:35.656795: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 520.61.5\n",
"2024-01-08 18:09:35.656802: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 520.61.5\n",
"2024-01-08 18:09:35.657280: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n",
"To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"model\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input (InputLayer) [(None, 1, 5)] 0 \n",
"_________________________________________________________________\n",
"conv1d (Conv1D) (None, 1, 64) 384 \n",
"_________________________________________________________________\n",
"bidirectional (Bidirectional (None, 1, 128) 66048 \n",
"_________________________________________________________________\n",
"dense (Dense) (None, 1, 128) 16512 \n",
"_________________________________________________________________\n",
"dropout (Dropout) (None, 1, 128) 0 \n",
"_________________________________________________________________\n",
"dense_1 (Dense) (None, 1, 64) 8256 \n",
"_________________________________________________________________\n",
"vad (Dense) (None, 1, 1) 65 \n",
"=================================================================\n",
"Total params: 91,265\n",
"Trainable params: 91,265\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"model = get_prediction_model()\n",
"model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "ca6ce434-80b6-4609-9596-9a5120680462",
"metadata": {},
"outputs": [],
"source": [
"def print_eva(y_true, y_pred, tp):\n",
" MSE = mean_squared_error(y_true, y_pred)\n",
" RMSE = np.sqrt(MSE)\n",
" MAE = mean_absolute_error(y_true, y_pred)\n",
" MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
" R_2 = r2_score(y_true, y_pred)\n",
" print(f\"COL: {tp}, MSE: {format(MSE, '.2E')}\", end=',')\n",
" print(f'RMSE: {round(RMSE, 3)}', end=',')\n",
" print(f'MAPE: {round(MAPE * 100, 3)} %', end=',')\n",
" print(f'MAE: {round(MAE, 3)}', end=',')\n",
" print(f'R_2: {round(R_2, 3)}')\n",
" return [MSE, RMSE, MAE, MAPE, R_2]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "503bbec7-2020-44c8-b622-05bb41082e43",
"metadata": {},
"outputs": [],
"source": [
"from keras.losses import mean_squared_error"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "6308b1dc-8e2e-4bf9-9b28-3b81979bf7e0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-01-08 18:03:50.956250: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
"2024-01-08 18:03:50.974801: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2200000000 Hz\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"COL: 挥发分Vad, MSE: 5.84E-01,RMSE: 0.764,MAPE: 2.111 %,MAE: 0.633,R_2: 0.874\n",
"COL: 挥发分Vad, MSE: 1.06E+00,RMSE: 1.028,MAPE: 2.941 %,MAE: 0.869,R_2: 0.872\n",
"COL: 挥发分Vad, MSE: 6.70E-01,RMSE: 0.819,MAPE: 2.217 %,MAE: 0.658,R_2: 0.829\n",
"COL: 挥发分Vad, MSE: 5.96E-01,RMSE: 0.772,MAPE: 2.07 %,MAE: 0.607,R_2: 0.896\n",
"WARNING:tensorflow:5 out of the last 9 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7f6e8d6f8940> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n",
"COL: 挥发分Vad, MSE: 8.56E-01,RMSE: 0.925,MAPE: 2.335 %,MAE: 0.717,R_2: 0.805\n",
"WARNING:tensorflow:6 out of the last 11 calls to <function Model.make_predict_function.<locals>.predict_function at 0x7f6e8f6e4160> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has experimental_relax_shapes=True option that relaxes argument shapes that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n",
"COL: 挥发分Vad, MSE: 7.24E-01,RMSE: 0.851,MAPE: 2.435 %,MAE: 0.713,R_2: 0.851\n"
]
}
],
"source": [
"vad_eva_list = list()\n",
"fcad_eva_list = list()\n",
"train_data = use_data[use_cols].copy()\n",
"for (train_index, test_index) in kf.split(train_data):\n",
" train = train_data.loc[train_index]\n",
" valid = train_data.loc[test_index]\n",
" X = np.expand_dims(train[feature_cols].values, axis=1)\n",
" Y = [x for x in train[out_cols].values.T]\n",
" X_valid = np.expand_dims(valid[feature_cols].values, axis=1)\n",
" Y_valid = [x for x in valid[out_cols].values.T]\n",
" prediction_model = get_prediction_model()\n",
" prediction_model.compile(optimizer='adam', loss=mean_squared_error)\n",
" hist = prediction_model.fit(X, Y[0], epochs=120, batch_size=8, verbose=0, \n",
" validation_data=(X_valid, Y_valid[0]),\n",
" callbacks=[reduce_lr]\n",
" )\n",
" rst = prediction_model.predict(X_valid).squeeze(axis=1)\n",
" pred_rst = pd.DataFrame.from_records(np.asarray(rst), columns=out_cols)\n",
" real_rst = valid[out_cols].copy()\n",
" for col in out_cols:\n",
" pred_rst[col] = pred_rst[col] * (maxs[col] - mins[col]) + mins[col]\n",
" real_rst[col] = real_rst[col] * (maxs[col] - mins[col]) + mins[col]\n",
" y_pred_vad = pred_rst[out_cols].values.reshape(-1,)\n",
" # y_pred_fcad = pred_rst['固定炭Fcad(%)'].values.reshape(-1,)\n",
" y_true_vad = real_rst[out_cols].values.reshape(-1,)\n",
" # y_true_fcad = real_rst['固定炭Fcad(%)'].values.reshape(-1,)\n",
" vad_eva = print_eva(y_true_vad, y_pred_vad, tp='挥发分Vad')\n",
" # fcad_eva = print_eva(y_true_fcad, y_pred_fcad, tp='固定炭Fcad')\n",
" vad_eva_list.append(vad_eva)\n",
" # fcad_eva_list.append(fcad_eva)\n",
" del prediction_model"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f7132465-89e9-4193-829b-c6e7606cd266",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2024-01-08 18:09:42.506363: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)\n",
"2024-01-08 18:09:42.522809: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2200000000 Hz\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"COL: 固定炭Fcad, MSE: 7.16E-01,RMSE: 0.846,MAPE: 1.337 %,MAE: 0.715,R_2: 0.972\n",
"COL: 固定炭Fcad, MSE: 1.04E+00,RMSE: 1.018,MAPE: 1.65 %,MAE: 0.847,R_2: 0.98\n",
"COL: 固定炭Fcad, MSE: 8.89E-01,RMSE: 0.943,MAPE: 1.294 %,MAE: 0.724,R_2: 0.936\n",
"COL: 固定炭Fcad, MSE: 5.17E-01,RMSE: 0.719,MAPE: 1.066 %,MAE: 0.545,R_2: 0.985\n"
]
}
],
"source": [
"out_cols = ['固定炭Fcad(%)']\n",
"fcad_eva_list = list()\n",
"train_data = use_data[use_cols].copy()\n",
"for (train_index, test_index) in kf.split(train_data):\n",
" train = train_data.loc[train_index]\n",
" valid = train_data.loc[test_index]\n",
" X = np.expand_dims(train[feature_cols].values, axis=1)\n",
" Y = [x for x in train[out_cols].values.T]\n",
" X_valid = np.expand_dims(valid[feature_cols].values, axis=1)\n",
" Y_valid = [x for x in valid[out_cols].values.T]\n",
" prediction_model = get_prediction_model()\n",
" prediction_model.compile(optimizer='adam', loss=mean_squared_error)\n",
" hist = prediction_model.fit(X, Y[0], epochs=120, batch_size=8, verbose=0, \n",
" validation_data=(X_valid, Y_valid[0]),\n",
" callbacks=[reduce_lr]\n",
" )\n",
" rst = prediction_model.predict(X_valid).squeeze(axis=1)\n",
" pred_rst = pd.DataFrame.from_records(np.asarray(rst), columns=out_cols)\n",
" real_rst = valid[out_cols].copy()\n",
" for col in out_cols:\n",
" pred_rst[col] = pred_rst[col] * (maxs[col] - mins[col]) + mins[col]\n",
" real_rst[col] = real_rst[col] * (maxs[col] - mins[col]) + mins[col]\n",
" y_pred = pred_rst[out_cols].values.reshape(-1,)\n",
" y_true = real_rst[out_cols].values.reshape(-1,)\n",
" fcad_eva = print_eva(y_true, y_pred, tp='固定炭Fcad')\n",
" fcad_eva_list.append(fcad_eva)\n",
" del prediction_model"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "27e0abf7-aa29-467f-bc5e-b66a1adf6165",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"MSE 0.747816\n",
"RMSE 0.859839\n",
"MAE 0.699474\n",
"MAPE 0.023513\n",
"R_2 0.854338\n",
"dtype: float64"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"vad_df = pd.DataFrame.from_records(vad_eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n",
"vad_df.sort_values(by='R_2').mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "070cdb94-6e7b-4028-b6d5-ba8570c902ba",
"metadata": {},
"outputs": [],
"source": [
"fcad_df = pd.DataFrame.from_records(fcad_eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'])\n",
"fcad_df.sort_values(by='R_2').mean()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "54c1df2c-c297-4b8d-be8a-3a99cff22545",
"metadata": {},
"outputs": [],
"source": [
"train, valid = train_test_split(use_data[use_cols], test_size=0.3, random_state=42, shuffle=True)\n",
"valid, test = train_test_split(valid, test_size=0.3, random_state=42, shuffle=True)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "e7a914da-b9c2-40d9-96e0-459b0888adba",
"metadata": {},
"outputs": [],
"source": [
"prediction_model = get_prediction_model()\n",
"trainable_model = get_trainable_model(prediction_model)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "2494ef5a-5b2b-4f11-b6cd-dc39503c9106",
"metadata": {},
"outputs": [],
"source": [
"X = np.expand_dims(train[feature_cols].values, axis=1)\n",
"Y = [x for x in train[out_cols].values.T]\n",
"Y_valid = [x for x in valid[out_cols].values.T]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf869e4d-0fce-45a2-afff-46fd9b30fd1c",
"metadata": {},
"outputs": [],
"source": [
"trainable_model.compile(optimizer='adam', loss=None)\n",
"hist = trainable_model.fit([X, Y[0], Y[1]], epochs=120, batch_size=8, verbose=1, \n",
" validation_data=[np.expand_dims(valid[feature_cols].values, axis=1), Y_valid[0], Y_valid[1]],\n",
" callbacks=[reduce_lr]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "67bfbe88-5f2c-4659-b2dc-eb9f1b824d04",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[array([[0.73740077],\n",
" [0.89292204],\n",
" [0.7599046 ],\n",
" [0.67802393],\n",
" [0.6815233 ],\n",
" [0.88627005],\n",
" [0.6121343 ],\n",
" [0.7072234 ],\n",
" [0.8561135 ],\n",
" [0.52762157],\n",
" [0.8325021 ],\n",
" [0.50241977],\n",
" [0.8242289 ],\n",
" [0.68957335],\n",
" [0.6980361 ],\n",
" [0.82116604],\n",
" [0.8566438 ],\n",
" [0.53687835],\n",
" [0.56832707],\n",
" [0.78476715],\n",
" [0.85638577]], dtype=float32),\n",
" array([[0.68600863],\n",
" [0.78454906],\n",
" [0.8179163 ],\n",
" [0.94351083],\n",
" [0.86383885],\n",
" [0.69705516],\n",
" [0.6913491 ],\n",
" [0.80277354],\n",
" [0.93557894],\n",
" [0.82278305],\n",
" [0.82674253],\n",
" [0.93518937],\n",
" [0.8094449 ],\n",
" [0.9206344 ],\n",
" [0.7747319 ],\n",
" [0.9137207 ],\n",
" [0.9491073 ],\n",
" [0.93225 ],\n",
" [0.6185102 ],\n",
" [0.8867341 ],\n",
" [0.82890105]], dtype=float32)]"
]
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rst = prediction_model.predict(np.expand_dims(test[feature_cols], axis=1))\n",
"rst"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "7de501e9-05a2-424c-a5f4-85d43ad37592",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[0.9991559102070927, 0.9998196796918477]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"[np.exp(K.get_value(log_var[0]))**0.5 for log_var in trainable_model.layers[-1].log_vars]"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "5c69d03b-34fd-4dbf-aec6-c15093bb22ab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['挥发分Vad(%)', '固定炭Fcad(%)'], dtype='object')"
]
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"real_rst.columns"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "294813b8-90be-4007-9fd6-c26ee7bb9652",
"metadata": {},
"outputs": [],
"source": [
"for col in out_cols:\n",
" pred_rst[col] = pred_rst[col] * (maxs[col] - mins[col]) + mins[col]\n",
" real_rst[col] = real_rst[col] * (maxs[col] - mins[col]) + mins[col]"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "21739f82-d82a-4bde-8537-9504b68a96d5",
"metadata": {},
"outputs": [],
"source": [
"y_pred_vad = pred_rst['挥发分Vad(%)'].values.reshape(-1,)\n",
"y_pred_fcad = pred_rst['固定炭Fcad(%)'].values.reshape(-1,)\n",
"y_true_vad = real_rst['挥发分Vad(%)'].values.reshape(-1,)\n",
"y_true_fcad = real_rst['固定炭Fcad(%)'].values.reshape(-1,)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "4ec4caa9-7c46-4fc8-a94b-cb659e924304",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"COL: 挥发分Vad, MSE: 3.35E-01,RMSE: 0.579,MAPE: 1.639 %,MAE: 0.504,R_2: 0.87\n",
"COL: 固定炭Fcad, MSE: 1.11E+00,RMSE: 1.055,MAPE: 1.497 %,MAE: 0.814,R_2: 0.876\n"
]
}
],
"source": [
"pm25_eva = print_eva(y_true_vad, y_pred_vad, tp='挥发分Vad')\n",
"pm10_eva = print_eva(y_true_fcad, y_pred_fcad, tp='固定炭Fcad')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac4a4339-ec7d-4266-8197-5276c2395288",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f15cbb91-1ce7-4fb0-979a-a4bdc452a1ec",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}