{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "6b84fefd-5936-4da4-ab6b-5b944329ad1d", "metadata": {}, "outputs": [], "source": [ "import os\n", "os.environ['CUDA_DEVICE_ORDER'] = 'PCB_BUS_ID'\n", "os.environ['CUDA_VISIBLE_DEVICES'] = '0, 1'" ] }, { "cell_type": "code", "execution_count": 2, "id": "9cf130e3-62ef-46e0-bbdc-b13d9d29318d", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "import matplotlib.pyplot as plt\n", "#新增加的两行\n", "from pylab import mpl\n", "# 设置显示中文字体\n", "mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n", "\n", "mpl.rcParams[\"axes.unicode_minus\"] = False" ] }, { "cell_type": "code", "execution_count": 3, "id": "752381a5-0aeb-4c54-bc48-f9c3f8fc5d17", "metadata": {}, "outputs": [], "source": [ "data = pd.read_csv('./data/20240102/train_data.csv')" ] }, { "cell_type": "code", "execution_count": 4, "id": "04b177a7-2f02-4e23-8ea9-29f34cf3eafc", "metadata": {}, "outputs": [], "source": [ "out_cols = [x for x in data.columns if '碳材料' in x]" ] }, { "cell_type": "code", "execution_count": 5, "id": "31169fbf-d78e-42f7-87f3-71ba3dd0979d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['碳材料结构特征-比表面积', '碳材料结构特征-总孔体积', '碳材料结构特征-微孔体积', '碳材料结构特征-平均孔径']" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "out_cols" ] }, { "cell_type": "code", "execution_count": 6, "id": "a40bee0f-011a-4edb-80f8-4e2f40e755fd", "metadata": {}, "outputs": [], "source": [ "train_data = data.dropna(subset=out_cols).fillna(0)" ] }, { "cell_type": "code", "execution_count": 7, "id": "535d37b6-b9de-4025-ac8f-62f5bdbe2451", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "2024-01-04 16:22:35.199530: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n" ] } ], "source": [ "import tensorflow as tf\n", "from tensorflow import keras\n", "from tensorflow.keras import layers\n", "import tensorflow.keras.backend as K" ] }, { "cell_type": "code", "execution_count": 8, "id": "c2318ce6-60d2-495c-91cd-67ca53609cf8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "WARNING:tensorflow:From /tmp/ipykernel_44444/337460670.py:1: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n", "Instructions for updating:\n", "Use `tf.config.list_physical_devices('GPU')` instead.\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "2024-01-04 16:22:36.097926: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA\n", "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", "2024-01-04 16:22:36.142225: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n", "2024-01-04 16:22:36.232036: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_INVALID_DEVICE: invalid device ordinal\n", "2024-01-04 16:22:36.232061: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: zhaojh-yv621\n", "2024-01-04 16:22:36.232065: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: zhaojh-yv621\n", "2024-01-04 16:22:36.232185: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 520.61.5\n", "2024-01-04 16:22:36.232204: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 520.61.5\n", "2024-01-04 16:22:36.232207: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 520.61.5\n" ] }, { "data": { "text/plain": [ "False" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "tf.test.is_gpu_available()" ] }, { "cell_type": "code", "execution_count": 9, "id": "1c85d462-f248-4ffb-908f-eb4b20eab179", "metadata": {}, "outputs": [], "source": [ "class TransformerBlock(layers.Layer):\n", " def __init__(self, embed_dim, num_heads, ff_dim, name, rate=0.1):\n", " super().__init__()\n", " self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, name=name)\n", " self.ffn = keras.Sequential(\n", " [layers.Dense(ff_dim, activation=\"relu\"), layers.Dense(embed_dim),]\n", " )\n", " self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n", " self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n", " self.dropout1 = layers.Dropout(rate)\n", " self.dropout2 = layers.Dropout(rate)\n", "\n", " def call(self, inputs, training):\n", " attn_output = self.att(inputs, inputs)\n", " attn_output = self.dropout1(attn_output, training=training)\n", " out1 = self.layernorm1(inputs + attn_output)\n", " ffn_output = self.ffn(out1)\n", " ffn_output = self.dropout2(ffn_output, training=training)\n", " return self.layernorm2(out1 + ffn_output)" ] }, { "cell_type": "code", "execution_count": 10, "id": "790284a3-b9d3-4144-b481-38a7c3ecb4b9", "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras import Model" ] }, { "cell_type": "code", "execution_count": 11, "id": "cd9a1ca1-d0ca-4cb5-9ef5-fd5d63576cd2", "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras.initializers import Constant" ] }, { "cell_type": "code", "execution_count": 12, "id": "9bc02f29-0fb7-420d-99a8-435eadc06e29", "metadata": {}, "outputs": [], "source": [ "# Custom loss layer\n", "class CustomMultiLossLayer(layers.Layer):\n", " def __init__(self, nb_outputs=2, **kwargs):\n", " self.nb_outputs = nb_outputs\n", " self.is_placeholder = True\n", " super(CustomMultiLossLayer, self).__init__(**kwargs)\n", " \n", " def build(self, input_shape=None):\n", " # initialise log_vars\n", " self.log_vars = []\n", " for i in range(self.nb_outputs):\n", " self.log_vars += [self.add_weight(name='log_var' + str(i), shape=(1,),\n", " initializer=tf.initializers.he_normal(), trainable=True)]\n", " super(CustomMultiLossLayer, self).build(input_shape)\n", "\n", " def multi_loss(self, ys_true, ys_pred):\n", " assert len(ys_true) == self.nb_outputs and len(ys_pred) == self.nb_outputs\n", " loss = 0\n", " for y_true, y_pred, log_var in zip(ys_true, ys_pred, self.log_vars):\n", " mse = (y_true - y_pred) ** 2.\n", " pre = K.exp(-log_var[0])\n", " loss += tf.abs(tf.reduce_logsumexp(pre * mse + log_var[0], axis=-1))\n", " return K.mean(loss)\n", "\n", " def call(self, inputs):\n", " ys_true = inputs[:self.nb_outputs]\n", " ys_pred = inputs[self.nb_outputs:]\n", " loss = self.multi_loss(ys_true, ys_pred)\n", " self.add_loss(loss, inputs=inputs)\n", " # We won't actually use the output.\n", " return K.concatenate(inputs, -1)" ] }, { "cell_type": "code", "execution_count": 13, "id": "a190207e-5a59-4813-9660-758760cf1b73", "metadata": {}, "outputs": [], "source": [ "num_heads, ff_dim = 1, 12" ] }, { "cell_type": "code", "execution_count": 14, "id": "80f32155-e71f-4615-8d0c-01dfd04988fe", "metadata": {}, "outputs": [], "source": [ "def get_prediction_model():\n", " def build_output(out, out_name):\n", " self_block = TransformerBlock(64, num_heads, ff_dim, name=f'{out_name}_attn')\n", " out = self_block(out)\n", " out = layers.GlobalAveragePooling1D()(out)\n", " out = layers.Dropout(0.1)(out)\n", " out = layers.Dense(32, activation=\"relu\")(out)\n", " # out = layers.Dense(1, name=out_name, activation=\"sigmoid\")(out)\n", " return out\n", " inputs = layers.Input(shape=(1,len(feature_cols)), name='input')\n", " x = layers.Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)\n", " # x = layers.Dropout(rate=0.1)(x)\n", " lstm_out = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)\n", " lstm_out = layers.Dense(128, activation='relu')(lstm_out)\n", " transformer_block = TransformerBlock(128, num_heads, ff_dim, name='first_attn')\n", " out = transformer_block(lstm_out)\n", " out = layers.GlobalAveragePooling1D()(out)\n", " out = layers.Dropout(0.1)(out)\n", " out = layers.Dense(64, activation='relu')(out)\n", " out = K.expand_dims(out, axis=1)\n", "\n", " bet = build_output(out, 'bet')\n", " mesco = build_output(out, 'mesco')\n", " micro = build_output(out, 'micro')\n", " avg = build_output(out, 'avg')\n", "\n", " bet = layers.Dense(1, activation='sigmoid', name='bet')(bet)\n", " mesco = layers.Dense(1, activation='sigmoid', name='mesco')(mesco)\n", " micro = layers.Dense(1, activation='sigmoid', name='micro')(micro)\n", " avg = layers.Dense(1, activation='sigmoid', name='avg')(avg)\n", "\n", " model = Model(inputs=[inputs], outputs=[bet, mesco, micro, avg])\n", " return model\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "264001b1-5e4a-4786-96fd-2b5c70ab3212", "metadata": {}, "outputs": [], "source": [ "def get_trainable_model(prediction_model):\n", " inputs = layers.Input(shape=(1,len(feature_cols)), name='input')\n", " bet, mesco, micro, avg = prediction_model(inputs)\n", " bet_real = layers.Input(shape=(1,), name='bet_real')\n", " mesco_real = layers.Input(shape=(1,), name='mesco_real')\n", " micro_real = layers.Input(shape=(1,), name='micro_real')\n", " avg_real = layers.Input(shape=(1,), name='avg_real')\n", " out = CustomMultiLossLayer(nb_outputs=4)([bet_real, mesco_real, micro_real, avg_real, bet, mesco, micro, avg])\n", " return Model([inputs, bet_real, mesco_real, micro_real, avg_real], out)" ] }, { "cell_type": "code", "execution_count": 16, "id": "1eebdab3-1f88-48a1-b5e0-bc8787528c1b", "metadata": {}, "outputs": [], "source": [ "maxs = train_data.max()\n", "mins = train_data.min()\n", "for col in train_data.columns:\n", " if maxs[col] - mins[col] == 0:\n", " continue\n", " train_data[col] = (train_data[col] - mins[col]) / (maxs[col] - mins[col])" ] }, { "cell_type": "code", "execution_count": 17, "id": "7f27bd56-4f6b-4242-9f79-c7d6b3ee2f13", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
热处理条件-热处理次数热处理条件-是否是中温停留第一次热处理-温度第一次热处理-升温速率第一次热处理-保留时间第二次热处理-温度第二次热处理-升温速率·第二次热处理-保留时间共碳化-是否是共碳化物质共碳化-共碳化物质/沥青...模板剂-种类_二氧化硅模板剂-种类_氢氧化镁模板剂-种类_氧化钙模板剂-种类_氧化锌模板剂-种类_氧化镁模板剂-种类_氯化钠模板剂-种类_氯化钾模板剂-种类_碱式碳酸镁模板剂-种类_碳酸钙模板剂-种类_纤维素
00.00.00.1666670.30.50.0000000.00.0000000.00.0...00.01.000.00.000.00.00.0
10.00.00.3333330.30.50.0000000.00.0000000.00.0...00.01.000.00.000.00.00.0
20.00.00.3333330.30.50.0000000.00.0000000.00.0...00.01.000.00.000.00.00.0
30.00.00.3333330.30.50.0000000.00.0000000.00.0...00.01.000.00.000.00.00.0
41.00.00.1666670.30.50.6666670.50.6666670.00.0...00.00.000.00.001.00.00.0
..................................................................
1440.00.00.3333330.30.00.0000000.00.0000000.00.0...00.00.000.00.000.00.00.0
1450.00.00.5000000.30.00.0000000.00.0000000.00.0...00.00.000.00.000.00.00.0
1460.00.00.6666670.30.00.0000000.00.0000000.00.0...00.00.000.00.000.00.00.0
1470.00.00.5000000.30.00.0000000.00.0000000.00.0...00.00.000.00.000.00.00.0
1480.00.00.5000000.30.00.0000000.00.0000000.00.0...00.00.000.00.000.00.00.0
\n", "

123 rows × 42 columns

\n", "
" ], "text/plain": [ " 热处理条件-热处理次数 热处理条件-是否是中温停留 第一次热处理-温度 第一次热处理-升温速率 第一次热处理-保留时间 \\\n", "0 0.0 0.0 0.166667 0.3 0.5 \n", "1 0.0 0.0 0.333333 0.3 0.5 \n", "2 0.0 0.0 0.333333 0.3 0.5 \n", "3 0.0 0.0 0.333333 0.3 0.5 \n", "4 1.0 0.0 0.166667 0.3 0.5 \n", ".. ... ... ... ... ... \n", "144 0.0 0.0 0.333333 0.3 0.0 \n", "145 0.0 0.0 0.500000 0.3 0.0 \n", "146 0.0 0.0 0.666667 0.3 0.0 \n", "147 0.0 0.0 0.500000 0.3 0.0 \n", "148 0.0 0.0 0.500000 0.3 0.0 \n", "\n", " 第二次热处理-温度 第二次热处理-升温速率· 第二次热处理-保留时间 共碳化-是否是共碳化物质 共碳化-共碳化物质/沥青 ... \\\n", "0 0.000000 0.0 0.000000 0.0 0.0 ... \n", "1 0.000000 0.0 0.000000 0.0 0.0 ... \n", "2 0.000000 0.0 0.000000 0.0 0.0 ... \n", "3 0.000000 0.0 0.000000 0.0 0.0 ... \n", "4 0.666667 0.5 0.666667 0.0 0.0 ... \n", ".. ... ... ... ... ... ... \n", "144 0.000000 0.0 0.000000 0.0 0.0 ... \n", "145 0.000000 0.0 0.000000 0.0 0.0 ... \n", "146 0.000000 0.0 0.000000 0.0 0.0 ... \n", "147 0.000000 0.0 0.000000 0.0 0.0 ... \n", "148 0.000000 0.0 0.000000 0.0 0.0 ... \n", "\n", " 模板剂-种类_二氧化硅 模板剂-种类_氢氧化镁 模板剂-种类_氧化钙 模板剂-种类_氧化锌 模板剂-种类_氧化镁 模板剂-种类_氯化钠 \\\n", "0 0 0.0 1.0 0 0.0 0.0 \n", "1 0 0.0 1.0 0 0.0 0.0 \n", "2 0 0.0 1.0 0 0.0 0.0 \n", "3 0 0.0 1.0 0 0.0 0.0 \n", "4 0 0.0 0.0 0 0.0 0.0 \n", ".. ... ... ... ... ... ... \n", "144 0 0.0 0.0 0 0.0 0.0 \n", "145 0 0.0 0.0 0 0.0 0.0 \n", "146 0 0.0 0.0 0 0.0 0.0 \n", "147 0 0.0 0.0 0 0.0 0.0 \n", "148 0 0.0 0.0 0 0.0 0.0 \n", "\n", " 模板剂-种类_氯化钾 模板剂-种类_碱式碳酸镁 模板剂-种类_碳酸钙 模板剂-种类_纤维素 \n", "0 0 0.0 0.0 0.0 \n", "1 0 0.0 0.0 0.0 \n", "2 0 0.0 0.0 0.0 \n", "3 0 0.0 0.0 0.0 \n", "4 0 1.0 0.0 0.0 \n", ".. ... ... ... ... \n", "144 0 0.0 0.0 0.0 \n", "145 0 0.0 0.0 0.0 \n", "146 0 0.0 0.0 0.0 \n", "147 0 0.0 0.0 0.0 \n", "148 0 0.0 0.0 0.0 \n", "\n", "[123 rows x 42 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data" ] }, { "cell_type": "code", "execution_count": 18, "id": "baf45a3d-dc01-44fc-9f0b-456964ac2cdb", "metadata": {}, "outputs": [], "source": [ "# feature_cols = [x for x in train_data.columns if x not in out_cols and '第二次' not in x]\n", "feature_cols = [x for x in train_data.columns if x not in out_cols]\n", "use_cols = feature_cols + out_cols" ] }, { "cell_type": "code", "execution_count": 19, "id": "f2d27538-d2bc-4202-b0cf-d3e0949b4686", "metadata": {}, "outputs": [], "source": [ "use_data = train_data.copy()\n", "for col in use_cols:\n", " use_data[col] = use_data[col].astype('float32')" ] }, { "cell_type": "code", "execution_count": 20, "id": "54c1df2c-c297-4b8d-be8a-3a99cff22545", "metadata": {}, "outputs": [], "source": [ "train, valid = train_test_split(use_data[use_cols], test_size=0.3, random_state=42, shuffle=True)\n", "valid, test = train_test_split(valid, test_size=0.3, random_state=42, shuffle=True)" ] }, { "cell_type": "code", "execution_count": 21, "id": "e7a914da-b9c2-40d9-96e0-459b0888adba", "metadata": {}, "outputs": [], "source": [ "prediction_model = get_prediction_model()\n", "trainable_model = get_trainable_model(prediction_model)" ] }, { "cell_type": "code", "execution_count": 22, "id": "4f832a1e-48e2-4467-b381-35b9d2f1271a", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model: \"model\"\n", "__________________________________________________________________________________________________\n", "Layer (type) Output Shape Param # Connected to \n", "==================================================================================================\n", "input (InputLayer) [(None, 1, 38)] 0 \n", "__________________________________________________________________________________________________\n", "conv1d (Conv1D) (None, 1, 64) 2496 input[0][0] \n", "__________________________________________________________________________________________________\n", "bidirectional (Bidirectional) (None, 1, 128) 66048 conv1d[0][0] \n", "__________________________________________________________________________________________________\n", "dense (Dense) (None, 1, 128) 16512 bidirectional[0][0] \n", "__________________________________________________________________________________________________\n", "transformer_block (TransformerB (None, 1, 128) 69772 dense[0][0] \n", "__________________________________________________________________________________________________\n", "global_average_pooling1d (Globa (None, 128) 0 transformer_block[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_2 (Dropout) (None, 128) 0 global_average_pooling1d[0][0] \n", "__________________________________________________________________________________________________\n", "dense_3 (Dense) (None, 64) 8256 dropout_2[0][0] \n", "__________________________________________________________________________________________________\n", "tf.expand_dims (TFOpLambda) (None, 1, 64) 0 dense_3[0][0] \n", "__________________________________________________________________________________________________\n", "transformer_block_1 (Transforme (None, 1, 64) 18508 tf.expand_dims[0][0] \n", "__________________________________________________________________________________________________\n", "transformer_block_2 (Transforme (None, 1, 64) 18508 tf.expand_dims[0][0] \n", "__________________________________________________________________________________________________\n", "transformer_block_3 (Transforme (None, 1, 64) 18508 tf.expand_dims[0][0] \n", "__________________________________________________________________________________________________\n", "transformer_block_4 (Transforme (None, 1, 64) 18508 tf.expand_dims[0][0] \n", "__________________________________________________________________________________________________\n", "global_average_pooling1d_1 (Glo (None, 64) 0 transformer_block_1[0][0] \n", "__________________________________________________________________________________________________\n", "global_average_pooling1d_2 (Glo (None, 64) 0 transformer_block_2[0][0] \n", "__________________________________________________________________________________________________\n", "global_average_pooling1d_3 (Glo (None, 64) 0 transformer_block_3[0][0] \n", "__________________________________________________________________________________________________\n", "global_average_pooling1d_4 (Glo (None, 64) 0 transformer_block_4[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_5 (Dropout) (None, 64) 0 global_average_pooling1d_1[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_8 (Dropout) (None, 64) 0 global_average_pooling1d_2[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_11 (Dropout) (None, 64) 0 global_average_pooling1d_3[0][0] \n", "__________________________________________________________________________________________________\n", "dropout_14 (Dropout) (None, 64) 0 global_average_pooling1d_4[0][0] \n", "__________________________________________________________________________________________________\n", "dense_6 (Dense) (None, 32) 2080 dropout_5[0][0] \n", "__________________________________________________________________________________________________\n", "dense_9 (Dense) (None, 32) 2080 dropout_8[0][0] \n", "__________________________________________________________________________________________________\n", "dense_12 (Dense) (None, 32) 2080 dropout_11[0][0] \n", "__________________________________________________________________________________________________\n", "dense_15 (Dense) (None, 32) 2080 dropout_14[0][0] \n", "__________________________________________________________________________________________________\n", "bet (Dense) (None, 1) 33 dense_6[0][0] \n", "__________________________________________________________________________________________________\n", "mesco (Dense) (None, 1) 33 dense_9[0][0] \n", "__________________________________________________________________________________________________\n", "micro (Dense) (None, 1) 33 dense_12[0][0] \n", "__________________________________________________________________________________________________\n", "avg (Dense) (None, 1) 33 dense_15[0][0] \n", "==================================================================================================\n", "Total params: 245,568\n", "Trainable params: 245,568\n", "Non-trainable params: 0\n", "__________________________________________________________________________________________________\n" ] } ], "source": [ "prediction_model.summary()" ] }, { "cell_type": "code", "execution_count": 23, "id": "9289f452-a5a4-40c4-b942-f6cb2e348548", "metadata": {}, "outputs": [], "source": [ "from tensorflow.keras import optimizers\n", "from tensorflow.python.keras.utils.vis_utils import plot_model" ] }, { "cell_type": "code", "execution_count": 24, "id": "2494ef5a-5b2b-4f11-b6cd-dc39503c9106", "metadata": {}, "outputs": [], "source": [ "X = np.expand_dims(train[feature_cols].values, axis=1)\n", "Y = [x for x in train[out_cols].values.T]\n", "Y_valid = [x for x in valid[out_cols].values.T]" ] }, { "cell_type": "code", "execution_count": 25, "id": "9a62dea1-4f05-411b-9756-a91623580581", "metadata": {}, "outputs": [], "source": [ "from keras.callbacks import ReduceLROnPlateau\n", "reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')" ] }, { "cell_type": "code", "execution_count": 39, "id": "cf869e4d-0fce-45a2-afff-46fd9b30fd1c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/40\n", "11/11 [==============================] - 6s 108ms/step - loss: 0.0316 - val_loss: 0.0835\n", "Epoch 2/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0281 - val_loss: 0.0958\n", "Epoch 3/40\n", "11/11 [==============================] - 0s 27ms/step - loss: 0.0278 - val_loss: 0.0891\n", "Epoch 4/40\n", "11/11 [==============================] - 0s 21ms/step - loss: 0.0233 - val_loss: 0.0912\n", "Epoch 5/40\n", "11/11 [==============================] - 0s 27ms/step - loss: 0.0215 - val_loss: 0.1023\n", "Epoch 6/40\n", "11/11 [==============================] - 0s 33ms/step - loss: 0.0348 - val_loss: 0.0864\n", "Epoch 7/40\n", "11/11 [==============================] - 0s 16ms/step - loss: 0.0207 - val_loss: 0.0823\n", "Epoch 8/40\n", "11/11 [==============================] - 0s 25ms/step - loss: 0.0222 - val_loss: 0.0883\n", "Epoch 9/40\n", "11/11 [==============================] - 0s 22ms/step - loss: 0.0258 - val_loss: 0.1029\n", "Epoch 10/40\n", "11/11 [==============================] - 0s 26ms/step - loss: 0.0288 - val_loss: 0.0857\n", "Epoch 11/40\n", "11/11 [==============================] - 0s 22ms/step - loss: 0.0249 - val_loss: 0.0880\n", "Epoch 12/40\n", "11/11 [==============================] - 0s 21ms/step - loss: 0.0219 - val_loss: 0.0882\n", "Epoch 13/40\n", "11/11 [==============================] - 0s 24ms/step - loss: 0.0191 - val_loss: 0.0873\n", "Epoch 14/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0187 - val_loss: 0.0929\n", "Epoch 15/40\n", "11/11 [==============================] - 0s 23ms/step - loss: 0.0183 - val_loss: 0.0988\n", "Epoch 16/40\n", "11/11 [==============================] - 0s 19ms/step - loss: 0.0189 - val_loss: 0.0905\n", "Epoch 17/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0209 - val_loss: 0.0823\n", "Epoch 18/40\n", "11/11 [==============================] - 0s 27ms/step - loss: 0.0185 - val_loss: 0.0834\n", "Epoch 19/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0177 - val_loss: 0.0916\n", "Epoch 20/40\n", "11/11 [==============================] - 0s 24ms/step - loss: 0.0163 - val_loss: 0.0919\n", "Epoch 21/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0141 - val_loss: 0.0898\n", "Epoch 22/40\n", "11/11 [==============================] - 0s 27ms/step - loss: 0.0144 - val_loss: 0.0923\n", "Epoch 23/40\n", "11/11 [==============================] - 0s 19ms/step - loss: 0.0138 - val_loss: 0.0906\n", "Epoch 24/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0140 - val_loss: 0.0897\n", "Epoch 25/40\n", "11/11 [==============================] - 0s 23ms/step - loss: 0.0126 - val_loss: 0.0892\n", "Epoch 26/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0129 - val_loss: 0.0918\n", "Epoch 27/40\n", "11/11 [==============================] - 0s 25ms/step - loss: 0.0123 - val_loss: 0.0935\n", "Epoch 28/40\n", "11/11 [==============================] - 0s 25ms/step - loss: 0.0131 - val_loss: 0.0933\n", "Epoch 29/40\n", "11/11 [==============================] - 0s 17ms/step - loss: 0.0125 - val_loss: 0.0933\n", "Epoch 30/40\n", "11/11 [==============================] - 0s 23ms/step - loss: 0.0119 - val_loss: 0.0932\n", "Epoch 31/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0129 - val_loss: 0.0936\n", "Epoch 32/40\n", "11/11 [==============================] - 0s 28ms/step - loss: 0.0114 - val_loss: 0.0933\n", "Epoch 33/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0122 - val_loss: 0.0932\n", "Epoch 34/40\n", "11/11 [==============================] - 0s 21ms/step - loss: 0.0114 - val_loss: 0.0936\n", "Epoch 35/40\n", "11/11 [==============================] - 0s 23ms/step - loss: 0.0119 - val_loss: 0.0938\n", "Epoch 36/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0118 - val_loss: 0.0937\n", "Epoch 37/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0127 - val_loss: 0.0937\n", "Epoch 38/40\n", "11/11 [==============================] - 0s 27ms/step - loss: 0.0123 - val_loss: 0.0937\n", "Epoch 39/40\n", "11/11 [==============================] - 0s 19ms/step - loss: 0.0124 - val_loss: 0.0937\n", "Epoch 40/40\n", "11/11 [==============================] - 0s 20ms/step - loss: 0.0129 - val_loss: 0.0937\n" ] } ], "source": [ "trainable_model.compile(optimizer='adam', loss=None)\n", "hist = trainable_model.fit([X, Y[0], Y[1], Y[2], Y[3]], epochs=40, batch_size=8, verbose=1, \n", " validation_data=[np.expand_dims(valid[feature_cols].values, axis=1), Y_valid[0], Y_valid[1], Y_valid[2], Y_valid[3]],\n", " callbacks=[reduce_lr]\n", " )" ] }, { "cell_type": "code", "execution_count": 40, "id": "67bfbe88-5f2c-4659-b2dc-eb9f1b824d04", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[array([[0.8401114 ],\n", " [0.4296295 ],\n", " [0.34763122],\n", " [0.33006623],\n", " [0.74300694],\n", " [0.48508543],\n", " [0.48184243],\n", " [0.7309267 ],\n", " [0.5264127 ],\n", " [0.7570494 ],\n", " [0.29492375],\n", " [0.34379733]], dtype=float32),\n", " array([[0.9495956 ],\n", " [0.19964108],\n", " [0.25691378],\n", " [0.15781167],\n", " [0.39773428],\n", " [0.257546 ],\n", " [0.2265681 ],\n", " [0.39088207],\n", " [0.30309337],\n", " [0.4006669 ],\n", " [0.16448957],\n", " [0.20928389]], dtype=float32),\n", " array([[0.93163174],\n", " [0.45915267],\n", " [0.24377662],\n", " [0.32275468],\n", " [0.84771645],\n", " [0.51101613],\n", " [0.52240014],\n", " [0.77952445],\n", " [0.6746559 ],\n", " [0.6747417 ],\n", " [0.3022651 ],\n", " [0.3458013 ]], dtype=float32),\n", " array([[0.4518058 ],\n", " [0.06488091],\n", " [0.2511762 ],\n", " [0.0624491 ],\n", " [0.09656441],\n", " [0.07555431],\n", " [0.06494072],\n", " [0.09723139],\n", " [0.10824579],\n", " [0.09783638],\n", " [0.07164052],\n", " [0.15804273]], dtype=float32)]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rst = prediction_model.predict(np.expand_dims(test[feature_cols], axis=1))\n", "rst" ] }, { "cell_type": "code", "execution_count": 41, "id": "7de501e9-05a2-424c-a5f4-85d43ad37592", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.998927703775019, 0.9994643982390371, 0.9991108696677027, 0.9996066810061789]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "[np.exp(K.get_value(log_var[0]))**0.5 for log_var in trainable_model.layers[-1].log_vars]" ] }, { "cell_type": "code", "execution_count": 42, "id": "b0d5d8ad-aadd-4218-b5b7-9691a2d3eeef", "metadata": {}, "outputs": [], "source": [ "pred_rst = pd.DataFrame.from_records(np.squeeze(np.asarray(rst), axis=2).T, columns=out_cols)" ] }, { "cell_type": "code", "execution_count": 43, "id": "0a2bcb45-da86-471b-a61d-314e29430d6a", "metadata": {}, "outputs": [], "source": [ "real_rst = test[out_cols].copy()" ] }, { "cell_type": "code", "execution_count": 44, "id": "e124f7c0-fdd5-43b9-b649-ff7d9dd59641", "metadata": {}, "outputs": [], "source": [ "for col in out_cols:\n", " pred_rst[col] = pred_rst[col] * (maxs[col] - mins[col]) + mins[col]\n", " real_rst[col] = real_rst[col] * (maxs[col] - mins[col]) + mins[col]" ] }, { "cell_type": "code", "execution_count": 45, "id": "5c69d03b-34fd-4dbf-aec6-c15093bb22ab", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['碳材料结构特征-比表面积', '碳材料结构特征-总孔体积', '碳材料结构特征-微孔体积', '碳材料结构特征-平均孔径'], dtype='object')" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "real_rst.columns" ] }, { "cell_type": "code", "execution_count": 46, "id": "21739f82-d82a-4bde-8537-9504b68a96d5", "metadata": {}, "outputs": [], "source": [ "y_pred_pm25 = pred_rst['碳材料结构特征-比表面积'].values.reshape(-1,)\n", "y_pred_pm10 = pred_rst['碳材料结构特征-总孔体积'].values.reshape(-1,)\n", "y_pred_so2 = pred_rst['碳材料结构特征-微孔体积'].values.reshape(-1,)\n", "y_pred_no2 = pred_rst['碳材料结构特征-平均孔径'].values.reshape(-1,)\n", "y_true_pm25 = real_rst['碳材料结构特征-比表面积'].values.reshape(-1,)\n", "y_true_pm10 = real_rst['碳材料结构特征-总孔体积'].values.reshape(-1,)\n", "y_true_so2 = real_rst['碳材料结构特征-微孔体积'].values.reshape(-1,)\n", "y_true_no2 = real_rst['碳材料结构特征-平均孔径'].values.reshape(-1,)" ] }, { "cell_type": "code", "execution_count": 47, "id": "26ea6cfa-efad-443c-9dd9-844f8be42b91", "metadata": {}, "outputs": [], "source": [ "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error" ] }, { "cell_type": "code", "execution_count": 48, "id": "28072e7c-c9d5-4ff6-940d-e94ae879afc9", "metadata": {}, "outputs": [], "source": [ "def print_eva(y_true, y_pred, tp):\n", " MSE = mean_squared_error(y_true, y_pred)\n", " RMSE = np.sqrt(MSE)\n", " MAE = mean_absolute_error(y_true, y_pred)\n", " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", " R_2 = r2_score(y_true, y_pred)\n", " print(f\"COL: {tp}, MSE: {format(MSE, '.2E')}\", end=',')\n", " print(f'RMSE: {round(RMSE, 4)}', end=',')\n", " print(f'MAPE: {round(MAPE, 4) * 100} %', end=',')\n", " print(f'MAE: {round(MAE, 4)}', end=',')\n", " print(f'R_2: {round(R_2, 4)}')\n", " return [MSE, RMSE, MAE, MAPE, R_2]" ] }, { "cell_type": "code", "execution_count": 49, "id": "4ec4caa9-7c46-4fc8-a94b-cb659e924304", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "COL: 比表面积, MSE: 2.36E+05,RMSE: 485.5891,MAPE: 25.86 %,MAE: 340.8309,R_2: -0.1091\n", "COL: 总孔体积, MSE: 5.15E-02,RMSE: 0.2268,MAPE: 23.810000000000002 %,MAE: 0.1519,R_2: 0.7657\n", "COL: 微孔体积, MSE: 4.53E-02,RMSE: 0.2128,MAPE: 34.75 %,MAE: 0.1536,R_2: -0.0412\n", "COL: 平均孔径, MSE: 4.63E-01,RMSE: 0.6802,MAPE: 15.620000000000001 %,MAE: 0.415,R_2: 0.5929\n" ] } ], "source": [ "pm25_eva = print_eva(y_true_pm25, y_pred_pm25, tp='比表面积')\n", "pm10_eva = print_eva(y_true_pm10, y_pred_pm10, tp='总孔体积')\n", "so2_eva = print_eva(y_true_so2, y_pred_so2, tp='微孔体积')\n", "nox_eva = print_eva(y_true_no2, y_pred_no2, tp='平均孔径')" ] }, { "cell_type": "code", "execution_count": null, "id": "ac4a4339-ec7d-4266-8197-5276c2395288", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "f15cbb91-1ce7-4fb0-979a-a4bdc452a1ec", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.18" } }, "nbformat": 4, "nbformat_minor": 5 }