coal_materials/multi-task-NN-0123.ipynb

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "6b84fefd-5936-4da4-ab6b-5b944329ad1d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "# CUDA_DEVICE_ORDER accepts FASTEST_FIRST or PCI_BUS_ID; the previous value\n",
    "# 'PCB_BUS_ID' was a typo and therefore not a valid ordering request.\n",
    "os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'\n",
    "# Comma-separated device indices, written without embedded whitespace.\n",
    "# NOTE(review): the recorded run reported CUDA_ERROR_INVALID_DEVICE and found no GPU;\n",
    "# confirm that devices 0 and 1 both exist on the host.\n",
    "os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "9cf130e3-62ef-46e0-bbdc-b13d9d29318d",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "import matplotlib.pyplot as plt\n",
    "# Matplotlib text configuration (two settings added for Chinese labels).\n",
    "from pylab import mpl\n",
    "\n",
    "mpl.rcParams.update({\n",
    "    # Font used to display Chinese text.\n",
    "    \"font.sans-serif\": [\"SimHei\"],\n",
    "    # Disable the Unicode minus glyph for negative tick labels.\n",
    "    \"axes.unicode_minus\": False,\n",
    "})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "752381a5-0aeb-4c54-bc48-f9c3f8fc5d17",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training table, addressed relative to the notebook's working directory.\n",
    "train_csv_path = './data/20240102/train_data.csv'\n",
    "data = pd.read_csv(train_csv_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "04b177a7-2f02-4e23-8ea9-29f34cf3eafc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Target columns: every column whose name contains the substring '碳材料'.\n",
    "out_cols = list(filter(lambda column_name: '碳材料' in column_name, data.columns))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "31169fbf-d78e-42f7-87f3-71ba3dd0979d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['碳材料结构特征-比表面积', '碳材料结构特征-总孔体积', '碳材料结构特征-微孔体积', '碳材料结构特征-平均孔径']"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the detected target columns.\n",
    "out_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a40bee0f-011a-4edb-80f8-4e2f40e755fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop rows missing any target value, then zero-fill the remaining gaps.\n",
    "labelled_rows = data.dropna(subset=out_cols)\n",
    "train_data = labelled_rows.fillna(0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "535d37b6-b9de-4025-ac8f-62f5bdbe2451",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-01-04 16:22:35.199530: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0\n"
     ]
    }
   ],
   "source": [
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from tensorflow.keras import layers\n",
    "import tensorflow.keras.backend as K"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "c2318ce6-60d2-495c-91cd-67ca53609cf8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "WARNING:tensorflow:From /tmp/ipykernel_44444/337460670.py:1: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.\n",
      "Instructions for updating:\n",
      "Use `tf.config.list_physical_devices('GPU')` instead.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "2024-01-04 16:22:36.097926: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-01-04 16:22:36.142225: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1\n",
      "2024-01-04 16:22:36.232036: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_INVALID_DEVICE: invalid device ordinal\n",
      "2024-01-04 16:22:36.232061: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: zhaojh-yv621\n",
      "2024-01-04 16:22:36.232065: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: zhaojh-yv621\n",
      "2024-01-04 16:22:36.232185: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 520.61.5\n",
      "2024-01-04 16:22:36.232204: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 520.61.5\n",
      "2024-01-04 16:22:36.232207: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 520.61.5\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "False"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# `tf.test.is_gpu_available()` is deprecated; TensorFlow's own warning recommends\n",
    "# `tf.config.list_physical_devices('GPU')`. bool() keeps the cell's True/False result\n",
    "# (True when at least one physical GPU is visible to TensorFlow).\n",
    "bool(tf.config.list_physical_devices('GPU'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1c85d462-f248-4ffb-908f-eb4b20eab179",
   "metadata": {},
   "outputs": [],
   "source": [
    "class TransformerBlock(layers.Layer):\n",
    "    \"\"\"Self-attention block followed by a two-layer dense feed-forward network.\n",
    "\n",
    "    Each of the two sub-layers is followed by dropout, a residual addition and\n",
    "    layer normalisation.\n",
    "\n",
    "    Args:\n",
    "        embed_dim: passed as ``key_dim`` to the attention layer and used as the\n",
    "            output width of the feed-forward network; it has to match the last\n",
    "            dimension of the inputs for the second residual addition to work.\n",
    "        num_heads: number of attention heads.\n",
    "        ff_dim: hidden width of the feed-forward network.\n",
    "        name: name given to the inner ``MultiHeadAttention`` layer. The block\n",
    "            itself is created without a name, so Keras auto-generates one.\n",
    "        rate: dropout rate applied after both sub-layers.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, embed_dim, num_heads, ff_dim, name, rate=0.1):\n",
    "        super().__init__()\n",
    "        self.att = layers.MultiHeadAttention(name=name, key_dim=embed_dim, num_heads=num_heads)\n",
    "        hidden_layer = layers.Dense(ff_dim, activation=\"relu\")\n",
    "        projection_layer = layers.Dense(embed_dim)\n",
    "        self.ffn = keras.Sequential([hidden_layer, projection_layer])\n",
    "        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)\n",
    "        self.dropout1 = layers.Dropout(rate)\n",
    "        self.dropout2 = layers.Dropout(rate)\n",
    "\n",
    "    def call(self, inputs, training):\n",
    "        \"\"\"Apply attention and feed-forward sub-layers; output has the shape of ``inputs``.\"\"\"\n",
    "        # Attention sub-layer: the input is passed as both query and value.\n",
    "        attended = self.dropout1(self.att(inputs, inputs), training=training)\n",
    "        normed = self.layernorm1(inputs + attended)\n",
    "        # Feed-forward sub-layer with its own residual connection.\n",
    "        transformed = self.dropout2(self.ffn(normed), training=training)\n",
    "        return self.layernorm2(normed + transformed)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "790284a3-b9d3-4144-b481-38a7c3ecb4b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras import Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "cd9a1ca1-d0ca-4cb5-9ef5-fd5d63576cd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras.initializers import Constant"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "9bc02f29-0fb7-420d-99a8-435eadc06e29",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Custom loss layer\n",
    "class CustomMultiLossLayer(layers.Layer):\n",
    "    \"\"\"Attach a jointly weighted multi-output loss to a model via ``add_loss``.\n",
    "\n",
    "    The layer owns one trainable scalar ``log_var`` per output. It is called with\n",
    "    a flat list ``[true_1, ..., true_n, pred_1, ..., pred_n]``.\n",
    "\n",
    "    Args:\n",
    "        nb_outputs: number of (true, pred) pairs the layer expects.\n",
    "        **kwargs: forwarded to ``keras.layers.Layer``.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, nb_outputs=2, **kwargs):\n",
    "        # Initialise the Keras base class before setting attributes on the layer.\n",
    "        super(CustomMultiLossLayer, self).__init__(**kwargs)\n",
    "        self.nb_outputs = nb_outputs\n",
    "        self.is_placeholder = True\n",
    "\n",
    "    def build(self, input_shape=None):\n",
    "        \"\"\"Create one trainable ``log_var`` weight of shape (1,) per output.\"\"\"\n",
    "        self.log_vars = [\n",
    "            self.add_weight(name='log_var' + str(i), shape=(1,),\n",
    "                            initializer=tf.initializers.he_normal(), trainable=True)\n",
    "            for i in range(self.nb_outputs)\n",
    "        ]\n",
    "        super(CustomMultiLossLayer, self).build(input_shape)\n",
    "\n",
    "    def get_config(self):\n",
    "        \"\"\"Return the layer config including ``nb_outputs`` so the layer can be re-created.\"\"\"\n",
    "        config = super(CustomMultiLossLayer, self).get_config()\n",
    "        config['nb_outputs'] = self.nb_outputs\n",
    "        return config\n",
    "\n",
    "    def multi_loss(self, ys_true, ys_pred):\n",
    "        \"\"\"Combine the per-output squared errors into one scalar loss.\n",
    "\n",
    "        For output i the term is\n",
    "        ``|logsumexp(exp(-log_var_i) * (y_true - y_pred)**2 + log_var_i, axis=-1)|``.\n",
    "        The terms are summed over outputs and then averaged with ``K.mean``.\n",
    "\n",
    "        NOTE(review): when the last axis has size 1 (as with the ``shape=(1,)``\n",
    "        inputs and ``Dense(1)`` outputs used in this notebook) the logsumexp returns\n",
    "        its single element, and the absolute value keeps the term non-negative.\n",
    "        Confirm this is the intended formulation.\n",
    "        \"\"\"\n",
    "        assert len(ys_true) == self.nb_outputs and len(ys_pred) == self.nb_outputs\n",
    "        loss = 0\n",
    "        for y_true, y_pred, log_var in zip(ys_true, ys_pred, self.log_vars):\n",
    "            mse = (y_true - y_pred) ** 2.\n",
    "            pre = K.exp(-log_var[0])\n",
    "            loss += tf.abs(tf.reduce_logsumexp(pre * mse + log_var[0], axis=-1))\n",
    "        return K.mean(loss)\n",
    "\n",
    "    def call(self, inputs):\n",
    "        \"\"\"Register the combined loss and return all inputs concatenated on the last axis.\"\"\"\n",
    "        ys_true = inputs[:self.nb_outputs]\n",
    "        ys_pred = inputs[self.nb_outputs:]\n",
    "        loss = self.multi_loss(ys_true, ys_pred)\n",
    "        # The `inputs=` keyword of add_loss is deprecated and ignored by TF2 Keras,\n",
    "        # so it is no longer passed.\n",
    "        self.add_loss(loss)\n",
    "        # We won't actually use the output.\n",
    "        return K.concatenate(inputs, -1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "a190207e-5a59-4813-9660-758760cf1b73",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hyper-parameters passed to every TransformerBlock in get_prediction_model.\n",
    "num_heads = 1  # attention heads per block\n",
    "ff_dim = 12  # hidden width of each block's feed-forward network"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "80f32155-e71f-4615-8d0c-01dfd04988fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_prediction_model(n_features=None):\n",
    "    \"\"\"Build the four-output regression network.\n",
    "\n",
    "    A shared trunk (kernel-size-1 convolution, bidirectional LSTM, dense layer,\n",
    "    TransformerBlock, pooled dense layer) feeds four identically structured heads\n",
    "    whose sigmoid output layers are named 'bet', 'mesco', 'micro' and 'avg'.\n",
    "\n",
    "    Args:\n",
    "        n_features: size of the last input axis. When omitted it falls back to\n",
    "            ``len(feature_cols)`` from the notebook globals, which is what the\n",
    "            function always used before this parameter existed.\n",
    "\n",
    "    Returns:\n",
    "        A Keras ``Model`` with one input of shape ``(1, n_features)`` per sample and\n",
    "        four single-unit outputs in the order bet, mesco, micro, avg.\n",
    "    \"\"\"\n",
    "    if n_features is None:\n",
    "        n_features = len(feature_cols)\n",
    "    head_names = ('bet', 'mesco', 'micro', 'avg')\n",
    "\n",
    "    def build_output(out, out_name):\n",
    "        \"\"\"Task-specific branch: TransformerBlock, pooling, dropout, 32-unit ReLU layer.\"\"\"\n",
    "        self_block = TransformerBlock(64, num_heads, ff_dim, name=f'{out_name}_attn')\n",
    "        out = self_block(out)\n",
    "        out = layers.GlobalAveragePooling1D()(out)\n",
    "        out = layers.Dropout(0.1)(out)\n",
    "        return layers.Dense(32, activation=\"relu\")(out)\n",
    "\n",
    "    # Shared trunk.\n",
    "    inputs = layers.Input(shape=(1, n_features), name='input')\n",
    "    x = layers.Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)\n",
    "    lstm_out = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)\n",
    "    lstm_out = layers.Dense(128, activation='relu')(lstm_out)\n",
    "    transformer_block = TransformerBlock(128, num_heads, ff_dim, name='first_attn')\n",
    "    out = transformer_block(lstm_out)\n",
    "    out = layers.GlobalAveragePooling1D()(out)\n",
    "    out = layers.Dropout(0.1)(out)\n",
    "    out = layers.Dense(64, activation='relu')(out)\n",
    "    # Re-insert a length-1 axis so the per-head TransformerBlocks receive a 3-D tensor.\n",
    "    out = K.expand_dims(out, axis=1)\n",
    "\n",
    "    # Build every hidden branch first and the named output layers afterwards, which\n",
    "    # keeps the layer creation order of the hand-unrolled version.\n",
    "    branches = [build_output(out, head) for head in head_names]\n",
    "    outputs = [\n",
    "        layers.Dense(1, activation='sigmoid', name=head)(branch)\n",
    "        for head, branch in zip(head_names, branches)\n",
    "    ]\n",
    "\n",
    "    model = Model(inputs=[inputs], outputs=outputs)\n",
    "    return model\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "264001b1-5e4a-4786-96fd-2b5c70ab3212",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_trainable_model(prediction_model):\n",
    "    \"\"\"Wrap ``prediction_model`` so that the loss is computed inside the graph.\n",
    "\n",
    "    The returned model takes the feature tensor plus one ground-truth input per\n",
    "    output ('bet_real', 'mesco_real', 'micro_real', 'avg_real') and routes truths\n",
    "    and predictions through ``CustomMultiLossLayer``.\n",
    "\n",
    "    Args:\n",
    "        prediction_model: callable Keras model returning exactly four tensors.\n",
    "\n",
    "    Returns:\n",
    "        A Keras ``Model`` whose output is the tensor returned by the loss layer.\n",
    "    \"\"\"\n",
    "    feature_input = layers.Input(shape=(1, len(feature_cols)), name='input')\n",
    "    bet, mesco, micro, avg = prediction_model(feature_input)\n",
    "    target_inputs = [\n",
    "        layers.Input(shape=(1,), name=target_name)\n",
    "        for target_name in ('bet_real', 'mesco_real', 'micro_real', 'avg_real')\n",
    "    ]\n",
    "    loss_layer = CustomMultiLossLayer(nb_outputs=4)\n",
    "    # Ground truths first, predictions second: the order the loss layer slices on.\n",
    "    out = loss_layer(target_inputs + [bet, mesco, micro, avg])\n",
    "    return Model([feature_input] + target_inputs, out)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "1eebdab3-1f88-48a1-b5e0-bc8787528c1b",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Min-max scale every column of train_data in place.\n",
    "# `maxs` / `mins` are the per-column extrema over all rows of train_data.\n",
    "# NOTE: re-running this cell recomputes them from the already-scaled values.\n",
    "maxs, mins = train_data.max(), train_data.min()\n",
    "for col in train_data.columns:\n",
    "    col_range = maxs[col] - mins[col]\n",
    "    # Columns with zero range are left untouched to avoid dividing by zero.\n",
    "    if col_range != 0:\n",
    "        train_data[col] = (train_data[col] - mins[col]) / col_range"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "7f27bd56-4f6b-4242-9f79-c7d6b3ee2f13",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>热处理条件-热处理次数</th>\n",
       "      <th>热处理条件-是否是中温停留</th>\n",
       "      <th>第一次热处理-温度</th>\n",
       "      <th>第一次热处理-升温速率</th>\n",
       "      <th>第一次热处理-保留时间</th>\n",
       "      <th>第二次热处理-温度</th>\n",
       "      <th>第二次热处理-升温速率·</th>\n",
       "      <th>第二次热处理-保留时间</th>\n",
       "      <th>共碳化-是否是共碳化物质</th>\n",
       "      <th>共碳化-共碳化物质/沥青</th>\n",
       "      <th>...</th>\n",
       "      <th>模板剂-种类_二氧化硅</th>\n",
       "      <th>模板剂-种类_氢氧化镁</th>\n",
       "      <th>模板剂-种类_氧化钙</th>\n",
       "      <th>模板剂-种类_氧化锌</th>\n",
       "      <th>模板剂-种类_氧化镁</th>\n",
       "      <th>模板剂-种类_氯化钠</th>\n",
       "      <th>模板剂-种类_氯化钾</th>\n",
       "      <th>模板剂-种类_碱式碳酸镁</th>\n",
       "      <th>模板剂-种类_碳酸钙</th>\n",
       "      <th>模板剂-种类_纤维素</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.166667</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>145</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>146</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.666667</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>147</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.500000</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>123 rows × 42 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     热处理条件-热处理次数  热处理条件-是否是中温停留  第一次热处理-温度  第一次热处理-升温速率  第一次热处理-保留时间  \\\n",
       "0            0.0            0.0   0.166667          0.3          0.5   \n",
       "1            0.0            0.0   0.333333          0.3          0.5   \n",
       "2            0.0            0.0   0.333333          0.3          0.5   \n",
       "3            0.0            0.0   0.333333          0.3          0.5   \n",
       "4            1.0            0.0   0.166667          0.3          0.5   \n",
       "..           ...            ...        ...          ...          ...   \n",
       "144          0.0            0.0   0.333333          0.3          0.0   \n",
       "145          0.0            0.0   0.500000          0.3          0.0   \n",
       "146          0.0            0.0   0.666667          0.3          0.0   \n",
       "147          0.0            0.0   0.500000          0.3          0.0   \n",
       "148          0.0            0.0   0.500000          0.3          0.0   \n",
       "\n",
       "     第二次热处理-温度  第二次热处理-升温速率·  第二次热处理-保留时间  共碳化-是否是共碳化物质  共碳化-共碳化物质/沥青  ...  \\\n",
       "0     0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "1     0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "2     0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "3     0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "4     0.666667           0.5     0.666667           0.0           0.0  ...   \n",
       "..         ...           ...          ...           ...           ...  ...   \n",
       "144   0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "145   0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "146   0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "147   0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "148   0.000000           0.0     0.000000           0.0           0.0  ...   \n",
       "\n",
       "     模板剂-种类_二氧化硅  模板剂-种类_氢氧化镁  模板剂-种类_氧化钙  模板剂-种类_氧化锌  模板剂-种类_氧化镁  模板剂-种类_氯化钠  \\\n",
       "0              0          0.0         1.0           0         0.0         0.0   \n",
       "1              0          0.0         1.0           0         0.0         0.0   \n",
       "2              0          0.0         1.0           0         0.0         0.0   \n",
       "3              0          0.0         1.0           0         0.0         0.0   \n",
       "4              0          0.0         0.0           0         0.0         0.0   \n",
       "..           ...          ...         ...         ...         ...         ...   \n",
       "144            0          0.0         0.0           0         0.0         0.0   \n",
       "145            0          0.0         0.0           0         0.0         0.0   \n",
       "146            0          0.0         0.0           0         0.0         0.0   \n",
       "147            0          0.0         0.0           0         0.0         0.0   \n",
       "148            0          0.0         0.0           0         0.0         0.0   \n",
       "\n",
       "     模板剂-种类_氯化钾  模板剂-种类_碱式碳酸镁  模板剂-种类_碳酸钙  模板剂-种类_纤维素  \n",
       "0             0           0.0         0.0         0.0  \n",
       "1             0           0.0         0.0         0.0  \n",
       "2             0           0.0         0.0         0.0  \n",
       "3             0           0.0         0.0         0.0  \n",
       "4             0           1.0         0.0         0.0  \n",
       "..          ...           ...         ...         ...  \n",
       "144           0           0.0         0.0         0.0  \n",
       "145           0           0.0         0.0         0.0  \n",
       "146           0           0.0         0.0         0.0  \n",
       "147           0           0.0         0.0         0.0  \n",
       "148           0           0.0         0.0         0.0  \n",
       "\n",
       "[123 rows x 42 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the scaled frame.\n",
    "train_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "baf45a3d-dc01-44fc-9f0b-456964ac2cdb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every non-target column is used as a model input.\n",
    "# (Alternative kept from an earlier version: also require `'第二次' not in x`\n",
    "# to leave out the columns whose name contains '第二次'.)\n",
    "feature_cols = list(filter(lambda x: x not in out_cols, train_data.columns))\n",
    "use_cols = [*feature_cols, *out_cols]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "f2d27538-d2bc-4202-b0cf-d3e0949b4686",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Work on a float32 copy of the selected columns; train_data itself is not modified.\n",
    "use_data = train_data.astype({name: 'float32' for name in use_cols})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "54c1df2c-c297-4b8d-be8a-3a99cff22545",
   "metadata": {},
   "outputs": [],
   "source": [
    "# First split: 70% train / 30% hold-out. The hold-out part is then split again\n",
    "# 70/30 into validation and test. Both splits use the same fixed seed.\n",
    "train, holdout = train_test_split(use_data[use_cols], test_size=0.3, random_state=42, shuffle=True)\n",
    "valid, test = train_test_split(holdout, test_size=0.3, random_state=42, shuffle=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "e7a914da-b9c2-40d9-96e0-459b0888adba",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the prediction network, then wrap it in the model that adds the\n",
    "# ground-truth inputs and the in-graph multi-output loss.\n",
    "prediction_model = get_prediction_model()\n",
    "trainable_model = get_trainable_model(prediction_model)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "4f832a1e-48e2-4467-b381-35b9d2f1271a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"model\"\n",
      "__________________________________________________________________________________________________\n",
      "Layer (type)                    Output Shape         Param #     Connected to                     \n",
      "==================================================================================================\n",
      "input (InputLayer)              [(None, 1, 38)]      0                                            \n",
      "__________________________________________________________________________________________________\n",
      "conv1d (Conv1D)                 (None, 1, 64)        2496        input[0][0]                      \n",
      "__________________________________________________________________________________________________\n",
      "bidirectional (Bidirectional)   (None, 1, 128)       66048       conv1d[0][0]                     \n",
      "__________________________________________________________________________________________________\n",
      "dense (Dense)                   (None, 1, 128)       16512       bidirectional[0][0]              \n",
      "__________________________________________________________________________________________________\n",
      "transformer_block (TransformerB (None, 1, 128)       69772       dense[0][0]                      \n",
      "__________________________________________________________________________________________________\n",
      "global_average_pooling1d (Globa (None, 128)          0           transformer_block[0][0]          \n",
      "__________________________________________________________________________________________________\n",
      "dropout_2 (Dropout)             (None, 128)          0           global_average_pooling1d[0][0]   \n",
      "__________________________________________________________________________________________________\n",
      "dense_3 (Dense)                 (None, 64)           8256        dropout_2[0][0]                  \n",
      "__________________________________________________________________________________________________\n",
      "tf.expand_dims (TFOpLambda)     (None, 1, 64)        0           dense_3[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "transformer_block_1 (Transforme (None, 1, 64)        18508       tf.expand_dims[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "transformer_block_2 (Transforme (None, 1, 64)        18508       tf.expand_dims[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "transformer_block_3 (Transforme (None, 1, 64)        18508       tf.expand_dims[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "transformer_block_4 (Transforme (None, 1, 64)        18508       tf.expand_dims[0][0]             \n",
      "__________________________________________________________________________________________________\n",
      "global_average_pooling1d_1 (Glo (None, 64)           0           transformer_block_1[0][0]        \n",
      "__________________________________________________________________________________________________\n",
      "global_average_pooling1d_2 (Glo (None, 64)           0           transformer_block_2[0][0]        \n",
      "__________________________________________________________________________________________________\n",
      "global_average_pooling1d_3 (Glo (None, 64)           0           transformer_block_3[0][0]        \n",
      "__________________________________________________________________________________________________\n",
      "global_average_pooling1d_4 (Glo (None, 64)           0           transformer_block_4[0][0]        \n",
      "__________________________________________________________________________________________________\n",
      "dropout_5 (Dropout)             (None, 64)           0           global_average_pooling1d_1[0][0] \n",
      "__________________________________________________________________________________________________\n",
      "dropout_8 (Dropout)             (None, 64)           0           global_average_pooling1d_2[0][0] \n",
      "__________________________________________________________________________________________________\n",
      "dropout_11 (Dropout)            (None, 64)           0           global_average_pooling1d_3[0][0] \n",
      "__________________________________________________________________________________________________\n",
      "dropout_14 (Dropout)            (None, 64)           0           global_average_pooling1d_4[0][0] \n",
      "__________________________________________________________________________________________________\n",
      "dense_6 (Dense)                 (None, 32)           2080        dropout_5[0][0]                  \n",
      "__________________________________________________________________________________________________\n",
      "dense_9 (Dense)                 (None, 32)           2080        dropout_8[0][0]                  \n",
      "__________________________________________________________________________________________________\n",
      "dense_12 (Dense)                (None, 32)           2080        dropout_11[0][0]                 \n",
      "__________________________________________________________________________________________________\n",
      "dense_15 (Dense)                (None, 32)           2080        dropout_14[0][0]                 \n",
      "__________________________________________________________________________________________________\n",
      "bet (Dense)                     (None, 1)            33          dense_6[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "mesco (Dense)                   (None, 1)            33          dense_9[0][0]                    \n",
      "__________________________________________________________________________________________________\n",
      "micro (Dense)                   (None, 1)            33          dense_12[0][0]                   \n",
      "__________________________________________________________________________________________________\n",
      "avg (Dense)                     (None, 1)            33          dense_15[0][0]                   \n",
      "==================================================================================================\n",
      "Total params: 245,568\n",
      "Trainable params: 245,568\n",
      "Non-trainable params: 0\n",
      "__________________________________________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "prediction_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "9289f452-a5a4-40c4-b942-f6cb2e348548",
   "metadata": {},
   "outputs": [],
   "source": [
    "from tensorflow.keras import optimizers\n",
    "from tensorflow.python.keras.utils.vis_utils import plot_model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "2494ef5a-5b2b-4f11-b6cd-dc39503c9106",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Training features with a length-1 axis inserted at position 1: (rows, 1, n_features).\n",
    "X = np.expand_dims(train[feature_cols].values, axis=1)\n",
    "# Transposing puts one target column per row, so each list entry is the 1-D array for one output in out_cols order.\n",
    "Y = list(train[out_cols].values.T)\n",
    "Y_valid = list(valid[out_cols].values.T)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "9a62dea1-4f05-411b-9756-a91623580581",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import the callback from tensorflow.keras, the namespace the other Keras imports in this notebook use;\n",
    "# pulling it from the standalone `keras` package can pair the model with a different Keras build.\n",
    "from tensorflow.keras.callbacks import ReduceLROnPlateau\n",
    "\n",
    "# Lower the learning rate (library default factor) once val_loss has gone 10 epochs without improving.\n",
    "reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "cf869e4d-0fce-45a2-afff-46fd9b30fd1c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 1/40\n",
      "11/11 [==============================] - 6s 108ms/step - loss: 0.0316 - val_loss: 0.0835\n",
      "Epoch 2/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0281 - val_loss: 0.0958\n",
      "Epoch 3/40\n",
      "11/11 [==============================] - 0s 27ms/step - loss: 0.0278 - val_loss: 0.0891\n",
      "Epoch 4/40\n",
      "11/11 [==============================] - 0s 21ms/step - loss: 0.0233 - val_loss: 0.0912\n",
      "Epoch 5/40\n",
      "11/11 [==============================] - 0s 27ms/step - loss: 0.0215 - val_loss: 0.1023\n",
      "Epoch 6/40\n",
      "11/11 [==============================] - 0s 33ms/step - loss: 0.0348 - val_loss: 0.0864\n",
      "Epoch 7/40\n",
      "11/11 [==============================] - 0s 16ms/step - loss: 0.0207 - val_loss: 0.0823\n",
      "Epoch 8/40\n",
      "11/11 [==============================] - 0s 25ms/step - loss: 0.0222 - val_loss: 0.0883\n",
      "Epoch 9/40\n",
      "11/11 [==============================] - 0s 22ms/step - loss: 0.0258 - val_loss: 0.1029\n",
      "Epoch 10/40\n",
      "11/11 [==============================] - 0s 26ms/step - loss: 0.0288 - val_loss: 0.0857\n",
      "Epoch 11/40\n",
      "11/11 [==============================] - 0s 22ms/step - loss: 0.0249 - val_loss: 0.0880\n",
      "Epoch 12/40\n",
      "11/11 [==============================] - 0s 21ms/step - loss: 0.0219 - val_loss: 0.0882\n",
      "Epoch 13/40\n",
      "11/11 [==============================] - 0s 24ms/step - loss: 0.0191 - val_loss: 0.0873\n",
      "Epoch 14/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0187 - val_loss: 0.0929\n",
      "Epoch 15/40\n",
      "11/11 [==============================] - 0s 23ms/step - loss: 0.0183 - val_loss: 0.0988\n",
      "Epoch 16/40\n",
      "11/11 [==============================] - 0s 19ms/step - loss: 0.0189 - val_loss: 0.0905\n",
      "Epoch 17/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0209 - val_loss: 0.0823\n",
      "Epoch 18/40\n",
      "11/11 [==============================] - 0s 27ms/step - loss: 0.0185 - val_loss: 0.0834\n",
      "Epoch 19/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0177 - val_loss: 0.0916\n",
      "Epoch 20/40\n",
      "11/11 [==============================] - 0s 24ms/step - loss: 0.0163 - val_loss: 0.0919\n",
      "Epoch 21/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0141 - val_loss: 0.0898\n",
      "Epoch 22/40\n",
      "11/11 [==============================] - 0s 27ms/step - loss: 0.0144 - val_loss: 0.0923\n",
      "Epoch 23/40\n",
      "11/11 [==============================] - 0s 19ms/step - loss: 0.0138 - val_loss: 0.0906\n",
      "Epoch 24/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0140 - val_loss: 0.0897\n",
      "Epoch 25/40\n",
      "11/11 [==============================] - 0s 23ms/step - loss: 0.0126 - val_loss: 0.0892\n",
      "Epoch 26/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0129 - val_loss: 0.0918\n",
      "Epoch 27/40\n",
      "11/11 [==============================] - 0s 25ms/step - loss: 0.0123 - val_loss: 0.0935\n",
      "Epoch 28/40\n",
      "11/11 [==============================] - 0s 25ms/step - loss: 0.0131 - val_loss: 0.0933\n",
      "Epoch 29/40\n",
      "11/11 [==============================] - 0s 17ms/step - loss: 0.0125 - val_loss: 0.0933\n",
      "Epoch 30/40\n",
      "11/11 [==============================] - 0s 23ms/step - loss: 0.0119 - val_loss: 0.0932\n",
      "Epoch 31/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0129 - val_loss: 0.0936\n",
      "Epoch 32/40\n",
      "11/11 [==============================] - 0s 28ms/step - loss: 0.0114 - val_loss: 0.0933\n",
      "Epoch 33/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0122 - val_loss: 0.0932\n",
      "Epoch 34/40\n",
      "11/11 [==============================] - 0s 21ms/step - loss: 0.0114 - val_loss: 0.0936\n",
      "Epoch 35/40\n",
      "11/11 [==============================] - 0s 23ms/step - loss: 0.0119 - val_loss: 0.0938\n",
      "Epoch 36/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0118 - val_loss: 0.0937\n",
      "Epoch 37/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0127 - val_loss: 0.0937\n",
      "Epoch 38/40\n",
      "11/11 [==============================] - 0s 27ms/step - loss: 0.0123 - val_loss: 0.0937\n",
      "Epoch 39/40\n",
      "11/11 [==============================] - 0s 19ms/step - loss: 0.0124 - val_loss: 0.0937\n",
      "Epoch 40/40\n",
      "11/11 [==============================] - 0s 20ms/step - loss: 0.0129 - val_loss: 0.0937\n"
     ]
    }
   ],
   "source": [
    "# loss=None: no Keras loss is attached here; the targets are fed in as extra model inputs instead\n",
    "# (presumably the model adds its own loss internally - confirm against the model definition).\n",
    "trainable_model.compile(optimizer='adam', loss=None)\n",
    "\n",
    "train_inputs = [X, Y[0], Y[1], Y[2], Y[3]]\n",
    "valid_inputs = [np.expand_dims(valid[feature_cols].values, axis=1),\n",
    "                Y_valid[0], Y_valid[1], Y_valid[2], Y_valid[3]]\n",
    "\n",
    "# validation_data is documented as a tuple (x_val, y_val). A bare 5-element list is only accepted by some\n",
    "# Keras versions (others try to unpack it as x / y / sample_weight and raise), so wrap the input list in a\n",
    "# tuple and pass y_val=None explicitly.\n",
    "hist = trainable_model.fit(train_inputs, epochs=40, batch_size=8, verbose=1,\n",
    "                           validation_data=(valid_inputs, None),\n",
    "                           callbacks=[reduce_lr]\n",
    "                           )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "67bfbe88-5f2c-4659-b2dc-eb9f1b824d04",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[array([[0.8401114 ],\n",
       "        [0.4296295 ],\n",
       "        [0.34763122],\n",
       "        [0.33006623],\n",
       "        [0.74300694],\n",
       "        [0.48508543],\n",
       "        [0.48184243],\n",
       "        [0.7309267 ],\n",
       "        [0.5264127 ],\n",
       "        [0.7570494 ],\n",
       "        [0.29492375],\n",
       "        [0.34379733]], dtype=float32),\n",
       " array([[0.9495956 ],\n",
       "        [0.19964108],\n",
       "        [0.25691378],\n",
       "        [0.15781167],\n",
       "        [0.39773428],\n",
       "        [0.257546  ],\n",
       "        [0.2265681 ],\n",
       "        [0.39088207],\n",
       "        [0.30309337],\n",
       "        [0.4006669 ],\n",
       "        [0.16448957],\n",
       "        [0.20928389]], dtype=float32),\n",
       " array([[0.93163174],\n",
       "        [0.45915267],\n",
       "        [0.24377662],\n",
       "        [0.32275468],\n",
       "        [0.84771645],\n",
       "        [0.51101613],\n",
       "        [0.52240014],\n",
       "        [0.77952445],\n",
       "        [0.6746559 ],\n",
       "        [0.6747417 ],\n",
       "        [0.3022651 ],\n",
       "        [0.3458013 ]], dtype=float32),\n",
       " array([[0.4518058 ],\n",
       "        [0.06488091],\n",
       "        [0.2511762 ],\n",
       "        [0.0624491 ],\n",
       "        [0.09656441],\n",
       "        [0.07555431],\n",
       "        [0.06494072],\n",
       "        [0.09723139],\n",
       "        [0.10824579],\n",
       "        [0.09783638],\n",
       "        [0.07164052],\n",
       "        [0.15804273]], dtype=float32)]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Give the `test` features the same length-1 axis at position 1 before predicting.\n",
    "test_inputs = np.expand_dims(test[feature_cols].values, axis=1)\n",
    "rst = prediction_model.predict(test_inputs)\n",
    "rst"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "7de501e9-05a2-424c-a5f4-85d43ad37592",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[0.998927703775019, 0.9994643982390371, 0.9991108696677027, 0.9996066810061789]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# exp(log_var) ** 0.5 for every entry of the last layer's log_vars\n",
    "# (presumably learned per-task log-variances, which would make these the task standard deviations - confirm).\n",
    "loss_layer = trainable_model.layers[-1]\n",
    "[np.exp(K.get_value(task_log_var[0])) ** 0.5 for task_log_var in loss_layer.log_vars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "b0d5d8ad-aadd-4218-b5b7-9691a2d3eeef",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the per-output prediction arrays, drop the trailing length-1 axis, and transpose\n",
    "# so that rows are samples and columns follow out_cols.\n",
    "stacked_preds = np.squeeze(np.asarray(rst), axis=2)\n",
    "pred_rst = pd.DataFrame.from_records(stacked_preds.T, columns=out_cols)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "0a2bcb45-da86-471b-a61d-314e29430d6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Independent copy of the target columns of `test`, so later in-place edits do not touch `test`.\n",
    "real_rst = test.loc[:, out_cols].copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "e124f7c0-fdd5-43b9-b649-ff7d9dd59641",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each column through x * (max - min) + min using the per-column `mins` / `maxs`\n",
    "# (presumably undoing an earlier min-max normalisation - confirm where mins/maxs are built).\n",
    "# NOTE: this overwrites pred_rst / real_rst in place, so running the cell twice rescales twice.\n",
    "for col in out_cols:\n",
    "    col_min = mins[col]\n",
    "    col_range = maxs[col] - col_min\n",
    "    pred_rst[col] = pred_rst[col] * col_range + col_min\n",
    "    real_rst[col] = real_rst[col] * col_range + col_min"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "5c69d03b-34fd-4dbf-aec6-c15093bb22ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['碳材料结构特征-比表面积', '碳材料结构特征-总孔体积', '碳材料结构特征-微孔体积', '碳材料结构特征-平均孔径'], dtype='object')"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "real_rst.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "21739f82-d82a-4bde-8537-9504b68a96d5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flat 1-D arrays of predictions and ground truth, one pair per target column.\n",
    "# The pm25 / pm10 / so2 / no2 suffixes are only legacy variable names: they hold, in order,\n",
    "# the 比表面积, 总孔体积, 微孔体积 and 平均孔径 columns.\n",
    "eval_cols = (\n",
    "    '碳材料结构特征-比表面积',\n",
    "    '碳材料结构特征-总孔体积',\n",
    "    '碳材料结构特征-微孔体积',\n",
    "    '碳材料结构特征-平均孔径',\n",
    ")\n",
    "y_pred_pm25, y_pred_pm10, y_pred_so2, y_pred_no2 = (\n",
    "    pred_rst[name].values.reshape(-1,) for name in eval_cols\n",
    ")\n",
    "y_true_pm25, y_true_pm10, y_true_so2, y_true_no2 = (\n",
    "    real_rst[name].values.reshape(-1,) for name in eval_cols\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "26ea6cfa-efad-443c-9dd9-844f8be42b91",
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "28072e7c-c9d5-4ff6-940d-e94ae879afc9",
   "metadata": {},
   "outputs": [],
   "source": [
    "def print_eva(y_true, y_pred, tp):\n",
    "    \"\"\"Print and return the regression metrics for one target.\n",
    "\n",
    "    Args:\n",
    "        y_true: ground-truth values.\n",
    "        y_pred: predicted values aligned with ``y_true``.\n",
    "        tp: label printed after ``COL:``.\n",
    "\n",
    "    Returns:\n",
    "        ``[MSE, RMSE, MAE, MAPE, R_2]``. MAPE is returned as the raw fraction;\n",
    "        only the printed value is scaled to percent.\n",
    "    \"\"\"\n",
    "    MSE = mean_squared_error(y_true, y_pred)\n",
    "    RMSE = np.sqrt(MSE)\n",
    "    MAE = mean_absolute_error(y_true, y_pred)\n",
    "    MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
    "    R_2 = r2_score(y_true, y_pred)\n",
    "    print(f\"COL: {tp}, MSE: {format(MSE, '.2E')}\", end=',')\n",
    "    print(f'RMSE: {round(RMSE, 4)}', end=',')\n",
    "    # Scale to percent first, then round: rounding the fraction and multiplying by 100 afterwards\n",
    "    # leaks binary float noise into the report (e.g. 23.810000000000002 %).\n",
    "    print(f'MAPE: {round(MAPE * 100, 2)} %', end=',')\n",
    "    print(f'MAE: {round(MAE, 4)}', end=',')\n",
    "    print(f'R_2: {round(R_2, 4)}')\n",
    "    return [MSE, RMSE, MAE, MAPE, R_2]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "4ec4caa9-7c46-4fc8-a94b-cb659e924304",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "COL: 比表面积, MSE: 2.36E+05,RMSE: 485.5891,MAPE: 25.86 %,MAE: 340.8309,R_2: -0.1091\n",
      "COL: 总孔体积, MSE: 5.15E-02,RMSE: 0.2268,MAPE: 23.810000000000002 %,MAE: 0.1519,R_2: 0.7657\n",
      "COL: 微孔体积, MSE: 4.53E-02,RMSE: 0.2128,MAPE: 34.75 %,MAE: 0.1536,R_2: -0.0412\n",
      "COL: 平均孔径, MSE: 4.63E-01,RMSE: 0.6802,MAPE: 15.620000000000001 %,MAE: 0.415,R_2: 0.5929\n"
     ]
    }
   ],
   "source": [
    "# Score every target; the generator is consumed left to right, so the report lines print in this order.\n",
    "eval_cases = [\n",
    "    ('比表面积', y_true_pm25, y_pred_pm25),\n",
    "    ('总孔体积', y_true_pm10, y_pred_pm10),\n",
    "    ('微孔体积', y_true_so2, y_pred_so2),\n",
    "    ('平均孔径', y_true_no2, y_pred_no2),\n",
    "]\n",
    "pm25_eva, pm10_eva, so2_eva, nox_eva = (\n",
    "    print_eva(truth, pred, tp=label) for label, truth, pred in eval_cases\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ac4a4339-ec7d-4266-8197-5276c2395288",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f15cbb91-1ce7-4fb0-979a-a4bdc452a1ec",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}