In [1]:
import os

# Pin TensorFlow to the first GPU. Note the correct value is 'PCI_BUS_ID';
# the original 'PCB_BUS_ID' is a typo that CUDA silently ignores.
os.environ['CUDA_DEVICE_ORDER'] = 'PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Two extra lines so matplotlib can render Chinese labels
from pylab import mpl
mpl.rcParams["font.sans-serif"] = ["SimHei"]  # use the SimHei font
mpl.rcParams["axes.unicode_minus"] = False
In [3]:
data = pd.read_csv('./new_train_data.csv', index_col='date')
# data.drop(columns=['wd'], inplace=True)  # wind direction: not yet decided how to handle it
data.head()
Out[3]:
| date | 24_PM2.5 | 24_PM10 | 24_SO2 | 24_NO2 | 24_O3 | 24_CO | 23_PM2.5 | 23_PM10 | 23_SO2 | 23_NO2 | ... | NH3_resdient | NH3_agricultural | VOC_industrial | VOC_transportation | VOC_resdient | VOC_power | PM2.5_industrial | PM2.5_transportation | PM2.5_resdient | PM2.5_power |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2015-01-03 01:00:00 | 136.0 | 214.0 | 317.0 | 38.0 | 8.0 | 3.71 | 114.0 | 176.0 | 305.0 | 38.0 | ... | 0.033910 | 0.359273 | 1.177423 | 1.084925 | 0.937173 | 0.037724 | 0.926851 | 0.077715 | 0.827110 | 0.436028 |
| 2015-01-03 02:00:00 | 114.0 | 176.0 | 305.0 | 38.0 | 8.0 | 3.55 | 97.0 | 154.0 | 306.0 | 37.0 | ... | 0.033910 | 0.359273 | 1.177423 | 1.134240 | 0.937173 | 0.036215 | 0.926851 | 0.081248 | 0.827110 | 0.418587 |
| 2015-01-03 03:00:00 | 97.0 | 154.0 | 306.0 | 37.0 | 7.0 | 3.51 | 87.0 | 141.0 | 316.0 | 38.0 | ... | 0.033910 | 0.327791 | 1.177423 | 1.232869 | 0.937173 | 0.035712 | 0.926851 | 0.088313 | 0.827110 | 0.412773 |
| 2015-01-03 04:00:00 | 87.0 | 141.0 | 316.0 | 38.0 | 7.0 | 3.55 | 85.0 | 139.0 | 292.0 | 37.0 | ... | 0.033910 | 0.350014 | 1.177423 | 1.273965 | 0.937173 | 0.036718 | 0.926851 | 0.091256 | 0.827110 | 0.424400 |
| 2015-01-03 05:00:00 | 85.0 | 139.0 | 292.0 | 37.0 | 7.0 | 3.62 | 106.0 | 167.0 | 316.0 | 37.0 | ... | 0.071588 | 0.388904 | 1.177423 | 1.290403 | 1.978475 | 0.039736 | 0.926851 | 0.092434 | 1.746121 | 0.459282 |

5 rows × 187 columns
In [4]:
import seaborn as sns
In [5]:
out_cols = ['PM2.5', 'PM10', 'SO2', 'NO2', 'O3', 'CO']
feature_cols = [x for x in data.columns if x not in out_cols and x != 'date']
len(feature_cols), len(out_cols)
Out[5]:
(181, 6)
In [6]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow.keras.backend as K
2023-03-30 08:54:01.213692: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcudart.so.11.0
In [7]:
class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, name, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim, name=name)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        # Self-attention with residual connection and layer norm (post-LN)
        attn_output = self.att(inputs, inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        # Position-wise feed-forward block, again with residual + layer norm
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
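The block is dimension-preserving, which is what lets the model stack it freely later on. A minimal sanity-check sketch (the `demo_attn` name and random input are illustrative, not part of the original notebook):

# Hedged shape check: the block maps (batch, seq_len, embed_dim)
# back to the same shape.
demo_block = TransformerBlock(embed_dim=64, num_heads=3, ff_dim=16, name='demo_attn')
demo_in = tf.random.normal((2, 1, 64))          # (batch=2, seq_len=1, embed_dim=64)
demo_out = demo_block(demo_in, training=False)  # dropout disabled at inference
print(demo_out.shape)                           # -> (2, 1, 64)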
In [8]:
from tensorflow.keras import Model
In [9]:
from tensorflow.keras.initializers import Constant
In [10]:
# Custom loss layer: learns one log-variance per task and weights each
# task's squared error by the corresponding precision.
class CustomMultiLossLayer(layers.Layer):
    def __init__(self, nb_outputs=2, **kwargs):
        self.nb_outputs = nb_outputs
        self.is_placeholder = True
        super(CustomMultiLossLayer, self).__init__(**kwargs)

    def build(self, input_shape=None):
        # Initialise one trainable log-variance per output
        self.log_vars = []
        for i in range(self.nb_outputs):
            self.log_vars += [self.add_weight(name='log_var' + str(i), shape=(1,),
                                              initializer=tf.initializers.he_normal(),
                                              trainable=True)]
        super(CustomMultiLossLayer, self).build(input_shape)

    def multi_loss(self, ys_true, ys_pred):
        assert len(ys_true) == self.nb_outputs and len(ys_pred) == self.nb_outputs
        loss = 0
        for y_true, y_pred, log_var in zip(ys_true, ys_pred, self.log_vars):
            mse = (y_true - y_pred) ** 2.
            pre = K.exp(-log_var[0])  # precision = 1 / sigma^2
            loss += tf.abs(tf.reduce_logsumexp(pre * mse + log_var[0], axis=-1))
        return K.mean(loss)

    def call(self, inputs):
        ys_true = inputs[:self.nb_outputs]
        ys_pred = inputs[self.nb_outputs:]
        loss = self.multi_loss(ys_true, ys_pred)
        self.add_loss(loss, inputs=inputs)
        # We won't actually use the output.
        return K.concatenate(inputs, -1)
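This appears to be a variant of the uncertainty weighting of Kendall, Gal and Cipolla (2018): each task contributes roughly exp(-log_var) · MSE + log_var, so a task with high learned variance is down-weighted while the +log_var term stops the variances from growing without bound. A hedged NumPy sketch of that trade-off (small illustrative numbers, not taken from this notebook's run):

# Canonical per-task uncertainty-weighted term:
#   task_loss = exp(-log_var) * mse + log_var
import numpy as np

def uw_task_loss(mse, log_var):
    return np.exp(-log_var) * mse + log_var

for lv in (-1.0, 0.0, 1.0):
    print(lv, uw_task_loss(0.5, lv))
# Larger log_var shrinks the MSE term but pays a +log_var penalty,
# so the optimum balances the two.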
In [11]:
num_heads, ff_dim = 3, 16
In [12]:
def get_prediction_model():
    def build_output(out, out_name):
        # Per-pollutant head: its own transformer block + pooling + dense
        self_block = TransformerBlock(64, num_heads, ff_dim, name=f'{out_name}_attn')
        out = self_block(out)
        out = layers.GlobalAveragePooling1D()(out)
        out = layers.Dropout(0.1)(out)
        out = layers.Dense(32, activation="relu")(out)
        # out = layers.Dense(1, name=out_name, activation="sigmoid")(out)
        return out

    # Shared trunk: Conv1D -> BiLSTM -> transformer block
    inputs = layers.Input(shape=(1, len(feature_cols)), name='input')
    x = layers.Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)
    # x = layers.Dropout(rate=0.1)(x)
    lstm_out = layers.Bidirectional(layers.LSTM(units=64, return_sequences=True))(x)
    lstm_out = layers.Dense(128, activation='relu')(lstm_out)
    transformer_block = TransformerBlock(128, num_heads, ff_dim, name='first_attn')
    out = transformer_block(lstm_out)
    out = layers.GlobalAveragePooling1D()(out)
    out = layers.Dropout(0.1)(out)
    out = layers.Dense(64, activation='relu')(out)
    out = K.expand_dims(out, axis=1)

    # One head per pollutant on top of the shared trunk
    pm25 = build_output(out, 'pm25')
    pm10 = build_output(out, 'pm10')
    so2 = build_output(out, 'so2')
    no2 = build_output(out, 'no2')
    o3 = build_output(out, 'o3')
    co = build_output(out, 'co')

    # Fuse the six heads with a final attention block
    merge = layers.Concatenate(axis=1)([pm25, pm10, so2, no2, o3, co])
    merge = K.expand_dims(merge, axis=1)
    merge_attn = TransformerBlock(32 * 6, 3, 16, name='last_attn')
    out = merge_attn(merge)
    out = layers.GlobalAveragePooling1D()(out)
    out = layers.Dropout(0.1)(out)

    pm25 = layers.Dense(32, activation='relu')(out)
    pm10 = layers.Dense(32, activation='relu')(out)
    so2 = layers.Dense(32, activation='relu')(out)
    no2 = layers.Dense(32, activation='relu')(out)
    o3 = layers.Dense(32, activation='relu')(out)
    co = layers.Dense(32, activation='relu')(out)
    pm25 = layers.Dense(1, activation='sigmoid', name='pm25')(pm25)
    pm10 = layers.Dense(1, activation='sigmoid', name='pm10')(pm10)
    so2 = layers.Dense(1, activation='sigmoid', name='so2')(so2)
    no2 = layers.Dense(1, activation='sigmoid', name='no2')(no2)
    o3 = layers.Dense(1, activation='sigmoid', name='o3')(o3)
    co = layers.Dense(1, activation='sigmoid', name='co')(co)

    model = Model(inputs=[inputs], outputs=[pm25, pm10, so2, no2, o3, co])
    return model


def get_trainable_model(prediction_model):
    inputs = layers.Input(shape=(1, len(feature_cols)), name='input')
    pm25, pm10, so2, no2, o3, co = prediction_model(inputs)
    # Ground-truth inputs, consumed only by the loss layer
    pm25_real = layers.Input(shape=(1,), name='pm25_real')
    pm10_real = layers.Input(shape=(1,), name='pm10_real')
    so2_real = layers.Input(shape=(1,), name='so2_real')
    no2_real = layers.Input(shape=(1,), name='no2_real')
    o3_real = layers.Input(shape=(1,), name='o3_real')
    co_real = layers.Input(shape=(1,), name='co_real')
    out = CustomMultiLossLayer(nb_outputs=6)(
        [pm25_real, pm10_real, so2_real, no2_real, o3_real, co_real,
         pm25, pm10, so2, no2, o3, co])
    return Model([inputs, pm25_real, pm10_real, so2_real, no2_real, o3_real, co_real], out)
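As a quick smoke test, the prediction model can be called on random data to confirm it emits six (batch, 1) sigmoid outputs. A minimal sketch, assuming `feature_cols` has been built as above (not part of the original notebook):

# Hedged smoke test: random input, six outputs in [0, 1]
m = get_prediction_model()
demo_x = np.random.rand(2, 1, len(feature_cols)).astype('float32')
for pol, y in zip(['pm25', 'pm10', 'so2', 'no2', 'o3', 'co'], m(demo_x)):
    print(pol, y.shape)  # each -> (2, 1)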
In [13]:
use_cols = feature_cols + out_cols
use_data = data[use_cols].dropna()
In [14]:
for col in use_cols:
    use_data[col] = use_data[col].astype('float32')
In [15]:
maxs = use_data.max()
mins = use_data.min()
use_cols = use_data.columns
In [16]:
for col in use_cols:
    # use_data[col] = use_data[col].apply(lambda x: 0 if x < 0 else x)
    # use_data[col] = np.log1p(use_data[col])
    use_data[col] = (use_data[col] - mins[col]) / (maxs[col] - mins[col])  # min-max scale to [0, 1]
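Because the targets are min-max scaled here, predictions must be mapped back to physical units before computing metrics (done further below with the same `maxs`/`mins`). A minimal helper sketch, not in the original notebook:

# Hypothetical helper: undo the min-max scaling for one column,
# using the maxs/mins Series computed above.
def inverse_scale(series, col):
    return series * (maxs[col] - mins[col]) + mins[col]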
In [17]:
train_data, valid = train_test_split(use_data[use_cols], test_size=0.1, random_state=42, shuffle=True)
valid_data, test_data = train_test_split(valid, test_size=0.5, random_state=42, shuffle=True)
In [18]:
prediction_model = get_prediction_model()
trainable_model = get_trainable_model(prediction_model)
2023-03-30 08:54:16.887997: I tensorflow/stream_executor/platform/default/dso_loader.cc:53] Successfully opened dynamic library libcuda.so.1
2023-03-30 08:54:16.964653: E tensorflow/stream_executor/cuda/cuda_driver.cc:328] failed call to cuInit: CUDA_ERROR_INVALID_DEVICE: invalid device ordinal
2023-03-30 08:54:16.964687: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:169] retrieving CUDA diagnostic information for host: ubuntu-NF5468M6
2023-03-30 08:54:16.964693: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:176] hostname: ubuntu-NF5468M6
2023-03-30 08:54:16.964801: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:200] libcuda reported version is: 510.47.3
2023-03-30 08:54:16.964823: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:204] kernel reported version is: 510.47.3
2023-03-30 08:54:16.964828: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:310] kernel version seems to match DSO: 510.47.3
2023-03-30 08:54:16.965147: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
In [19]:
from tensorflow.keras import optimizers
from tensorflow.python.keras.utils.vis_utils import plot_model
In [20]:
plot_model(model=prediction_model)
Out[20]:
(model architecture diagram produced by plot_model)
In [22]:
X = np.expand_dims(train_data[feature_cols].values, axis=1)
In [23]:
Y = [x for x in train_data[out_cols].values.T]
In [24]:
Y_valid = [x for x in valid_data[out_cols].values.T]
In [25]:
from tensorflow.keras.callbacks import ReduceLROnPlateau  # tf.keras, consistent with the imports above

reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=10, mode='auto')
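ReduceLROnPlateau's default factor is 0.1, which is consistent with the sharp loss drop around epoch 30 in the training log below; an EarlyStopping callback could cut the long flat tail after epoch ~45. A hedged sketch of that optional addition (not used in the original run):

# Optional: stop once val_loss stalls, restoring the best weights seen so far.
from tensorflow.keras.callbacks import EarlyStopping
early_stop = EarlyStopping(monitor='val_loss', patience=20,
                           restore_best_weights=True)
# callbacks=[reduce_lr, early_stop] would then be passed to fit().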
In [26]:
trainable_model.compile(optimizer='adam', loss=None)  # the loss is added inside CustomMultiLossLayer
hist = trainable_model.fit([X, Y[0], Y[1], Y[2], Y[3], Y[4], Y[5]],
                           epochs=100,
                           batch_size=64,
                           verbose=1,
                           validation_data=[np.expand_dims(valid_data[feature_cols].values, axis=1),
                                            Y_valid[0], Y_valid[1], Y_valid[2],
                                            Y_valid[3], Y_valid[4], Y_valid[5]],
                           callbacks=[reduce_lr])
2023-03-30 08:54:43.160048: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
2023-03-30 08:54:43.178160: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2200000000 Hz
Epoch 1/100
690/690 [==============================] - 22s 20ms/step - loss: 5.0242 - val_loss: 4.0851
Epoch 2/100
690/690 [==============================] - 13s 18ms/step - loss: 3.1973 - val_loss: 2.2916
Epoch 3/100
690/690 [==============================] - 13s 18ms/step - loss: 1.4457 - val_loss: 0.7350
Epoch 4/100
690/690 [==============================] - 13s 18ms/step - loss: 0.2939 - val_loss: 0.0348
Epoch 5/100
690/690 [==============================] - 12s 18ms/step - loss: 0.0098 - val_loss: 0.0061
...
Epoch 29/100
690/690 [==============================] - 13s 18ms/step - loss: 0.0038 - val_loss: 0.0035
Epoch 30/100
690/690 [==============================] - 13s 18ms/step - loss: 0.0028 - val_loss: 0.0021
...
Epoch 41/100
690/690 [==============================] - 13s 18ms/step - loss: 0.0025 - val_loss: 0.0020
...
Epoch 100/100
690/690 [==============================] - 13s 18ms/step - loss: 0.0025 - val_loss: 0.0020
In [27]:
rst = prediction_model.predict(np.expand_dims(test_data[feature_cols], axis=1))
rst
Out[27]:
[array([[0.02649942], [0.17850098], [0.05590522], ..., [0.06559962], [0.07360396], [0.11683977]], dtype=float32),
 array([[0.0409514 ], [0.10872066], [0.04259145], ..., [0.08358508], [0.07330614], [0.07695329]], dtype=float32),
 array([[0.0078212 ], [0.03656307], [0.01281381], ..., [0.02448112], [0.01346153], [0.01845062]], dtype=float32),
 array([[0.03263965], [0.2611128 ], [0.28920233], ..., [0.45292783], [0.07745293], [0.49551144]], dtype=float32),
 array([[0.26260266], [0.30972052], [0.09751886], ..., [0.02907404], [0.49323225], [0.04708111]], dtype=float32),
 array([[0.02804664], [0.18090692], [0.08362207], ..., [0.07223988], [0.05220559], [0.1401439 ]], dtype=float32)]
In [28]:
[np.exp(K.get_value(log_var[0]))**0.5 for log_var in trainable_model.layers[-1].log_vars]
Out[28]:
[0.9999489175147174, 0.9999628656168237, 0.999970644281569, 0.9998601875319162, 0.9999327040916789, 0.9999352970648614]
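All six learned sigmas sit near 1.0, i.e. exp(log_var) ≈ 1, so the per-task precision weights exp(-log_var) are also ≈ 1 and the six pollutants ended up almost equally weighted in this run. A hedged check of that reading (not in the original notebook):

# The effective loss weight for each task is exp(-log_var);
# sigma ≈ 1.0 therefore implies near-uniform task weights.
weights = [float(np.exp(-K.get_value(log_var[0])))
           for log_var in trainable_model.layers[-1].log_vars]
print(weights)  # all close to 1.0 for this run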
In [29]:
pred_rst = pd.DataFrame.from_records(np.squeeze(np.asarray(rst), axis=2).T, columns=out_cols)
In [30]:
real_rst = test_data[out_cols].copy()
In [31]:
for col in out_cols:
    # Undo the min-max scaling for both predictions and ground truth
    pred_rst[col] = pred_rst[col] * (maxs[col] - mins[col]) + mins[col]
    real_rst[col] = real_rst[col] * (maxs[col] - mins[col]) + mins[col]
In [32]:
y_pred_pm25 = pred_rst['PM2.5'].values.reshape(-1,)
y_pred_pm10 = pred_rst['PM10'].values.reshape(-1,)
y_pred_so2 = pred_rst['SO2'].values.reshape(-1,)
y_pred_no2 = pred_rst['NO2'].values.reshape(-1,)
y_pred_o3 = pred_rst['O3'].values.reshape(-1,)
y_pred_co = pred_rst['CO'].values.reshape(-1,)
y_true_pm25 = real_rst['PM2.5'].values.reshape(-1,)
y_true_pm10 = real_rst['PM10'].values.reshape(-1,)
y_true_so2 = real_rst['SO2'].values.reshape(-1,)
y_true_no2 = real_rst['NO2'].values.reshape(-1,)
y_true_o3 = real_rst['O3'].values.reshape(-1,)
y_true_co = real_rst['CO'].values.reshape(-1,)
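The twelve assignments above could be written more compactly; a hedged, behavior-equivalent sketch (the `y_pred`/`y_true` dicts are hypothetical names, not used elsewhere in the notebook):

# Hypothetical compact equivalent of the twelve reshape assignments.
y_pred = {col: pred_rst[col].values.reshape(-1,) for col in out_cols}
y_true = {col: real_rst[col].values.reshape(-1,) for col in out_cols}
# e.g. y_pred['PM2.5'] corresponds to y_pred_pm25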
In [33]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
In [34]:
def print_eva(y_true, y_pred, tp):
    MSE = mean_squared_error(y_true, y_pred)
    RMSE = np.sqrt(MSE)
    MAE = mean_absolute_error(y_true, y_pred)
    MAPE = mean_absolute_percentage_error(y_true, y_pred)
    R_2 = r2_score(y_true, y_pred)
    print(f"COL: {tp}, MSE: {format(MSE, '.2E')}", end=',')
    print(f'RMSE: {round(float(RMSE), 4)}', end=',')
    print(f'MAPE: {round(float(MAPE) * 100, 2)} %', end=',')  # convert to percent before rounding
    print(f'MAE: {round(float(MAE), 4)}', end=',')
    print(f'R_2: {round(float(R_2), 4)}')
    return [MSE, RMSE, MAE, MAPE, R_2]
In [35]:
pm25_eva = print_eva(y_true_pm25, y_pred_pm25, tp='pm25')
pm10_eva = print_eva(y_true_pm10, y_pred_pm10, tp='pm10')
so2_eva = print_eva(y_true_so2, y_pred_so2, tp='so2')
no2_eva = print_eva(y_true_no2, y_pred_no2, tp='no2')
o3_eva = print_eva(y_true_o3, y_pred_o3, tp='o3')
co_eva = print_eva(y_true_co, y_pred_co, tp='co')
COL: pm25, MSE: 8.40E+01, RMSE: 9.1669, MAPE: 12.47 %, MAE: 5.8748, R_2: 0.9659
COL: pm10, MSE: 3.62E+02, RMSE: 19.0233, MAPE: 12.84 %, MAE: 12.9423, R_2: 0.9355
COL: so2, MSE: 1.01E+02, RMSE: 10.0435, MAPE: 24.88 %, MAE: 6.0451, R_2: 0.9678
COL: no2, MSE: 2.25E+01, RMSE: 4.7397, MAPE: 9.27 %, MAE: 3.346, R_2: 0.9641
COL: o3, MSE: 2.76E+01, RMSE: 5.2533, MAPE: 17.22 %, MAE: 3.7823, R_2: 0.9889
COL: co, MSE: 1.58E-02, RMSE: 0.1259, MAPE: 8.27 %, MAE: 0.09, R_2: 0.9683
In [36]:
pd.DataFrame.from_records([pm25_eva, pm10_eva, so2_eva, no2_eva, o3_eva, co_eva],
                          columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2'],
                          index=['PM25', 'PM10', 'SO2', 'NO2', 'O3', 'CO'])
Out[36]:
| | MSE | RMSE | MAE | MAPE | R_2 |
|---|---|---|---|---|---|
| PM25 | 84.032173 | 9.166906 | 5.874771 | 0.124713 | 0.965868 |
| PM10 | 361.884674 | 19.023266 | 12.942302 | 0.128373 | 0.935509 |
| SO2 | 100.872444 | 10.043528 | 6.045063 | 0.248850 | 0.967751 |
| NO2 | 22.465204 | 4.739747 | 3.346048 | 0.092662 | 0.964141 |
| O3 | 27.596682 | 5.253254 | 3.782317 | 0.172198 | 0.988936 |
| CO | 0.015848 | 0.125889 | 0.090029 | 0.082749 | 0.968320 |
In [37]:
plt.figure(figsize=(16, 9))
plt.plot(pred_rst['PM10'].values[50:150], 'o-', label='pred')
plt.plot(real_rst['PM10'].values[50:150], '*-', label='real')
plt.legend(loc='best')
Out[37]:
<matplotlib.legend.Legend at 0x7fe480840ed0>
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: SimHei
In [53]:
prediction_model.save('./models/uw_loss_lookback/')
WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.
2023-03-30 09:42:42.029138: W tensorflow/python/util/util.cc:348] Sets are not currently considered sequences, but this may change in the future, so consider avoiding using them.
WARNING:absl:Found untraced functions such as first_attn_layer_call_fn, first_attn_layer_call_and_return_conditional_losses, layer_normalization_layer_call_fn, layer_normalization_layer_call_and_return_conditional_losses, layer_normalization_1_layer_call_fn while saving (showing 5 of 450). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ./models/uw_loss_lookback/assets
In [38]:
from statistics import mean
import matplotlib.pyplot as plt
from sklearn.metrics import explained_variance_score, r2_score, median_absolute_error, mean_squared_error, mean_absolute_error
from scipy import stats
import numpy as np
from matplotlib import rcParams

config = {"font.size": 32, "mathtext.fontset": 'stix'}
rcParams.update(config)
In [40]:
def scatter_out_1(x, y, label, name):
    """Density scatter comparing two quantities: measured (x) vs. retrieved (y)."""
    # ========== Compute evaluation metrics ==========
    BIAS = mean(x - y)
    MSE = mean_squared_error(x, y)
    RMSE = np.power(MSE, 0.5)
    R2 = r2_score(x, y)
    MAE = mean_absolute_error(x, y)
    EV = explained_variance_score(x, y)
    print('========== Evaluation metrics ==========')
    print('Explained Variance(EV):', '%.3f' % (EV))
    print('Mean Absolute Error(MAE):', '%.3f' % (MAE))
    print('Mean squared error(MSE):', '%.3f' % (MSE))
    print('Root Mean Squared Error(RMSE):', '%.3f' % (RMSE))
    print('R_squared:', '%.3f' % (R2))

    # ========== Calculate the point density ==========
    xy = np.vstack([x, y])
    z = stats.gaussian_kde(xy)(xy)
    # Sort the points by density, so that the densest points are plotted last
    idx = z.argsort()
    x, y, z = x[idx], y[idx], z[idx]

    def best_fit_slope_and_intercept(xs, ys):
        m = (((mean(xs) * mean(ys)) - mean(xs * ys)) /
             ((mean(xs) * mean(xs)) - mean(xs * xs)))
        b = mean(ys) - m * mean(xs)
        return m, b

    m, b = best_fit_slope_and_intercept(x, y)
    regression_line = [(m * a) + b for a in x]

    fig, ax = plt.subplots(figsize=(12, 9), dpi=400)
    scatter = ax.scatter(x, y, marker='o', c=z * 100, s=15, label='LST', cmap='Spectral_r')
    cbar = plt.colorbar(scatter, shrink=1, orientation='vertical', extend='both',
                        pad=0.015, aspect=30, label='Frequency')
    min_value = min(min(x), min(y))
    max_value = max(max(x), max(y))
    plt.plot([min_value - 5, max_value + 5], [min_value - 5, max_value + 5], 'black', lw=1.5)  # 1:1 line
    plt.plot(x, regression_line, 'red', lw=1.5)  # regression line between predicted and measured data
    plt.axis([min_value - 5, max_value + 5, min_value - 5, max_value + 5])
    plt.xlabel('Measured %s' % label)
    plt.ylabel('Retrieved %s' % label)
    # Text positions need adjusting to the x/y value range
    plt.text(min_value - 5 + (max_value - min_value) * 0.05, int(max_value * 0.95), '$N=%.f$' % len(y))
    plt.text(min_value - 5 + (max_value - min_value) * 0.05, int(max_value * 0.88), '$R^2=%.2f$' % R2)
    plt.text(min_value - 5 + (max_value - min_value) * 0.05, int(max_value * 0.81), '$RMSE=%.2f$' % RMSE)
    plt.xlim(min_value - 5, max_value + 5)
    plt.ylim(min_value - 5, max_value + 5)
    plt.savefig(f'./figure/lookback/{name}.png', dpi=800, bbox_inches='tight', pad_inches=0)
    plt.show()
In [41]:
scatter_out_1(real_rst['PM10'].values, pred_rst['PM10'].values, label='$PM_{10}\ (\mu g/m^3)$', name='PM10')
========== Evaluation metrics ==========
Explained Variance(EV): 0.939
Mean Absolute Error(MAE): 12.942
Mean squared error(MSE): 361.885
Root Mean Squared Error(RMSE): 19.023
R_squared: 0.936
findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.
findfont: Generic family 'sans-serif' not found because none of the following families were found: SimHei
In [44]:
scatter_out_1(real_rst['PM2.5'].values, pred_rst['PM2.5'].values, label='$PM_{2.5}\ (\mu g/m^3)$', name='PM25')
========== Evaluation metrics ==========
Explained Variance(EV): 0.967
Mean Absolute Error(MAE): 5.875
Mean squared error(MSE): 84.032
Root Mean Squared Error(RMSE): 9.167
R_squared: 0.966
In [45]:
scatter_out_1(real_rst['SO2'].values, pred_rst['SO2'].values, label='$SO_2\ (\mu g/m^3)$', name='SO2')
========== Evaluation metrics ==========
Explained Variance(EV): 0.968
Mean Absolute Error(MAE): 6.045
Mean squared error(MSE): 100.872
Root Mean Squared Error(RMSE): 10.044
R_squared: 0.968
In [46]:
scatter_out_1(real_rst['NO2'].values, pred_rst['NO2'].values, label='$NO_2\ (\mu g/m^3)$', name='NO2')
========== Evaluation metrics ==========
Explained Variance(EV): 0.965
Mean Absolute Error(MAE): 3.346
Mean squared error(MSE): 22.465
Root Mean Squared Error(RMSE): 4.740
R_squared: 0.964
In [47]:
scatter_out_1(real_rst['O3'].values, pred_rst['O3'].values, label='$O_3\ (\mu g/m^3)$', name='O3')
========== Evaluation metrics ==========
Explained Variance(EV): 0.989
Mean Absolute Error(MAE): 3.782
Mean squared error(MSE): 27.597
Root Mean Squared Error(RMSE): 5.253
R_squared: 0.989
In [50]:
def scatter_out_2(x, y, name):
    """Density scatter for CO, with a fixed 0-6 mg/m^3 axis range."""
    # ========== Compute evaluation metrics ==========
    BIAS = mean(x - y)
    MSE = mean_squared_error(x, y)
    RMSE = np.power(MSE, 0.5)
    R2 = r2_score(x, y)
    MAE = mean_absolute_error(x, y)
    EV = explained_variance_score(x, y)
    print('========== Evaluation metrics ==========')
    print('Explained Variance(EV):', '%.3f' % (EV))
    print('Mean Absolute Error(MAE):', '%.3f' % (MAE))
    print('Mean squared error(MSE):', '%.3f' % (MSE))
    print('Root Mean Squared Error(RMSE):', '%.3f' % (RMSE))
    print('R_squared:', '%.3f' % (R2))

    # ========== Calculate the point density ==========
    xy = np.vstack([x, y])
    z = stats.gaussian_kde(xy)(xy)
    # Sort the points by density, so that the densest points are plotted last
    idx = z.argsort()
    x, y, z = x[idx], y[idx], z[idx]

    def best_fit_slope_and_intercept(xs, ys):
        m = (((mean(xs) * mean(ys)) - mean(xs * ys)) /
             ((mean(xs) * mean(xs)) - mean(xs * xs)))
        b = mean(ys) - m * mean(xs)
        return m, b

    m, b = best_fit_slope_and_intercept(x, y)
    regression_line = [(m * a) + b for a in x]

    fig, ax = plt.subplots(figsize=(12, 9), dpi=400)
    scatter = ax.scatter(x, y, marker='o', c=z * 100, s=15, label='LST', cmap='Spectral_r')
    cbar = plt.colorbar(scatter, shrink=1, orientation='vertical', extend='both',
                        pad=0.015, aspect=30, label='frequency')
    plt.plot([0, 6], [0, 6], 'black', lw=1.5)  # 1:1 line
    plt.plot(x, regression_line, 'red', lw=1.5)  # regression line between predicted and measured data
    plt.axis([0, 6, 0, 6])
    plt.xlabel(f'Measured {name}')
    plt.ylabel(f'Retrieved {name}')
    plt.text(0.3, 5.5, '$N=%.f$' % len(y))  # text positions tuned to the 0-6 axis range
    plt.text(0.3, 5.0, '$R^2=%.2f$' % R2)
    plt.text(0.3, 4.5, '$RMSE=%.2f$' % RMSE)
    plt.xlim(0, 6)
    plt.ylim(0, 6)
    plt.savefig('./figure/lookback/CO.png', dpi=800, bbox_inches='tight', pad_inches=0)
    plt.show()
In [52]:
scatter_out_2(y_true_co, y_pred_co, name='$CO \ (mg/m^3)$')
========== Evaluation metrics ==========
Explained Variance(EV): 0.970
Mean Absolute Error(MAE): 0.090
Mean squared error(MSE): 0.016
Root Mean Squared Error(RMSE): 0.126
R_squared: 0.968