T85_code/.ipynb_checkpoints/基于attention+LSTM对天数据建模-chec...


In [1]:
import numpy as np
import pandas as pd
import keras
from keras.layers import Dense, Conv1D, Input, Bidirectional, LSTM, Multiply, Dropout, Flatten, Softmax, Lambda
from keras.models import Model
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_7812\4169542727.py in <module>
      1 import numpy as np
      2 import pandas as pd
----> 3 import keras
      4 from keras.layers import Dense, Conv1D, Input, Bidirectional, LSTM, Multiply, Dropout, Flatten, Softmax, Lambda
      5 from keras.models import Model

~\AppData\Roaming\Python\Python37\site-packages\keras\__init__.py in <module>
     23 
     24 # See b/110718070#comment18 for more details about this import.
---> 25 from keras import models
     26 
     27 from keras.engine.input_layer import Input

~\AppData\Roaming\Python\Python37\site-packages\keras\models.py in <module>
     17 
     18 import tensorflow.compat.v2 as tf
---> 19 from keras import backend
     20 from keras import metrics as metrics_module
     21 from keras import optimizer_v1

~\AppData\Roaming\Python\Python37\site-packages\keras\backend.py in <module>
     35 from tensorflow.python.distribute import distribute_coordinator as dc
     36 from tensorflow.python.distribute import distribute_coordinator_context as dc_context
---> 37 from tensorflow.python.eager.context import get_config
     38 from tensorflow.python.framework import config
     39 from keras import backend_config

ImportError: cannot import name 'get_config' from 'tensorflow.python.eager.context' (C:\Users\zhaojh\AppData\Roaming\Python\Python37\site-packages\tensorflow\python\eager\context.py)
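The traceback above is the classic symptom of a standalone keras install that is out of step with the installed TensorFlow build: get_config moved within tensorflow.python.eager.context across TF releases. A minimal diagnostic sketch, assuming the fix is simply to align the two packages (the version query below works even while `import keras` itself fails):

import tensorflow as tf
import pkg_resources
print('tensorflow', tf.__version__)                              # e.g. 2.1.x
print('keras', pkg_resources.get_distribution('keras').version)  # e.g. 2.6.x
# If the two diverge, reinstall a matching pair, or use the bundled API
# (`from tensorflow import keras`), which cannot drift out of sync.

The remaining cells reflect a session in which the import succeeds.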
In [ ]:
data = pd.read_csv('./train_data_processed.csv')
In [3]:
obj_cols = data.columns[-32:]  # last 32 columns are assumed categorical (e.g. one-hot) and are left unscaled
num_cols = [x for x in data.columns if x not in obj_cols]  # numeric columns to normalize
In [4]:
# Column-wise min/max over the whole dataset; note these are computed before the
# train/valid/test split below, so the scaling statistics include valid/test rows
maxs = data[num_cols].max()
mins = data[num_cols].min()
In [5]:
# Min-max normalize every numeric column (including the target) into [0, 1]
for col in num_cols:
    data[col] = (data[col] - mins[col]) / (maxs[col] - mins[col])
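A vectorized equivalent of the loop above, as a sketch rather than something to run in addition to it; the zero-range guard is an addition, not in the original:

# Same min-max scaling in one step; a constant column would divide by zero in
# the loop version, so its range is clamped to 1 here.
rng = (maxs - mins).replace(0, 1)
data[num_cols] = (data[num_cols] - mins) / rng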
In [6]:
optim = keras.optimizers.Adam(learning_rate=5e-4)
In [7]:
def build_model(n_features, n_outs):
    # Each sample is a single "timestep" carrying all features: (1, n_features)
    inputs = Input(shape=(1, n_features))
    # Pointwise Conv1D (kernel_size=1) acts as a per-timestep feature projection
    x = Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)
    x = Dropout(rate=0.1)(x)
    lstm_out = Bidirectional(LSTM(units=128, return_sequences=True))(x)
    # Attention: score each timestep, softmax the scores, reweight the LSTM states.
    # Softmax() defaults to axis=-1; on shape (None, 1, 1) it is constant 1, so
    # with a single timestep this block is effectively a pass-through.
    attention_pre = Dense(1, name='attention_vec')(lstm_out)
    attention_probs = Softmax()(attention_pre)
    attention_mul = Multiply()([attention_probs, lstm_out])
    attention_mul = Flatten()(attention_mul)
    output = Dense(32, activation='relu')(attention_mul)
    # Sigmoid output matches the [0, 1] min-max-scaled target
    output = Dense(n_outs, activation='sigmoid')(output)
    model = Model(inputs=[inputs], outputs=output)
    model.summary()
    model.compile(loss='mse', optimizer=optim)
    return model
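Because the input here has exactly one timestep, the softmax normalizes over a single score and always returns 1. A minimal sketch of the same attention block in the multi-timestep setting it is designed for, where the softmax must run over the time axis; T and the feature width are hypothetical:

T = 7                                                        # hypothetical window length
seq_in = Input(shape=(T, 64))
h = Bidirectional(LSTM(128, return_sequences=True))(seq_in)  # (None, T, 256)
scores = Dense(1)(h)                                         # one score per timestep
weights = Softmax(axis=1)(scores)                            # normalize over time, not features
context = Multiply()([weights, h])                           # attention-weighted states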
In [8]:
model = build_model(len(data.columns) - 1, 1)
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, 1, 251)       0                                            
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 1, 64)        16128       input_1[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 1, 64)        0           conv1d_1[0][0]                   
__________________________________________________________________________________________________
bidirectional_1 (Bidirectional) (None, 1, 256)       197632      dropout_1[0][0]                  
__________________________________________________________________________________________________
attention_vec (Dense)           (None, 1, 1)         257         bidirectional_1[0][0]            
__________________________________________________________________________________________________
softmax_1 (Softmax)             (None, 1, 1)         0           attention_vec[0][0]              
__________________________________________________________________________________________________
multiply_1 (Multiply)           (None, 1, 256)       0           softmax_1[0][0]                  
                                                                 bidirectional_1[0][0]            
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 256)          0           multiply_1[0][0]                 
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 32)           8224        flatten_1[0][0]                  
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 1)            33          dense_1[0][0]                    
==================================================================================================
Total params: 222,274
Trainable params: 222,274
Non-trainable params: 0
__________________________________________________________________________________________________
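The parameter counts in the summary can be checked by hand, which confirms the 251-feature input and the BiLSTM width:

assert (1 * 251 + 1) * 64 == 16128                  # Conv1D: (k*in + 1) * filters
assert 2 * 4 * ((64 + 128 + 1) * 128) == 197632     # BiLSTM: 2 directions, 4 gates
assert (256 + 1) * 1 == 257                         # attention Dense(1)
assert (256 + 1) * 32 == 8224 and (32 + 1) * 1 == 33
assert 16128 + 197632 + 257 + 8224 + 33 == 222274   # total trainable params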
In [9]:
from keras.utils.vis_utils import plot_model  # public path; tensorflow.python.* modules are private
In [10]:
plot_model(model, to_file='model.png')
Out[10]:
[model.png: diagram of the attention+LSTM architecture]
In [18]:
from sklearn.model_selection import train_test_split
In [19]:
# All columns except the target 燃料消耗量 (fuel consumption) are model inputs
feature_cols = [x for x in data.columns if x != '燃料消耗量']
len(feature_cols)
Out[19]:
251
In [20]:
# 80/10/10 split: hold out 20%, then halve the holdout into validation and test sets
train_data, valid = train_test_split(data, test_size=0.2, shuffle=True, random_state=666)
valid_data, test_data = train_test_split(valid, test_size=0.5, shuffle=True, random_state=666)
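The split sizes can be read off the shapes reported below: 922 training rows and a 116-row test set appear later, and the 115-row validation count follows from the same arithmetic:

len(train_data), len(valid_data), len(test_data)    # (922, 115, 116)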
In [21]:
X_train, Y_train = train_data[feature_cols], train_data['燃料消耗量']
X_valid, Y_valid = valid_data[feature_cols], valid_data['燃料消耗量']
X_test, Y_test = test_data[feature_cols], test_data['燃料消耗量']
In [22]:
# Insert a timestep axis: (samples, features) -> (samples, 1, features)
x_train = np.expand_dims(X_train.values, axis=1)
y_train = Y_train.values.reshape(-1, 1)
x_train.shape, y_train.shape
Out[22]:
((922, 1, 251), (922, 1))
In [23]:
x_valid = np.expand_dims(X_valid.values, axis=1)
y_valid = Y_valid.values.reshape(-1, 1)
In [24]:
x_test = np.expand_dims(X_test.values, axis=1)
y_test = Y_test.values.reshape(-1, 1)
In [25]:
# Stop when val_loss has not improved for 10 epochs; checkpoint every epoch.
# Note: with save_best_only=False, best_model.h5 ends up holding the *last*
# epoch's weights rather than the best ones.
callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=10),
             keras.callbacks.ModelCheckpoint('./best_model.h5', monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1)]
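If the intent is for best_model.h5 to actually hold the best-val_loss weights, the usual configuration looks like the sketch below; this is an alternative, not what the recorded run used:

checkpoint = keras.callbacks.ModelCheckpoint(
    './best_model.h5', monitor='val_loss',
    save_best_only=True)        # overwrite only when val_loss improves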
In [26]:
# Caution: this validates (and early-stops) on the *test* set; x_valid/y_valid
# were prepared above but go unused, so the test metrics below are not fully
# held out.
model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test), shuffle=True,
          callbacks=callbacks)
Train on 922 samples, validate on 116 samples
Epoch 1/100
922/922 [==============================] - 1s 1ms/step - loss: 0.0396 - val_loss: 0.0128
Epoch 2/100
922/922 [==============================] - 0s 69us/step - loss: 0.0074 - val_loss: 0.0057
Epoch 3/100
922/922 [==============================] - 0s 78us/step - loss: 0.0048 - val_loss: 0.0040
Epoch 4/100
922/922 [==============================] - 0s 61us/step - loss: 0.0035 - val_loss: 0.0028
Epoch 5/100
922/922 [==============================] - 0s 77us/step - loss: 0.0030 - val_loss: 0.0023
Epoch 6/100
922/922 [==============================] - 0s 69us/step - loss: 0.0025 - val_loss: 0.0020
Epoch 7/100
922/922 [==============================] - 0s 86us/step - loss: 0.0023 - val_loss: 0.0020
Epoch 8/100
922/922 [==============================] - 0s 78us/step - loss: 0.0023 - val_loss: 0.0018
Epoch 9/100
922/922 [==============================] - 0s 67us/step - loss: 0.0022 - val_loss: 0.0017
Epoch 10/100
922/922 [==============================] - 0s 61us/step - loss: 0.0019 - val_loss: 0.0016
Epoch 11/100
922/922 [==============================] - 0s 69us/step - loss: 0.0019 - val_loss: 0.0016
Epoch 12/100
922/922 [==============================] - 0s 69us/step - loss: 0.0019 - val_loss: 0.0017
Epoch 13/100
922/922 [==============================] - 0s 52us/step - loss: 0.0019 - val_loss: 0.0017
Epoch 14/100
922/922 [==============================] - 0s 69us/step - loss: 0.0018 - val_loss: 0.0015
Epoch 15/100
922/922 [==============================] - 0s 87us/step - loss: 0.0018 - val_loss: 0.0015
Epoch 16/100
922/922 [==============================] - 0s 52us/step - loss: 0.0016 - val_loss: 0.0013
Epoch 17/100
922/922 [==============================] - 0s 68us/step - loss: 0.0015 - val_loss: 0.0014
Epoch 18/100
922/922 [==============================] - 0s 73us/step - loss: 0.0014 - val_loss: 0.0013
Epoch 19/100
922/922 [==============================] - 0s 52us/step - loss: 0.0017 - val_loss: 0.0014
Epoch 20/100
922/922 [==============================] - 0s 70us/step - loss: 0.0017 - val_loss: 0.0013
Epoch 21/100
922/922 [==============================] - 0s 73us/step - loss: 0.0015 - val_loss: 0.0013
Epoch 22/100
922/922 [==============================] - 0s 69us/step - loss: 0.0014 - val_loss: 0.0013
Epoch 23/100
922/922 [==============================] - 0s 69us/step - loss: 0.0014 - val_loss: 0.0012
Epoch 24/100
922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0014
Epoch 25/100
922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0012
Epoch 26/100
922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0011
Epoch 27/100
922/922 [==============================] - 0s 65us/step - loss: 0.0013 - val_loss: 0.0012
Epoch 28/100
922/922 [==============================] - 0s 89us/step - loss: 0.0013 - val_loss: 0.0013
Epoch 29/100
922/922 [==============================] - 0s 52us/step - loss: 0.0013 - val_loss: 0.0011
Epoch 30/100
922/922 [==============================] - 0s 70us/step - loss: 0.0013 - val_loss: 0.0012
Epoch 31/100
922/922 [==============================] - 0s 60us/step - loss: 0.0014 - val_loss: 0.0013
Epoch 32/100
922/922 [==============================] - 0s 70us/step - loss: 0.0012 - val_loss: 0.0012
Epoch 33/100
922/922 [==============================] - 0s 51us/step - loss: 0.0013 - val_loss: 0.0012
Epoch 34/100
922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0012
Epoch 35/100
922/922 [==============================] - 0s 60us/step - loss: 0.0013 - val_loss: 0.0014
Epoch 36/100
922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0011
Epoch 37/100
922/922 [==============================] - 0s 62us/step - loss: 0.0011 - val_loss: 0.0011
Epoch 38/100
922/922 [==============================] - 0s 77us/step - loss: 0.0012 - val_loss: 0.0012
Epoch 39/100
922/922 [==============================] - 0s 60us/step - loss: 0.0013 - val_loss: 0.0011
Epoch 40/100
922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0011
Epoch 41/100
922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0014
Epoch 42/100
922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0014
Epoch 43/100
922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0011
Epoch 44/100
922/922 [==============================] - 0s 51us/step - loss: 0.0012 - val_loss: 0.0011
Epoch 45/100
922/922 [==============================] - 0s 95us/step - loss: 0.0011 - val_loss: 0.0011
Epoch 46/100
922/922 [==============================] - 0s 68us/step - loss: 0.0011 - val_loss: 0.0011
Epoch 47/100
922/922 [==============================] - 0s 69us/step - loss: 0.0011 - val_loss: 0.0012
Epoch 48/100
922/922 [==============================] - 0s 66us/step - loss: 0.0011 - val_loss: 0.0010
Epoch 49/100
922/922 [==============================] - 0s 69us/step - loss: 9.9569e-04 - val_loss: 9.4151e-04
Epoch 50/100
922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0011
Epoch 51/100
922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0011
Epoch 52/100
922/922 [==============================] - 0s 77us/step - loss: 0.0010 - val_loss: 9.9602e-04
Epoch 53/100
922/922 [==============================] - 0s 52us/step - loss: 9.3358e-04 - val_loss: 0.0012
Epoch 54/100
922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 9.6337e-04
Epoch 55/100
922/922 [==============================] - 0s 69us/step - loss: 9.9897e-04 - val_loss: 9.9428e-04
Epoch 56/100
922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 9.1976e-04
Epoch 57/100
922/922 [==============================] - 0s 60us/step - loss: 9.8333e-04 - val_loss: 0.0011
Epoch 58/100
922/922 [==============================] - 0s 86us/step - loss: 0.0011 - val_loss: 0.0010
Epoch 59/100
922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0010
Epoch 60/100
922/922 [==============================] - 0s 51us/step - loss: 9.6106e-04 - val_loss: 9.5494e-04
Epoch 61/100
922/922 [==============================] - 0s 87us/step - loss: 9.1071e-04 - val_loss: 8.9771e-04
Epoch 62/100
922/922 [==============================] - 0s 69us/step - loss: 9.1379e-04 - val_loss: 9.4967e-04
Epoch 63/100
922/922 [==============================] - 0s 69us/step - loss: 9.3075e-04 - val_loss: 9.1627e-04
Epoch 64/100
922/922 [==============================] - 0s 78us/step - loss: 8.8605e-04 - val_loss: 9.3663e-04
Epoch 65/100
922/922 [==============================] - 0s 69us/step - loss: 9.5708e-04 - val_loss: 0.0011
Epoch 66/100
922/922 [==============================] - 0s 68us/step - loss: 9.5701e-04 - val_loss: 8.9826e-04
Epoch 67/100
922/922 [==============================] - 0s 60us/step - loss: 9.4454e-04 - val_loss: 0.0011
Epoch 68/100
922/922 [==============================] - 0s 74us/step - loss: 9.5393e-04 - val_loss: 9.7981e-04
Epoch 69/100
922/922 [==============================] - 0s 104us/step - loss: 9.5125e-04 - val_loss: 0.0010
Epoch 70/100
922/922 [==============================] - 0s 78us/step - loss: 9.5720e-04 - val_loss: 9.7615e-04
Epoch 71/100
922/922 [==============================] - 0s 64us/step - loss: 9.2241e-04 - val_loss: 0.0010
Out[26]:
<keras.callbacks.callbacks.History at 0x2a12af4d9c8>
In [27]:
y_pred = model.predict(x_test)
In [28]:
y_pred = np.squeeze(y_pred)
y_pred
Out[28]:
array([0.30161506, 0.12067786, 0.43900865, 0.4143401 , 0.11434203,
       0.87028706, 0.15387392, 0.86223227, 0.8570186 , 0.4433931 ,
       0.7649788 , 0.36369222, 0.33063045, 0.7437426 , 0.3493362 ,
       0.7671248 , 0.7743846 , 0.12363896, 0.4528606 , 0.75037146,
       0.45131576, 0.34990048, 0.7552419 , 0.7508755 , 0.7558205 ,
       0.3391131 , 0.76618046, 0.38217723, 0.30887872, 0.36930698,
       0.3591324 , 0.83817935, 0.9321221 , 0.35430533, 0.11836711,
       0.764429  , 0.7478696 , 0.74976325, 0.3656214 , 0.3482211 ,
       0.3658831 , 0.35415024, 0.29030812, 0.7965492 , 0.9372817 ,
       0.11179626, 0.33758143, 0.305908  , 0.12149343, 0.41378874,
       0.09611899, 0.36266702, 0.76215094, 0.41939664, 0.7642038 ,
       0.36630815, 0.36369124, 0.775969  , 0.7431689 , 0.49149197,
       0.35072863, 0.7608663 , 0.88904417, 0.11546668, 0.32508087,
       0.78478754, 0.2949888 , 0.9328996 , 0.26087016, 0.15387377,
       0.7867287 , 0.35840425, 0.8485855 , 0.36474293, 0.86086893,
       0.85082245, 0.37929475, 0.88898706, 0.44798538, 0.74498856,
       0.7642088 , 0.9374167 , 0.24628928, 0.1150094 , 0.35409844,
       0.34573317, 0.1182591 , 0.35334843, 0.8806509 , 0.3744196 ,
       0.12240422, 0.7410463 , 0.3571657 , 0.44970232, 0.8927134 ,
       0.76465344, 0.7640152 , 0.33744502, 0.7715051 , 0.44094718,
       0.33831298, 0.93699497, 0.30656263, 0.10126469, 0.8242742 ,
       0.85100025, 0.42451733, 0.362445  , 0.77677643, 0.40487826,
       0.78558755, 0.339495  , 0.8240729 , 0.7534524 , 0.93057597,
       0.3128613 ], dtype=float32)
In [29]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
In [30]:
y_true = np.squeeze(y_test)
In [31]:
# Metrics on the normalized scale: y_true and y_pred are both min-max scaled
MSE = mean_squared_error(y_true, y_pred)
RMSE = np.sqrt(mean_squared_error(y_true, y_pred))
MAE = mean_absolute_error(y_true, y_pred)
MAPE = mean_absolute_percentage_error(y_true, y_pred)
R_2 = r2_score(y_true, y_pred)
print(f"MSE: {format(MSE, '.2E')}")
print(f'RMSE: {round(RMSE, 4)}')
print(f'MAE: {round(MAE, 4)}')
print(f'MAPE: {round(MAPE * 100, 2)}%')
print(f'R_2: {round(R_2, 4)}')
MSE: 1.02E-03
RMSE: 0.032
MAE: 0.0236
MAPE: 7.07%
R_2: 0.9858
In [32]:
def recover(x, col='燃料消耗量'):
    # Invert the preprocessing: undo the min-max scaling, then undo the
    # (implied) log1p transform with expm1 to get raw fuel-consumption units
    return np.expm1(x * (maxs[col] - mins[col]) + mins[col])
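A round-trip check of recover, assuming the processed CSV stores log1p-transformed values ahead of the min-max scaling above (implied by the expm1 here); the raw value is hypothetical:

raw = 1234.5                                     # hypothetical raw fuel value
col = '燃料消耗量'
scaled = (np.log1p(raw) - mins[col]) / (maxs[col] - mins[col])
assert np.isclose(recover(scaled), raw)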
In [33]:
y_true_recover = recover(y_true)
y_pred_recover = recover(y_pred)
In [34]:
# Re-score in raw fuel-consumption units after inverting the preprocessing
MSE = mean_squared_error(y_true_recover, y_pred_recover)
RMSE = np.sqrt(mean_squared_error(y_true_recover, y_pred_recover))
MAE = mean_absolute_error(y_true_recover, y_pred_recover)
MAPE = mean_absolute_percentage_error(y_true_recover, y_pred_recover)
R_2 = r2_score(y_true_recover, y_pred_recover)
print(f"MSE: {format(MSE, '.2E')}")
print(f'RMSE: {round(RMSE, 4)}')
print(f'MAE: {round(MAE, 4)}')
print(f'MAPE: {round(MAPE * 100, 2)}%')
print(f'R_2: {round(R_2, 4)}')
MSE: 7.76E+04
RMSE: 278.5442
MAE: 166.5543
MAPE: 9.54%
R_2: 0.9717