ICEEMDAN-Solar_power-forecast/iceemdan-筛选-high-ConvBiGruA...

In [1]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\computation\expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED
C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (

The next cell defines the function time_series_to_supervised, which converts time-series data into a dataset for supervised learning. Its parts are as follows:

data: the input time series, as a list or 2D NumPy array. n_in: the number of lagged observations used as input, i.e. how many past time steps feed the model; the default of 96 uses the previous 96 time steps. n_out: the number of observations produced as output, i.e. how many future time steps to predict; the default of 10 predicts the next 10 time steps. dropnan: a Boolean indicating whether rows containing NaN values are dropped; the default is True. The function first checks the dimensionality of the input and initializes a few variables. It then wraps the input in a DataFrame df, saves the original column names, and creates two empty lists, cols and names, to collect the new feature columns and their names.

Next, the function builds the input feature columns and their names. It first appends the original observation sequence to cols and its column names to names. It then appends each lagged sequence to cols, naming the columns (original column name)(t-lag). This forms the input part of the frame.

After that it builds the output part: each future sequence is appended to cols with column names of the form (original column name)(t+horizon).

Finally, all feature columns are concatenated into a new DataFrame agg. If dropnan is True, rows containing NaN values are dropped, and the function returns the processed dataset agg.

In [2]:
def time_series_to_supervised(data, n_in=96, n_out=10, dropnan=True):
    """
    :param data: observation sequence, as a list or 2D NumPy array. Required.
    :param n_in: number of lagged observations used as input (X). May range over [1..len(data)]. Optional; defaults to 96.
    :param n_out: number of observations used as output (y). May range over [0..len(data)]. Optional; defaults to 10.
    :param dropnan: Boolean, whether to drop rows with NaN values. Optional; defaults to True.
    :return: the reframed DataFrame for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    origNames = df.columns
    cols, names = list(), list()
    cols.append(df.shift(0))
    names += [('%s' % origNames[j]) for j in range(n_vars)]
    n_in = max(0, n_in)
    for i in range(n_in, 0, -1):
        time = '(t-%d)' % i
        cols.append(df.shift(i))
        names += [('%s%s' % (origNames[j], time)) for j in range(n_vars)]
    n_out = max(n_out, 0)
    for i in range(1, n_out+1):
        time = '(t+%d)' % i
        cols.append(df.shift(-i))
        names += [('%s%s' % (origNames[j], time)) for j in range(n_vars)]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg
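As a quick illustration (a sketch, not part of the original run), a tiny array makes the column layout visible:

In [ ]:
# Frame 6 time steps of 2 variables with n_in=2 lags and n_out=1 lead.
demo = np.arange(12, dtype=float).reshape(6, 2)
demo_sup = time_series_to_supervised(demo, n_in=2, n_out=1)
print(demo_sup.columns.tolist())
# ['0', '1', '0(t-2)', '1(t-2)', '0(t-1)', '1(t-1)', '0(t+1)', '1(t+1)']
print(demo_sup.shape)  # (3, 8): rows made incomplete by shifting are dropped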
In [3]:
# Load the data
path1 = r"D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\data6.csv"  # path to the data file
# The file is read as CSV with read_csv(); for an Excel sheet, use pandas read_excel() instead.
datas1 = pd.DataFrame(pd.read_csv(path1))
# Originally only columns 3, 23, 16-21 and 27 were used; leave the next line commented out to keep all columns.
# data1 = datas1.iloc[:,np.r_[3,23,16:22,27]]
data1 = datas1.interpolate()  # fill missing values by interpolation
values1 = data1.values
print(data1.head())
print(data1.shape)
        Temp   Humidity       GHI       DHI  Rainfall  Power
0  19.779453  40.025826  3.232706  1.690531       0.0    0.0
1  19.714937  39.605961  3.194991  1.576346       0.0    0.0
2  19.549330  39.608631  3.070866  1.576157       0.0    0.0
3  19.405870  39.680702  3.038623  1.482489       0.0    0.0
4  19.387363  39.319881  2.656474  1.134153       0.0    0.0
(104256, 6)
In [25]:
# data2= data1.drop(['date','Air_P','RH'], axis = 1)
In [22]:
# Load the reconstructed high-frequency component
high_re = r"D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\完整的模型代码流程\high_re.csv"  # path to the data file
high_re = pd.DataFrame(pd.read_csv(high_re))
In [23]:
reconstructed_data_high = high_re
# Print the reconstructed high-frequency data
print(reconstructed_data_high)
        column_name
0         -1.426824
1         -1.426819
2         -1.426815
3         -1.426812
4         -1.426810
...             ...
104251    -1.629381
104252    -1.629328
104253    -1.629271
104254    -1.629213
104255    -1.629152

[104256 rows x 1 columns]
In [25]:
import matplotlib.pyplot as plt

# Original data, for comparison against the reconstruction
original_data = data1['Power'].values

# Build a time axis matching the data
time = range(len(original_data))

# Create the figure
plt.figure(figsize=(10, 6))

# Plot the original data (disabled here)
# plt.plot(time, original_data, label='Original Data', color='blue')

# Plot the reconstructed data
plt.plot(reconstructed_data_high[200:1000], label='Reconstructed Data', color='red')

# Add title and labels
plt.title('Comparison between Original and reconstructed_data_high')
plt.xlabel('Time')
plt.ylabel('Power')
plt.legend()

# Show the figure
plt.show()
[Figure: reconstructed high-frequency component, samples 200-1000]
In [26]:
data3=data1.iloc[:,:5]
In [27]:
import pandas as pd

# Wrap data3 and the reconstructed component in DataFrames
data3_df = pd.DataFrame(data3)
imf1_df = pd.DataFrame(reconstructed_data_high)

# Merge data3_df and imf1_df column-wise
merged_df = pd.concat([data3_df, imf1_df], axis=1)

merged_df = merged_df.iloc[:104256]

# Print the merged table
print(merged_df)
             Temp   Humidity       GHI       DHI  Rainfall  column_name
0       19.779453  40.025826  3.232706  1.690531       0.0    -1.426824
1       19.714937  39.605961  3.194991  1.576346       0.0    -1.426819
2       19.549330  39.608631  3.070866  1.576157       0.0    -1.426815
3       19.405870  39.680702  3.038623  1.482489       0.0    -1.426812
4       19.387363  39.319881  2.656474  1.134153       0.0    -1.426810
...           ...        ...       ...       ...       ...          ...
104251  13.303740  34.212711  1.210789  0.787026       0.0    -1.629381
104252  13.120920  34.394939  2.142980  1.582670       0.0    -1.629328
104253  12.879215  35.167400  1.926214  1.545889       0.0    -1.629271
104254  12.915867  35.359989  1.317695  0.851529       0.0    -1.629213
104255  13.134816  34.500034  1.043269  0.597816       0.0    -1.629152

[104256 rows x 6 columns]
In [28]:
merged_df.shape
Out[28]:
(104256, 6)
In [29]:
# Normalize all features to [0, 1] with MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
scaledData1 = scaler.fit_transform(merged_df)
print(scaledData1.shape)
(104256, 6)
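The scaler is fit on all six columns jointly, so any later inverse_transform call must also receive six-column arrays (this is why the predictions are broadcast back to six columns further below). The per-column ranges it learned can be inspected directly:

In [ ]:
# Per-column minima/maxima stored by MinMaxScaler during fit_transform
print(scaler.data_min_)
print(scaler.data_max_)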
In [30]:
n_steps_in = 96   # length of the history window (input time steps)
n_steps_out = 1   # prediction horizon (output time steps)
processedData1 = time_series_to_supervised(scaledData1, n_steps_in, n_steps_out)
print(processedData1.head())
            0         1         2         3    4         5   0(t-96)  \
96   0.555631  0.349673  0.190042  0.040558  0.0  0.245160  0.490360   
97   0.564819  0.315350  0.211335  0.044613  0.0  0.264683  0.489088   
98   0.576854  0.288321  0.229657  0.047549  0.0  0.283988  0.485824   
99   0.581973  0.268243  0.247775  0.053347  0.0  0.303131  0.482997   
100  0.586026  0.264586  0.266058  0.057351  0.0  0.322308  0.482632   

      1(t-96)   2(t-96)   3(t-96)  ...    2(t-1)    3(t-1)  4(t-1)    5(t-1)  \
96   0.369105  0.002088  0.002013  ...  0.166009  0.036794     0.0  0.225396   
97   0.364859  0.002061  0.001839  ...  0.190042  0.040558     0.0  0.245160   
98   0.364886  0.001973  0.001839  ...  0.211335  0.044613     0.0  0.264683   
99   0.365615  0.001950  0.001697  ...  0.229657  0.047549     0.0  0.283988   
100  0.361965  0.001679  0.001167  ...  0.247775  0.053347     0.0  0.303131   

       0(t+1)    1(t+1)    2(t+1)    3(t+1)  4(t+1)    5(t+1)  
96   0.564819  0.315350  0.211335  0.044613     0.0  0.264683  
97   0.576854  0.288321  0.229657  0.047549     0.0  0.283988  
98   0.581973  0.268243  0.247775  0.053347     0.0  0.303131  
99   0.586026  0.264586  0.266058  0.057351     0.0  0.322308  
100  0.590772  0.258790  0.282900  0.060958     0.0  0.340588  

[5 rows x 588 columns]
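With 6 variables, n_steps_in=96 and n_steps_out=1, the frame has 6 × (96 + 1 + 1) = 588 columns: 96 lags, the current step, and one lead per variable. Dropping the rows made incomplete by shifting leaves 104256 − 97 = 104159 rows.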
In [31]:
# processedData1.to_csv('processedData1.csv', index=False)
In [32]:
data_x = processedData1.loc[:,'0(t-96)':'5(t-1)']
data_y = processedData1.loc[:,'5']
In [33]:
data_x.shape
Out[33]:
(104159, 576)
In [34]:
data_y
Out[34]:
96        0.245160
97        0.264683
98        0.283988
99        0.303131
100       0.322308
            ...   
104250    0.000090
104251    0.000099
104252    0.000109
104253    0.000118
104254    0.000128
Name: 5, Length: 104159, dtype: float64
In [35]:
data_y.shape
Out[35]:
(104159,)
In [36]:
# 7. Split into training and test sets

test_size = int(len(data_x) * 0.2)
# Compute the index ranges of the training and test sets
train_indices = range(len(data_x) - test_size)
test_indices = range(len(data_x) - test_size, len(data_x))

# Split the data chronologically by those index ranges
train_X1 = data_x.iloc[train_indices].values.reshape((-1, n_steps_in, scaledData1.shape[1]))
test_X1 = data_x.iloc[test_indices].values.reshape((-1, n_steps_in, scaledData1.shape[1]))
train_y = data_y.iloc[train_indices].values
test_y = data_y.iloc[test_indices].values


# To get the same random split across runs, set random_state to a fixed integer:
# train_X1,test_X1, train_y, test_y = train_test_split(data_x.values, data_y.values, test_size=0.2, random_state=343)
# reshape input to be 3D [samples, timesteps, features]
train_X = train_X1.reshape((train_X1.shape[0], n_steps_in, scaledData1.shape[1]))
test_X = test_X1.reshape((test_X1.shape[0], n_steps_in, scaledData1.shape[1]))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# The set is split 80/20 in time order (no shuffling). train_X1 and test_X1 are
# already 3D [samples, timesteps, features], so the reshape above is a no-op
# kept for clarity; the shapes are printed below.
(83328, 96, 6) (83328,) (20831, 96, 6) (20831,)
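The commented-out train_test_split above shuffles by default, which would leak future samples into training; if the sklearn API is preferred, a chronological split is obtained with shuffle=False (a sketch, equivalent to the index-based split used here):

In [ ]:
# Chronological 80/20 split via sklearn (sketch; uncomment to use instead of the index ranges)
# train_X1, test_X1, train_y, test_y = train_test_split(
#     data_x.values, data_y.values, test_size=0.2, shuffle=False)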
In [37]:
train_X1.shape
Out[37]:
(83328, 96, 6)
In [38]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, Bidirectional, GlobalAveragePooling1D, Dense, GRU, MaxPooling1D
from tensorflow.keras.models import Model
class SelfAttention(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads):
        super(SelfAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model % self.num_heads == 0
        self.depth = d_model // self.num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        scaled_attention, attention_weights = self.scaled_dot_product_attention(q, k, v, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output, attention_weights

    def scaled_dot_product_attention(self, q, k, v, mask):
        matmul_qk = tf.matmul(q, k, transpose_b=True)
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

        if mask is not None:
            scaled_attention_logits += (mask * -1e9)

        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, v)
        return output, attention_weights

class SelfAttentionWithRelativePositionEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, max_len=5000):
        super(SelfAttentionWithRelativePositionEncoding, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.max_len = max_len
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)
        self.relative_position_encoding = AdvancedRelativePositionalEncoding(d_model)

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        # 添加相对位置编码
        k += self.relative_position_encoding(k)
        q += self.relative_position_encoding(q)

        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        scaled_attention, attention_weights = self.scaled_dot_product_attention(q, k, v, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output, attention_weights

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.d_model // self.num_heads))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def scaled_dot_product_attention(self, q, k, v, mask):
        matmul_qk = tf.matmul(q, k, transpose_b=True)
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

        if mask is not None:
            scaled_attention_logits += (mask * -1e9)

        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, v)
        return output, attention_weights

class AdvancedRelativePositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model, max_len=5000):
        super(AdvancedRelativePositionalEncoding, self).__init__()
        self.max_len = max_len
        self.d_model = d_model
        # Innovation: learnable parameters u and v apply a linear transform to the encoding
        self.u = self.add_weight(shape=(d_model,), initializer='random_normal', trainable=True)
        self.v = self.add_weight(shape=(d_model,), initializer='random_normal', trainable=True)

    def call(self, inputs):
        seq_length = tf.shape(inputs)[1]
        # Sinusoidal positional encoding, shape [seq_length, d_model]
        pos_encoding = self.relative_positional_encoding(seq_length, self.d_model)

        # Linear transform of the encoding with the learnable parameters:
        # u scales each embedding dimension and v shifts it. (The original
        # four-term einsum block mixed incompatible tensor shapes and could
        # not run; this keeps the stated intent in a runnable form.)
        pe_with_params = pos_encoding * self.u + self.v

        return inputs + pe_with_params[tf.newaxis, :, :]

    def relative_positional_encoding(self, position, d_model):
        pos = tf.range(tf.cast(position, tf.float32))
        i = tf.range(d_model, dtype=tf.float32)

        # Standard sinusoidal angle schedule
        angles = 1 / tf.pow(10000.0, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        angle_rads = tf.einsum('i,j->ij', pos, angles)

        # Sine on even dimensions, cosine on odd dimensions, concatenated back
        # to shape [position, d_model] (d_model is assumed even)
        pos_encoding = tf.concat([tf.sin(angle_rads[:, 0::2]), tf.cos(angle_rads[:, 1::2])], axis=-1)

        return pos_encoding





def PosConv1biGRUWithSelfAttention(input_shape, gru_units, num_heads):
    inputs = Input(shape=input_shape)
    # CNN layer (with pool_size=1 the pooling keeps the sequence length unchanged)
    cnn_layer = Conv1D(filters=64, kernel_size=2, activation='relu')(inputs)
    cnn_layer = MaxPooling1D(pool_size=1)(cnn_layer)
    gru_output = Bidirectional(GRU(gru_units, return_sequences=True))(cnn_layer)
    
    # Apply Self-Attention
    self_attention = SelfAttention(d_model=gru_units*2, num_heads=num_heads)
    gru_output, _ = self_attention(gru_output, gru_output, gru_output, mask=None)
    
    pool1 = GlobalAveragePooling1D()(gru_output)
    output = Dense(1)(pool1)
    
    return Model(inputs=inputs, outputs=output)


input_shape = (96, 6)
gru_units = 64
num_heads = 8

# Create model
model = PosConv1biGRUWithSelfAttention(input_shape, gru_units, num_heads)
model.compile(optimizer='adam', loss='mse')
model.summary()
Model: "functional_1"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)         Output Shape          Param #  Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ input_layer_1       │ (None, 96, 6)     │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ conv1d_1 (Conv1D)   │ (None, 95, 64)    │        832 │ input_layer_1[0]… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ max_pooling1d_1     │ (None, 95, 64)    │          0 │ conv1d_1[0][0]    │
│ (MaxPooling1D)      │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ bidirectional_1     │ (None, 95, 128)   │     49,920 │ max_pooling1d_1[ │
│ (Bidirectional)     │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ self_attention_1    │ [(None, None,     │     66,048 │ bidirectional_1[ │
│ (SelfAttention)     │ 128), (None, 8,   │            │ bidirectional_1[ │
│                     │ None, None)]      │            │ bidirectional_1[ │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ global_average_poo… │ (None, 128)       │          0 │ self_attention_1… │
│ (GlobalAveragePool… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_9 (Dense)     │ (None, 1)         │        129 │ global_average_p… │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 116,929 (456.75 KB)
 Trainable params: 116,929 (456.75 KB)
 Non-trainable params: 0 (0.00 B)
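Before training, the attention block can be sanity-checked on a dummy batch (a sketch added here; shapes only, with d_model matching the BiGRU output width of 128):

In [ ]:
# Shape check: the output matches the input [batch, steps, d_model];
# the attention weights are [batch, heads, steps, steps].
dummy = tf.zeros((2, 95, 128))
att_out, att_w = SelfAttention(d_model=128, num_heads=8)(dummy, dummy, dummy, mask=None)
print(att_out.shape, att_w.shape)  # expected: (2, 95, 128) (2, 8, 95, 95)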
In [39]:
# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Early stopping on the validation loss
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min')

# Fit the model with the early-stopping callback
history = model.fit(train_X, train_y, epochs=100, batch_size=64, validation_data=(test_X, test_y),
                    callbacks=[early_stopping])
# Predict on the test set
lstm_pred = model.predict(test_X)
# The predictions are reshaped to match the original data in the cells below
Epoch 1/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 71s 53ms/step - loss: 0.0196 - val_loss: 0.0018
Epoch 2/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 77s 59ms/step - loss: 0.0013 - val_loss: 0.0019
Epoch 3/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 79s 61ms/step - loss: 0.0012 - val_loss: 0.0017
Epoch 4/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 76s 58ms/step - loss: 0.0010 - val_loss: 0.0015
Epoch 5/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 75s 57ms/step - loss: 9.7760e-04 - val_loss: 0.0018
Epoch 6/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 77s 59ms/step - loss: 9.9108e-04 - val_loss: 0.0017
Epoch 7/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 90s 69ms/step - loss: 9.7381e-04 - val_loss: 0.0016
Epoch 8/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 82s 63ms/step - loss: 9.1248e-04 - val_loss: 0.0015
Epoch 9/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 86s 66ms/step - loss: 9.4959e-04 - val_loss: 0.0016
Epoch 10/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 76s 58ms/step - loss: 9.3746e-04 - val_loss: 0.0016
Epoch 11/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 88s 68ms/step - loss: 9.1358e-04 - val_loss: 0.0015
Epoch 12/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 80s 61ms/step - loss: 8.8907e-04 - val_loss: 0.0016
Epoch 13/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 87s 67ms/step - loss: 9.0822e-04 - val_loss: 0.0015
Epoch 14/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 89s 68ms/step - loss: 8.9505e-04 - val_loss: 0.0015
Epoch 15/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 88s 68ms/step - loss: 8.9855e-04 - val_loss: 0.0015
Epoch 16/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 89s 68ms/step - loss: 9.4414e-04 - val_loss: 0.0015
Epoch 17/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 91s 70ms/step - loss: 8.8443e-04 - val_loss: 0.0014
Epoch 18/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 89s 68ms/step - loss: 8.7323e-04 - val_loss: 0.0015
Epoch 19/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 90s 69ms/step - loss: 8.7132e-04 - val_loss: 0.0014
Epoch 20/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 86s 66ms/step - loss: 8.7265e-04 - val_loss: 0.0015
Epoch 21/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 82s 63ms/step - loss: 8.4318e-04 - val_loss: 0.0015
Epoch 22/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 87s 67ms/step - loss: 7.9306e-04 - val_loss: 0.0015
Epoch 23/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 84s 65ms/step - loss: 8.1019e-04 - val_loss: 0.0015
Epoch 24/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 91s 70ms/step - loss: 7.8526e-04 - val_loss: 0.0015
Epoch 25/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 94s 72ms/step - loss: 8.6874e-04 - val_loss: 0.0014
Epoch 26/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 90s 69ms/step - loss: 8.0322e-04 - val_loss: 0.0015
Epoch 27/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 90s 69ms/step - loss: 8.3907e-04 - val_loss: 0.0014
Epoch 28/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 92s 71ms/step - loss: 8.2911e-04 - val_loss: 0.0015
Epoch 29/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 93s 71ms/step - loss: 8.1428e-04 - val_loss: 0.0015
Epoch 30/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 90s 69ms/step - loss: 8.1292e-04 - val_loss: 0.0015
Epoch 31/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 90s 69ms/step - loss: 8.2787e-04 - val_loss: 0.0015
Epoch 32/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 99s 76ms/step - loss: 7.9780e-04 - val_loss: 0.0015
Epoch 33/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 98s 75ms/step - loss: 7.9815e-04 - val_loss: 0.0015
Epoch 34/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 96s 74ms/step - loss: 7.9996e-04 - val_loss: 0.0016
Epoch 35/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 93s 71ms/step - loss: 7.5751e-04 - val_loss: 0.0016
Epoch 36/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 84s 65ms/step - loss: 8.1121e-04 - val_loss: 0.0015
Epoch 37/100
1302/1302 ━━━━━━━━━━━━━━━━━━━━ 92s 71ms/step - loss: 7.6797e-04 - val_loss: 0.0015
651/651 ━━━━━━━━━━━━━━━━━━━━ 12s 16ms/step
In [40]:
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.legend()
plt.show()
[Figure: training vs. validation loss per epoch]
In [41]:
lstm_pred.shape
Out[41]:
(20831, 1)
In [42]:
test_y.shape
Out[42]:
(20831,)
In [43]:
test_y1=test_y.reshape(20831,1)
In [44]:
test_y1
Out[44]:
array([[4.52189913e-01],
       [3.12516873e-01],
       [3.25310588e-01],
       ...,
       [1.08522631e-04],
       [1.18219088e-04],
       [1.28327022e-04]])
In [45]:
results1 = np.broadcast_to(lstm_pred, (20831, 6))
In [46]:
test_y2 = np.broadcast_to(test_y1, (20831, 6))
In [47]:
# Invert the MinMax normalization (the scaler expects six columns)
inv_forecast_y = scaler.inverse_transform(results1)
inv_test_y = scaler.inverse_transform(test_y2)
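The broadcast above copies the single predicted column into all six slots so that the six-column scaler can invert it; only column 5 (the high-frequency Power component) is read back afterwards. A sketch of an equivalent approach that fills only the target column:

In [ ]:
# Place the predictions in column 5 of a zero matrix, invert, and read column 5 back.
tmp = np.zeros((len(lstm_pred), scaledData1.shape[1]))
tmp[:, 5] = lstm_pred.ravel()
inv_pred_power = scaler.inverse_transform(tmp)[:, 5]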
In [48]:
inv_test_y
Out[48]:
array([[ 1.78428369e+01,  4.82409691e+01,  6.37156385e+02,
         2.97801603e+02,  1.07621239e+01,  9.90052500e-01],
       [ 1.07562527e+01,  3.44305945e+01,  4.40440713e+02,
         2.05929459e+02,  7.43790432e+00,  1.80780551e-01],
       [ 1.14053667e+01,  3.56955916e+01,  4.58459395e+02,
         2.14344726e+02,  7.74239484e+00,  2.54907916e-01],
       ...,
       [-5.09439462e+00,  3.54076535e+00,  4.44428011e-01,
         4.37940726e-01,  2.58283957e-03, -1.62932764e+00],
       [-5.09390265e+00,  3.54172410e+00,  4.58084512e-01,
         4.44318723e-01,  2.81361533e-03, -1.62927146e+00],
       [-5.09338980e+00,  3.54272354e+00,  4.72320538e-01,
         4.50967376e-01,  3.05418424e-03, -1.62921289e+00]])
In [49]:
# Compute the RMSE of the de-normalized Power column
rmse = sqrt(mean_squared_error(inv_test_y[:,5], inv_forecast_y[:,5]))
print('Test RMSE: %.3f' % rmse)
# Plot true vs. predicted power
plt.figure(figsize=(16,8))
plt.plot(inv_test_y[300:3000,5], label='true')
plt.plot(inv_forecast_y[300:3000,5], label='pre')
plt.legend()
plt.show()
Test RMSE: 0.222
[Figure: true vs. predicted power, test samples 300-3000]
In [57]:
# Evaluation metrics: MSE, RMSE, MAE and R² via sklearn
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
print('mean_squared_error:', mean_squared_error(test_y, lstm_pred))    # MSE
print("mean_absolute_error:", mean_absolute_error(test_y, lstm_pred))  # MAE
print("rmse:", sqrt(mean_squared_error(test_y, lstm_pred)))
print("r2 score:", r2_score(inv_test_y[480:850,5], inv_forecast_y[480:850,5]))  # R² on samples 480-850 only
mean_squared_error: 0.0014630274318863602
mean_absolute_error: 0.013232284805068965
rmse: 0.03824954159053884
r2 score: 0.9900756487103545
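Note that the R² above is computed on the slice 480-850 only; the full-horizon value can be obtained the same way (a sketch; the value is not reproduced here):

In [ ]:
# R² over the entire test horizon
# print("r2 full:", r2_score(inv_test_y[:, 5], inv_forecast_y[:, 5]))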
In [107]:
df1 = pd.DataFrame(inv_test_y[:,5], columns=['column_name'])
In [108]:
# Save the DataFrame to a CSV file at the given path
df1.to_csv('高频_test.csv', index=False)
In [109]:
df2 = pd.DataFrame(inv_forecast_y[:,5], columns=['column_name'])
In [110]:
# Save the DataFrame to a CSV file at the given path
df2.to_csv('高频_forecast.csv', index=False)
In [ ]: