ICEEMDAN-Solar_power-forecast/ConvBigru_IRPE_Attention特定数...


In [2]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

This cell defines time_series_to_supervised, a function that converts a time series into a dataset for supervised learning. Its parameters are:

data: the input time series, as a list or 2D NumPy array. n_in: the number of lagged observations used as input, i.e. how many past time steps feed the model; the default of 96 means the previous 96 time steps are used. n_out: the number of future observations to predict; the default of 1 means one step ahead. dropnan: whether to drop rows containing NaN values; defaults to True. The function first checks the dimensionality of the input and initializes a few variables. It wraps the input in a DataFrame df, keeps the original column names, and creates two empty lists, cols and names, to hold the new feature columns and their names.

It then builds the input features: the current observations are appended to cols with their original names, followed by the lagged observations one shift at a time, each named (original column name)(t-lag).

Next it builds the output part: the future observations are appended to cols, each named (original column name)(t+lead).

Finally, all columns are concatenated into a new DataFrame agg. If dropnan is True, rows containing NaN values (introduced by the shifting) are dropped, and agg is returned.

In [3]:
def time_series_to_supervised(data, n_in=96, n_out=1, dropnan=True):
    """
    :param data: observation sequence, as a list or 2D NumPy array. Required.
    :param n_in: number of lagged observations used as input (X). May be in [1..len(data)]. Optional; defaults to 96.
    :param n_out: number of observations used as output (y). May be in [0..len(data)]. Optional; defaults to 1.
    :param dropnan: whether to drop rows with NaN values. Optional; defaults to True.
    :return: DataFrame of the series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    origNames = df.columns
    cols, names = list(), list()
    cols.append(df.shift(0))
    names += [('%s' % origNames[j]) for j in range(n_vars)]
    n_in = max(0, n_in)
    for i in range(n_in, 0, -1):
        time = '(t-%d)' % i
        cols.append(df.shift(i))
        names += [('%s%s' % (origNames[j], time)) for j in range(n_vars)]
    n_out = max(n_out, 0)
    for i in range(1, n_out+1):
        time = '(t+%d)' % i
        cols.append(df.shift(-i))
        names += [('%s%s' % (origNames[j], time)) for j in range(n_vars)]
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    if dropnan:
        agg.dropna(inplace=True)
    return agg
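As a quick illustration of the framing (a hypothetical toy run, not part of the original notebook), a 2-column series of 5 time steps with n_in=2 and n_out=1 keeps only the rows that have a complete history window and a complete target:

In [ ]:
# Toy example: 2 features, 5 timesteps, 2 lag steps in, 1 step ahead out.
toy = np.arange(10).reshape(5, 2)
print(time_series_to_supervised(toy, n_in=2, n_out=1))
# Columns: 0, 1 (current), then 0(t-2), 1(t-2), 0(t-1), 1(t-1), then 0(t+1), 1(t+1).
# Rows 0-1 (incomplete lags) and row 4 (no t+1 target) are dropped by dropnan.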
In [4]:
# Load the data
path1 = r"D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\data6.csv"  # path to the data file
# The file here is a CSV; for an Excel sheet, use pandas' read_excel() instead.
datas1 = pd.read_csv(path1)  # read_csv already returns a DataFrame
# Originally only columns 3, 23, 16-21 and 27 of the sheet were selected; leave this commented out to keep all columns.
# data1 = datas1.iloc[:,np.r_[3,23,16:22,27]]
data1 = datas1.interpolate()  # fill missing values by interpolation
values1 = data1.values
print(data1.head())
print(data1.shape)
        Temp   Humidity       GHI       DHI  Rainfall  Power
0  19.779453  40.025826  3.232706  1.690531       0.0    0.0
1  19.714937  39.605961  3.194991  1.576346       0.0    0.0
2  19.549330  39.608631  3.070866  1.576157       0.0    0.0
3  19.405870  39.680702  3.038623  1.482489       0.0    0.0
4  19.387363  39.319881  2.656474  1.134153       0.0    0.0
(104256, 6)
In [6]:
# Normalize each column to [0, 1] with MinMaxScaler (fit on the full series, before the split)
scaler = MinMaxScaler(feature_range=(0, 1))
scaledData1 = scaler.fit_transform(data1)
print(scaledData1.shape)
(104256, 6)
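MinMaxScaler rescales each column independently as x' = (x - x_min) / (x_max - x_min). A quick check of that formula against the scaler's output on the Power column (an illustrative snippet, not in the original run):

In [ ]:
# Verify the per-column min-max formula against the scaler's output.
power = data1['Power'].values
manual = (power - power.min()) / (power.max() - power.min())
print(np.allclose(manual, scaledData1[:, data1.columns.get_loc('Power')]))  # expected: True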
In [7]:
n_steps_in = 96   # length of the history window (input time steps)
n_steps_out = 1   # prediction horizon (output time steps)
processedData1 = time_series_to_supervised(scaledData1,n_steps_in,n_steps_out)
print(processedData1.head())
            0         1         2         3    4         5   0(t-96)  \
96   0.555631  0.349673  0.190042  0.040558  0.0  0.236302  0.490360   
97   0.564819  0.315350  0.211335  0.044613  0.0  0.258204  0.489088   
98   0.576854  0.288321  0.229657  0.047549  0.0  0.279860  0.485824   
99   0.581973  0.268243  0.247775  0.053347  0.0  0.301336  0.482997   
100  0.586026  0.264586  0.266058  0.057351  0.0  0.322851  0.482632   

      1(t-96)   2(t-96)   3(t-96)  ...    2(t-1)    3(t-1)  4(t-1)    5(t-1)  \
96   0.369105  0.002088  0.002013  ...  0.166009  0.036794     0.0  0.214129   
97   0.364859  0.002061  0.001839  ...  0.190042  0.040558     0.0  0.236302   
98   0.364886  0.001973  0.001839  ...  0.211335  0.044613     0.0  0.258204   
99   0.365615  0.001950  0.001697  ...  0.229657  0.047549     0.0  0.279860   
100  0.361965  0.001679  0.001167  ...  0.247775  0.053347     0.0  0.301336   

       0(t+1)    1(t+1)    2(t+1)    3(t+1)  4(t+1)    5(t+1)  
96   0.564819  0.315350  0.211335  0.044613     0.0  0.258204  
97   0.576854  0.288321  0.229657  0.047549     0.0  0.279860  
98   0.581973  0.268243  0.247775  0.053347     0.0  0.301336  
99   0.586026  0.264586  0.266058  0.057351     0.0  0.322851  
100  0.590772  0.258790  0.282900  0.060958     0.0  0.343360  

[5 rows x 588 columns]
In [8]:
data_x = processedData1.loc[:,'0(t-96)':'5(t-1)']  # all lagged columns (t-96 .. t-1) as input features
data_y = processedData1.loc[:,'5']  # column 5 (Power at the current time step) as the label


In [9]:
data_x.shape
Out[9]:
(104159, 576)
In [10]:
data_y
Out[10]:
96        0.236302
97        0.258204
98        0.279860
99        0.301336
100       0.322851
            ...   
104250    0.000000
104251    0.000000
104252    0.000000
104253    0.000000
104254    0.000000
Name: 5, Length: 104159, dtype: float64
In [11]:
data_y.shape
Out[11]:
(104159,)
In [33]:
# Compute the sizes of the training, validation and test sets
train_size = int(len(data_x) * 0.90)
test_size = int(len(data_x) * 0.015)
val_size = len(data_x) - train_size - test_size

# Compute the index ranges of the three sets (chronological split, no shuffling)
train_indices = range(train_size)
val_indices = range(train_size, train_size + val_size)
test_indices = range(train_size + val_size, len(data_x))

# Slice the sets by index range and reshape to 3D [samples, timesteps, features]
train_X1 = data_x.iloc[train_indices].values.reshape((-1, n_steps_in, scaledData1.shape[1]))
val_X1 = data_x.iloc[val_indices].values.reshape((-1, n_steps_in, scaledData1.shape[1]))
test_X1 = data_x.iloc[test_indices].values.reshape((-1, n_steps_in, scaledData1.shape[1]))
train_y = data_y.iloc[train_indices].values
val_y = data_y.iloc[val_indices].values
test_y = data_y.iloc[test_indices].values

# The *_X1 arrays are already 3D, so these reshapes are no-ops kept for naming consistency
train_X = train_X1.reshape((train_X1.shape[0], n_steps_in, scaledData1.shape[1]))
val_X = val_X1.reshape((val_X1.shape[0], n_steps_in, scaledData1.shape[1]))
test_X = test_X1.reshape((test_X1.shape[0], n_steps_in, scaledData1.shape[1]))

print(train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y.shape)
(93743, 96, 6) (93743,) (8854, 96, 6) (8854,) (1562, 96, 6) (1562,)
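Since the split is chronological, the three index ranges should be contiguous and non-overlapping so no future data leaks into training; a quick assertion makes this explicit (an illustrative check, not in the original notebook):

In [ ]:
# Sanity check: training indices must all precede validation, which must precede test.
assert max(train_indices) < min(val_indices) and max(val_indices) < min(test_indices)
print("train:", len(train_indices), "val:", len(val_indices), "test:", len(test_indices))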
In [34]:
train_X1.shape
Out[34]:
(93743, 96, 6)
In [77]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv1D, Bidirectional, GlobalAveragePooling1D, Dense, GRU, MaxPooling1D
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import RandomUniform
class AttentionWithImproveRelativePositionEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model, num_heads, max_len=5000):
        super(AttentionWithImproveRelativePositionEncoding, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        self.max_len = max_len
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)
        self.position_encoding = ImproveRelativePositionEncoding(d_model)

    def call(self, v, k, q, mask):
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)

        # Add the improved relative positional encoding to keys and queries
        k += self.position_encoding(k)
        q += self.position_encoding(q)

        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)

        scaled_attention, attention_weights = self.scaled_dot_product_attention(q, k, v, mask)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output, attention_weights

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.d_model // self.num_heads))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def scaled_dot_product_attention(self, q, k, v, mask):
        matmul_qk = tf.matmul(q, k, transpose_b=True)
        dk = tf.cast(tf.shape(k)[-1], tf.float32)
        scaled_attention_logits = matmul_qk / tf.math.sqrt(dk)

        if mask is not None:
            scaled_attention_logits += (mask * -1e9)

        attention_weights = tf.nn.softmax(scaled_attention_logits, axis=-1)
        output = tf.matmul(attention_weights, v)
        return output, attention_weights

class ImproveRelativePositionEncoding(tf.keras.layers.Layer):
    def __init__(self, d_model, max_len=5000):
        super(ImproveRelativePositionEncoding, self).__init__()
        self.max_len = max_len
        self.d_model = d_model
        # Introduce learnable parameters u and v for a linear rescaling of the encoding
        self.u = self.add_weight(shape=(self.d_model,),
                                 initializer=RandomUniform(),
                                 trainable=True)
        self.v = self.add_weight(shape=(self.d_model,),
                                 initializer=RandomUniform(),
                                 trainable=True)
    def call(self, inputs):
        seq_length = inputs.shape[1]
        pos_encoding = self.relative_positional_encoding(seq_length, self.d_model)
        
        # Fold the learnable parameters u and v into the relative position encoding
        # (algebraically this equals pos_encoding * (self.u + self.v))
        pe_with_params = pos_encoding * self.u + pos_encoding * self.v
        return inputs + pe_with_params

    def relative_positional_encoding(self, position, d_model):
        pos = tf.range(position, dtype=tf.float32)
        i = tf.range(d_model, dtype=tf.float32)
        
        angles = 1 / tf.pow(10000.0, (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        angle_rads = tf.einsum('i,j->ij', pos, angles)
        # Keep the sinusoidal mechanism of the original encoding
        # Apply sin to even indices; 2i
        angle_rads_sin = tf.sin(angle_rads[:, 0::2])
        # Apply cos to odd indices; 2i+1
        angle_rads_cos = tf.cos(angle_rads[:, 1::2])

        pos_encoding = tf.stack([angle_rads_sin, angle_rads_cos], axis=2)
        pos_encoding = tf.reshape(pos_encoding, [1, position, d_model])

        return pos_encoding



def PosConv1biGRUWithSelfAttention(input_shape, gru_units, num_heads):
    inputs = Input(shape=input_shape)
    # CNN layer
    cnn_layer = Conv1D(filters=64, kernel_size=2, activation='relu')(inputs)
    cnn_layer = MaxPooling1D(pool_size=1)(cnn_layer)  # pool_size=1 leaves the sequence length unchanged
    gru_output = Bidirectional(GRU(gru_units, return_sequences=True))(cnn_layer)

    # Apply self-attention with improved relative position encoding
    self_attention = AttentionWithImproveRelativePositionEncoding(d_model=gru_units*2, num_heads=num_heads)
    gru_output, _ = self_attention(gru_output, gru_output, gru_output, mask=None)
    
    pool1 = GlobalAveragePooling1D()(gru_output)
    output = Dense(1)(pool1)
    
    return Model(inputs=inputs, outputs=output)


input_shape = (96, 6)
gru_units = 64
num_heads = 8

# Create model
model = PosConv1biGRUWithSelfAttention(input_shape, gru_units, num_heads)
model.compile(optimizer='adam', loss='mse')
model.summary()
Model: "functional_4"
┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┓
┃ Layer (type)         Output Shape          Param #  Connected to      ┃
┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━┩
│ input_layer_4       │ (None, 96, 6)     │          0 │ -                 │
│ (InputLayer)        │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ conv1d_4 (Conv1D)   │ (None, 95, 64)    │        832 │ input_layer_4[0]… │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ max_pooling1d_4     │ (None, 95, 64)    │          0 │ conv1d_4[0][0]    │
│ (MaxPooling1D)      │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ bidirectional_4     │ (None, 95, 128)   │     49,920 │ max_pooling1d_4[ │
│ (Bidirectional)     │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ attention_with_imp… │ [(None, None,     │     66,304 │ bidirectional_4[ │
│ (AttentionWithImpr…128), (None, 8,   │            │ bidirectional_4[ │
│                     │ None, None)]      │            │ bidirectional_4[ │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ global_average_poo… │ (None, 128)       │          0 │ attention_with_i… │
│ (GlobalAveragePool… │                   │            │                   │
├─────────────────────┼───────────────────┼────────────┼───────────────────┤
│ dense_24 (Dense)    │ (None, 1)         │        129 │ global_average_p… │
└─────────────────────┴───────────────────┴────────────┴───────────────────┘
 Total params: 117,185 (457.75 KB)
 Trainable params: 117,185 (457.75 KB)
 Non-trainable params: 0 (0.00 B)
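A quick shape check of the attention block in isolation (a hypothetical snippet; d_model must match the BiGRU output width, here 2 × gru_units = 128):

In [ ]:
# Run the attention layer on a dummy batch to confirm it keeps the
# (batch, seq_len, d_model) shape that GlobalAveragePooling1D expects.
dummy = tf.random.normal((2, 95, 128))  # (batch, timesteps after Conv1D, d_model)
attn = AttentionWithImproveRelativePositionEncoding(d_model=128, num_heads=8)
out, weights = attn(dummy, dummy, dummy, mask=None)
print(out.shape, weights.shape)  # expected: (2, 95, 128) (2, 8, 95, 95)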
In [61]:
# Compile and train the model
model.compile(optimizer='adam', loss='mean_squared_error')
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Define early stopping on the validation loss
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=0, mode='min')

# Fit the model with early stopping (a checkpoint callback is sketched after the training log below)
history = model.fit(train_X, train_y, epochs=100, batch_size=64, validation_data=(val_X, val_y), 
                    callbacks=[early_stopping])
Epoch 1/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 106s 71ms/step - loss: 0.0198 - val_loss: 0.0016
Epoch 2/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 107s 73ms/step - loss: 0.0016 - val_loss: 0.0015
Epoch 3/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 108s 74ms/step - loss: 0.0015 - val_loss: 0.0015
Epoch 4/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 107s 73ms/step - loss: 0.0015 - val_loss: 0.0014
Epoch 5/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 106s 73ms/step - loss: 0.0014 - val_loss: 0.0016
Epoch 6/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 105s 71ms/step - loss: 0.0014 - val_loss: 0.0015
Epoch 7/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 104s 71ms/step - loss: 0.0014 - val_loss: 0.0014
Epoch 8/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 107s 73ms/step - loss: 0.0013 - val_loss: 0.0014
Epoch 9/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 107s 73ms/step - loss: 0.0013 - val_loss: 0.0014
Epoch 10/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 106s 72ms/step - loss: 0.0013 - val_loss: 0.0015
Epoch 11/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 105s 71ms/step - loss: 0.0013 - val_loss: 0.0014
Epoch 12/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 105s 72ms/step - loss: 0.0013 - val_loss: 0.0015
Epoch 13/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 107s 73ms/step - loss: 0.0013 - val_loss: 0.0014
Epoch 14/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 108s 74ms/step - loss: 0.0012 - val_loss: 0.0014
Epoch 15/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 107s 73ms/step - loss: 0.0013 - val_loss: 0.0014
Epoch 16/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 104s 71ms/step - loss: 0.0013 - val_loss: 0.0013
Epoch 17/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 105s 72ms/step - loss: 0.0013 - val_loss: 0.0013
Epoch 18/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 105s 72ms/step - loss: 0.0012 - val_loss: 0.0014
Epoch 19/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 102s 69ms/step - loss: 0.0012 - val_loss: 0.0013
Epoch 20/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 102s 70ms/step - loss: 0.0012 - val_loss: 0.0014
Epoch 21/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 103s 70ms/step - loss: 0.0012 - val_loss: 0.0014
Epoch 22/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 102s 70ms/step - loss: 0.0011 - val_loss: 0.0014
Epoch 23/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 102s 69ms/step - loss: 0.0012 - val_loss: 0.0018
Epoch 24/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 101s 69ms/step - loss: 0.0012 - val_loss: 0.0014
Epoch 25/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 102s 70ms/step - loss: 0.0011 - val_loss: 0.0014
Epoch 26/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 102s 70ms/step - loss: 0.0012 - val_loss: 0.0015
Epoch 27/100
1465/1465 ━━━━━━━━━━━━━━━━━━━━ 97s 66ms/step - loss: 0.0012 - val_loss: 0.0015
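ModelCheckpoint is imported above but never attached to fit(). A minimal sketch of how it could be added alongside early stopping (the file name model_best.keras is illustrative):

In [ ]:
# Optional: persist the best weights seen on the validation set.
checkpoint = ModelCheckpoint('model_best.keras', monitor='val_loss',
                             save_best_only=True, mode='min')
# history = model.fit(train_X, train_y, epochs=100, batch_size=64,
#                     validation_data=(val_X, val_y),
#                     callbacks=[early_stopping, checkpoint])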
In [62]:
# Predict on the test set
lstm_pred = model.predict(test_X)
# (the predictions are mapped back to the original scale further below)
49/49 ━━━━━━━━━━━━━━━━━━━━ 1s 16ms/step
In [63]:
test_y_pre = test_y
In [64]:
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='validation')
plt.legend()
plt.show()
[Figure: training vs. validation loss curves]
In [65]:
lstm_pred.shape
Out[65]:
(1562, 1)
In [66]:
test_y_pre.shape
Out[66]:
(1562,)
In [67]:
test_y_pre1 = test_y_pre.reshape(-1, 1)
In [68]:
test_y_pre1
Out[68]:
array([[0.90540195],
       [0.90466702],
       [0.89696645],
       ...,
       [0.        ],
       [0.        ],
       [0.        ]])
In [69]:
# Tile the single predicted column across all 6 features so the scaler can invert it
results1 = np.broadcast_to(lstm_pred, (1562, 6))
In [70]:
# Same trick for the true values
test_y2 = np.broadcast_to(test_y_pre1, (1562, 6))
In [71]:
# De-normalize (invert the min-max scaling)
inv_forecast_y = scaler.inverse_transform(results1)
inv_test_y = scaler.inverse_transform(test_y2)
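Broadcasting the predicted column across all six features is a workaround for inverse_transform, which expects the full feature matrix. Since only the Power column (index 5) is read back out, the same values can be recovered directly from the fitted scaler's per-column parameters (a sketch using sklearn's scale_ and min_ attributes, where transform computes x_scaled = x * scale_ + min_):

In [ ]:
# Invert the min-max scaling for the Power column (index 5) without broadcasting.
inv_power_pred = (lstm_pred.ravel() - scaler.min_[5]) / scaler.scale_[5]
print(np.allclose(inv_power_pred, inv_forecast_y[:, 5]))  # expected: True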
In [72]:
inv_test_y
Out[72]:
array([[ 4.08374339e+01,  9.30529669e+01,  1.27546074e+03,
         5.95908965e+02,  2.15485743e+01,  4.67959929e+00],
       [ 4.08001461e+01,  9.29803001e+01,  1.27442567e+03,
         5.95425556e+02,  2.15310831e+01,  4.67580080e+00],
       [ 4.04094426e+01,  9.22188951e+01,  1.26358018e+03,
         5.90360385e+02,  2.13478095e+01,  4.63600016e+00],
       ...,
       [-5.09990072e+00,  3.53003502e+00,  2.91584611e-01,
         3.66558254e-01,  0.00000000e+00,  0.00000000e+00],
       [-5.09990072e+00,  3.53003502e+00,  2.91584611e-01,
         3.66558254e-01,  0.00000000e+00,  0.00000000e+00],
       [-5.09990072e+00,  3.53003502e+00,  2.91584611e-01,
         3.66558254e-01,  0.00000000e+00,  0.00000000e+00]])
In [73]:
# Compute the root mean squared error on the de-normalized Power column
rmse = sqrt(mean_squared_error(inv_test_y[:,5], inv_forecast_y[:,5]))
print('Test RMSE: %.3f' % rmse)
# Plot true vs. predicted power
plt.figure(figsize=(16,8))
plt.plot(inv_test_y[:,5], label='true')
plt.plot(inv_forecast_y[:,5], label='pre')
plt.legend()
plt.show()
Test RMSE: 0.063
[Figure: true vs. predicted power on the test set]
In [80]:
# Evaluation metrics: MSE, RMSE, MAE and R2 via sklearn (y_true first, y_pred second)
from math import sqrt
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
print('mean_squared_error:', mean_squared_error(test_y_pre, lstm_pred))    # MSE
print("mean_absolute_error:", mean_absolute_error(test_y_pre, lstm_pred))  # MAE
print("rmse:", sqrt(mean_squared_error(test_y_pre, lstm_pred)))
# R2 on the de-normalized values (all six broadcast columns)
print("r2 score:", r2_score(inv_test_y, inv_forecast_y))
mean_squared_error: 0.00014629570256978046
mean_absolute_error: 0.008445659571024366
rmse: 0.01209527604355438
r2 score: 0.9988370101682903
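For metrics in physical units on the target variable alone, the same scores can be computed on the de-normalized Power column (index 5); an illustrative follow-up cell:

In [ ]:
# Metrics on the de-normalized Power column only (index 5).
print("MAE :", mean_absolute_error(inv_test_y[:, 5], inv_forecast_y[:, 5]))
print("RMSE:", sqrt(mean_squared_error(inv_test_y[:, 5], inv_forecast_y[:, 5])))
print("R2  :", r2_score(inv_test_y[:, 5], inv_forecast_y[:, 5]))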
In [75]:
df1 = pd.DataFrame(inv_test_y[:,5], columns=['column_name'])  # true power values
In [58]:
# Save the DataFrame to a CSV file at the given path
df1.to_csv('test.csv', index=False)
In [45]:
df2 = pd.DataFrame(inv_forecast_y[:,5], columns=['column_name'])  # predicted power (column 5; a single column name needs a 1D array)
In [46]:
# Save the DataFrame to a CSV file at the given path
df2.to_csv('forecast.csv', index=False)
In [ ]: