From 7b9191a9a47fe8ae6f27c4fce48fbe8183c99cba Mon Sep 17 00:00:00 2001 From: hanyp Date: Mon, 12 Aug 2024 07:40:16 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'iceemdan-low-LSTM.ipynb'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- iceemdan-low-LSTM.ipynb | 1060 --------------------------------------- 1 file changed, 1060 deletions(-) delete mode 100644 iceemdan-low-LSTM.ipynb diff --git a/iceemdan-low-LSTM.ipynb b/iceemdan-low-LSTM.ipynb deleted file mode 100644 index 1fbb803..0000000 --- a/iceemdan-low-LSTM.ipynb +++ /dev/null @@ -1,1060 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "C:\\Users\\asus\\AppData\\Roaming\\Python\\Python39\\site-packages\\pandas\\core\\computation\\expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed).\n", - " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", - "C:\\Users\\asus\\AppData\\Roaming\\Python\\Python39\\site-packages\\pandas\\core\\arrays\\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).\n", - " from pandas.core import (\n" - ] - } - ], - "source": [ - "from math import sqrt\n", - "from numpy import concatenate\n", - "from matplotlib import pyplot\n", - "import pandas as pd\n", - "import numpy as np\n", - "from sklearn.preprocessing import MinMaxScaler\n", - "from sklearn.preprocessing import LabelEncoder\n", - "from sklearn.metrics import mean_squared_error\n", - "from tensorflow.keras import Sequential\n", - "\n", - "from tensorflow.keras.layers import Dense\n", - "from tensorflow.keras.layers import LSTM\n", - "from tensorflow.keras.layers import Dropout\n", - "from sklearn.model_selection import train_test_split\n", - "import matplotlib.pyplot as plt" - ] - }, - { - "cell_type": 
"markdown", - "metadata": {}, - "source": [ - "这段代码是一个函数 time_series_to_supervised,它用于将时间序列数据转换为监督学习问题的数据集。下面是该函数的各个部分的含义:\n", - "\n", - "data: 输入的时间序列数据,可以是列表或2D NumPy数组。\n", - "n_in: 作为输入的滞后观察数,即用多少个时间步的观察值作为输入。默认值为96,表示使用前96个时间步的观察值作为输入。\n", - "n_out: 作为输出的观测数量,即预测多少个时间步的观察值。默认值为10,表示预测未来10个时间步的观察值。\n", - "dropnan: 布尔值,表示是否删除具有NaN值的行。默认为True,即删除具有NaN值的行。\n", - "函数首先检查输入数据的维度,并初始化一些变量。然后,它创建一个新的DataFrame对象 df 来存储输入数据,并保存原始的列名。接着,它创建了两个空列表 cols 和 names,用于存储新的特征列和列名。\n", - "\n", - "接下来,函数开始构建特征列和对应的列名。首先,它将原始的观察序列添加到 cols 列表中,并将其列名添加到 names 列表中。然后,它依次将滞后的观察序列添加到 cols 列表中,并构建相应的列名,格式为 (原始列名)(t-滞后时间)。这样就创建了输入特征的部分。\n", - "\n", - "接着,函数开始构建输出特征的部分。它依次将未来的观察序列添加到 cols 列表中,并构建相应的列名,格式为 (原始列名)(t+未来时间)。\n", - "\n", - "最后,函数将所有的特征列拼接在一起,构成一个新的DataFrame对象 agg。如果 dropnan 参数为True,则删除具有NaN值的行。最后,函数返回处理后的数据集 agg。" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def time_series_to_supervised(data, n_in=96, n_out=10,dropnan=True):\n", - " \"\"\"\n", - " :param data:作为列表或2D NumPy数组的观察序列。需要。\n", - " :param n_in:作为输入的滞后观察数(X)。值可以在[1..len(数据)]之间可选。默认为1。\n", - " :param n_out:作为输出的观测数量(y)。值可以在[0..len(数据)]之间。可选的。默认为1。\n", - " :param dropnan:Boolean是否删除具有NaN值的行。可选的。默认为True。\n", - " :return:\n", - " \"\"\"\n", - " n_vars = 1 if type(data) is list else data.shape[1]\n", - " df = pd.DataFrame(data)\n", - " origNames = df.columns\n", - " cols, names = list(), list()\n", - " cols.append(df.shift(0))\n", - " names += [('%s' % origNames[j]) for j in range(n_vars)]\n", - " n_in = max(0, n_in)\n", - " for i in range(n_in, 0, -1):\n", - " time = '(t-%d)' % i\n", - " cols.append(df.shift(i))\n", - " names += [('%s%s' % (origNames[j], time)) for j in range(n_vars)]\n", - " n_out = max(n_out, 0)\n", - " for i in range(1, n_out+1):\n", - " time = '(t+%d)' % i\n", - " cols.append(df.shift(-i))\n", - " names += [('%s%s' % (origNames[j], time)) for j in range(n_vars)]\n", - " agg = pd.concat(cols, axis=1)\n", - " agg.columns = names\n", - " 
if dropnan:\n", - " agg.dropna(inplace=True)\n", - " return agg" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Temp Humidity GHI DHI Rainfall Power\n", - "0 19.779453 40.025826 3.232706 1.690531 0.0 0.0\n", - "1 19.714937 39.605961 3.194991 1.576346 0.0 0.0\n", - "2 19.549330 39.608631 3.070866 1.576157 0.0 0.0\n", - "3 19.405870 39.680702 3.038623 1.482489 0.0 0.0\n", - "4 19.387363 39.319881 2.656474 1.134153 0.0 0.0\n", - "(104256, 6)\n" - ] - } - ], - "source": [ - "# 加载数据\n", - "path1 = r\"D:\\project\\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\\CEEMAN-PosConv1dbiLSTM-LSTM\\模型代码流程\\data6.csv\"#数据所在路径\n", - "#我的数据是excel表,若是csv文件用pandas的read_csv()函数替换即可。\n", - "datas1 = pd.DataFrame(pd.read_csv(path1))\n", - "#我只取了data表里的第3、23、16、17、18、19、20、21、27列,如果取全部列的话这一行可以去掉\n", - "# data1 = datas1.iloc[:,np.r_[3,23,16:22,27]]\n", - "data1=datas1.interpolate()\n", - "values1 = data1.values\n", - "print(data1.head())\n", - "print(data1.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "# data2= data1.drop(['date'], axis = 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "# # 获取重构的原始数据\n", - "# # 获取重构的原始数据\n", - "# # 获取重构的原始数据\n", - "path_re = r\"D:\\project\\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\\CEEMAN-PosConv1dbiLSTM-LSTM\\模型代码流程\\完整的模型代码流程\\iceemdan_reconstructed_data_low.csv\"#数据所在路径\n", - "# #我的数据是excel表,若是csv文件用pandas的read_csv()函数替换即可。\n", - "data_re = pd.DataFrame(pd.read_csv(path_re))" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
# ---------------------------------------------------------------------------
# ICEEMDAN low-frequency component -> single-layer LSTM forecasting pipeline.
#
# Relies on names provided earlier in the notebook: `np`, `pd`, `plt`,
# `MinMaxScaler`, `mean_squared_error` (import cell) and
# `time_series_to_supervised` (helper cell).
# ---------------------------------------------------------------------------
import keras
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Input, LSTM
from keras.models import Sequential
from sklearn.metrics import mean_absolute_error, r2_score

# --- Configuration: everything a reader may want to tune --------------------
RAW_DATA_PATH = r"D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\data6.csv"
LOW_FREQ_PATH = r"D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\模型代码流程\完整的模型代码流程\iceemdan_reconstructed_data_low.csv"
N_WEATHER_COLS = 5     # leading columns of the raw table used as exogenous inputs
N_STEPS_IN = 96        # history length fed to the LSTM
N_STEPS_OUT = 1        # lead columns generated by the framing helper
TEST_FRACTION = 0.2    # last share of the samples, kept for the final evaluation
VAL_FRACTION = 0.1     # last share of the remaining samples, used for early stopping
LSTM_UNITS = 128
EPOCHS = 100
BATCH_SIZE = 64
PATIENCE = 10
SEED = 42

keras.utils.set_random_seed(SEED)  # make weight init and batch shuffling repeatable

# --- Load -------------------------------------------------------------------
raw_df = pd.read_csv(RAW_DATA_PATH).interpolate()  # fill interior gaps linearly
low_freq_df = pd.read_csv(LOW_FREQ_PATH)
print(raw_df.head())
print(raw_df.shape)

fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(low_freq_df, label='Reconstructed Data', color='red')
ax.set_title('ICEEMDAN reconstructed low-frequency component')
ax.set_xlabel('Time')
ax.set_ylabel('Power')
ax.legend()
plt.show()

# --- Merge weather inputs with the low-frequency target ---------------------
# Truncate both tables to their common length instead of a hard-coded row
# count, so a length mismatch cannot introduce NaN rows through the concat.
n_rows = min(len(raw_df), len(low_freq_df))
merged_df = pd.concat(
    [raw_df.iloc[:n_rows, :N_WEATHER_COLS], low_freq_df.iloc[:n_rows]],
    axis=1,
)
print(merged_df)

values = merged_df.to_numpy(dtype=float)
n_features = values.shape[1]
target_idx = n_features - 1  # the low-frequency component is the last column

# --- Frame as supervised samples (unscaled) ---------------------------------
supervised = time_series_to_supervised(values, N_STEPS_IN, N_STEPS_OUT)
first_lag_col = '0(t-%d)' % N_STEPS_IN
last_lag_col = '%d(t-1)' % target_idx
# Lag columns are ordered oldest step first with all variables per step, so a
# row reshapes directly to (timesteps, features).
windows = (
    supervised.loc[:, first_lag_col:last_lag_col]
    .to_numpy()
    .reshape(-1, N_STEPS_IN, n_features)
)
targets = supervised[str(target_idx)].to_numpy()

# --- Chronological train / validation / test split --------------------------
n_samples = len(windows)
n_test = int(n_samples * TEST_FRACTION)
n_val = int((n_samples - n_test) * VAL_FRACTION)
n_train = n_samples - n_test - n_val
if min(n_train, n_val, n_test) <= 0:
    raise ValueError(
        'Not enough samples (%d) for a train/validation/test split' % n_samples
    )
train_sl = slice(0, n_train)
val_sl = slice(n_train, n_train + n_val)
test_sl = slice(n_train + n_val, n_samples)

# --- Scale using statistics from the training period only -------------------
# The framed index labels are row positions in `values`; every row before the
# first validation target belongs to a training window or training target.
first_heldout_row = supervised.index[n_train]
scaler = MinMaxScaler(feature_range=(0, 1)).fit(values[:first_heldout_row])
target_scale = scaler.scale_[target_idx]
target_min = scaler.min_[target_idx]


def scale_windows(batch):
    """Apply the fitted per-feature scaling to a (samples, steps, features) array."""
    return scaler.transform(batch.reshape(-1, n_features)).reshape(batch.shape)


train_X, val_X, test_X = (scale_windows(windows[sl]) for sl in (train_sl, val_sl, test_sl))
# Same affine map MinMaxScaler.transform applies to the target column.
train_y, val_y = (targets[sl] * target_scale + target_min for sl in (train_sl, val_sl))
test_y_true = targets[test_sl]  # kept in original units for evaluation
print(train_X.shape, train_y.shape, val_X.shape, val_y.shape, test_X.shape, test_y_true.shape)

# --- Model ------------------------------------------------------------------
model = Sequential([
    Input(shape=(N_STEPS_IN, n_features)),
    LSTM(units=LSTM_UNITS),
    Dense(1),
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.summary()

# --- Train: early stopping watches the validation slice, never the test set -
early_stopping = EarlyStopping(
    monitor='val_loss', min_delta=0, patience=PATIENCE, verbose=0, mode='min',
    restore_best_weights=True,  # evaluate the best epoch, not the last one
)
history = model.fit(
    train_X, train_y,
    epochs=EPOCHS, batch_size=BATCH_SIZE,
    validation_data=(val_X, val_y),
    callbacks=[early_stopping],
)

# --- Predict and map back to original units ---------------------------------
pred_scaled = model.predict(test_X).reshape(-1)
test_y_pred = (pred_scaled - target_min) / target_scale

# --- Evaluate (all metrics in original units, arguments as y_true, y_pred) --
mse = mean_squared_error(test_y_true, test_y_pred)
rmse = float(np.sqrt(mse))
print('Test RMSE: %.3f' % rmse)
print('mean_squared_error:', mse)
print("mean_absolute_error:", mean_absolute_error(test_y_true, test_y_pred))
print("rmse:", rmse)
print("r2 score:", r2_score(test_y_true, test_y_pred))

fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(test_y_true, label='true')
ax.plot(test_y_pred, label='pre')
ax.legend()
plt.show()

# --- Persist the test-period truth and forecast -----------------------------
pd.DataFrame(test_y_true, columns=['column_name']).to_csv('低频_test.csv', index=False)
pd.DataFrame(test_y_pred, columns=['column_name']).to_csv('低频_forecast.csv', index=False)