647 KiB
647 KiB
In [1]:
from math import sqrt from numpy import concatenate from matplotlib import pyplot import pandas as pd import numpy as np from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import LabelEncoder from sklearn.metrics import mean_squared_error from tensorflow.keras import Sequential from tensorflow.keras.layers import Dense from tensorflow.keras.layers import LSTM from tensorflow.keras.layers import Dropout from sklearn.model_selection import train_test_split import matplotlib.pyplot as plt
C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\computation\expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed). from pandas.core.computation.check import NUMEXPR_INSTALLED C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed). from pandas.core import (
In [2]:
# Load the full DKA site export (5-minute records) into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer=r'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\58-Site_DKA-M17_C-Phase.csv',
)
In [3]:
# Preview the raw frame loaded from the CSV (the notebook shows a truncated head/tail view).
data
Out[3]:
timestamp | Active_Energy_Delivered_Received | Current_Phase_Average | Active_Power | Performance_Ratio | Wind_Speed | Weather_Temperature_Celsius | Weather_Relative_Humidity | Global_Horizontal_Radiation | Diffuse_Horizontal_Radiation | Wind_Direction | Weather_Daily_Rainfall | Radiation_Global_Tilted | Radiation_Diffuse_Tilted | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2010-02-18 14:35:00 | 0.0 | 0.000000 | 0.000000 | NaN | 6.793873 | 35.132046 | 13.933495 | 1000.515625 | 97.682610 | 126.266418 | 0.0 | NaN | NaN |
1 | 2010-02-18 14:40:00 | 0.0 | 0.000000 | 0.000000 | NaN | 6.926013 | 34.586330 | 14.363612 | 989.110413 | 102.564949 | 116.272385 | 0.0 | NaN | NaN |
2 | 2010-02-18 14:45:00 | 0.0 | 0.000000 | 0.000000 | NaN | 6.824874 | 34.628662 | 13.933328 | 977.882629 | 102.709160 | 141.693970 | 0.0 | NaN | NaN |
3 | 2010-02-18 14:50:00 | 0.0 | 0.000000 | 0.000000 | NaN | 5.291194 | 35.258572 | 13.457552 | 963.508484 | 100.324097 | 130.381912 | 0.0 | NaN | NaN |
4 | 2010-02-18 14:55:00 | 0.0 | 0.000000 | 0.000000 | NaN | 6.065388 | 35.220058 | 13.886837 | 939.744995 | 105.697617 | 126.441544 | 0.0 | NaN | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1502295 | 2024-07-17 12:20:00 | 83264.0 | 17.695337 | 4.331866 | 87.961319 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 947.065369 | 144.291245 |
1502296 | 2024-07-17 12:25:00 | 83265.0 | 17.795330 | 4.350333 | 90.215775 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 927.335022 | 131.287155 |
1502297 | 2024-07-17 12:30:00 | 83265.0 | 17.962000 | 4.386533 | 94.664726 | NaN | 14.208377 | 33.600403 | 823.925476 | 83.903313 | 19.253492 | 0.0 | 891.106995 | 126.447548 |
1502298 | 2024-07-17 12:35:00 | 83265.0 | 17.877998 | 4.375267 | 93.486641 | NaN | 14.223358 | 33.683571 | 817.790710 | 76.371666 | 19.294001 | 0.0 | 900.018799 | 123.445114 |
1502299 | 2024-07-17 12:40:00 | 83266.0 | 17.829998 | 4.369600 | 90.526978 | NaN | 14.428312 | 32.949017 | 820.284790 | 74.797913 | 19.167789 | 0.0 | 928.239990 | 123.938103 |
1502300 rows × 14 columns
In [6]:
# Keep only one year of data (2018-04-01 through 2019-04-01), selected by row position.
data2 = data.iloc[853133:958253]
In [7]:
# Preview the selected one-year slice to check its first and last timestamps.
data2
Out[7]:
timestamp | Active_Energy_Delivered_Received | Current_Phase_Average | Active_Power | Performance_Ratio | Wind_Speed | Weather_Temperature_Celsius | Weather_Relative_Humidity | Global_Horizontal_Radiation | Diffuse_Horizontal_Radiation | Wind_Direction | Weather_Daily_Rainfall | Radiation_Global_Tilted | Radiation_Diffuse_Tilted | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
853133 | 2018-04-01 00:00:00 | 18104.0 | 0.997333 | 0.0 | 0.0 | NaN | 19.779453 | 40.025826 | 3.232706 | 1.690531 | 64.372742 | 0.0 | 3.565593 | 0.742383 |
853134 | 2018-04-01 00:05:00 | 18104.0 | 0.997333 | 0.0 | 0.0 | NaN | 19.714937 | 39.605961 | 3.194991 | 1.576346 | 65.954178 | 0.0 | 3.469451 | 0.663080 |
853135 | 2018-04-01 00:10:00 | 18104.0 | 0.996000 | 0.0 | 0.0 | NaN | 19.549330 | 39.608631 | 3.070866 | 1.576157 | 65.347725 | 0.0 | 3.354114 | 0.540446 |
853136 | 2018-04-01 00:15:00 | 18104.0 | 1.000000 | 0.0 | 0.0 | NaN | 19.405870 | 39.680702 | 3.038623 | 1.482489 | 67.103271 | 0.0 | 3.365968 | 0.597973 |
853137 | 2018-04-01 00:20:00 | 18104.0 | 1.000000 | 0.0 | 0.0 | NaN | 19.387363 | 39.319881 | 2.656474 | 1.134153 | 66.430733 | 0.0 | 3.222809 | 0.530707 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
958248 | 2019-03-31 23:35:00 | 29021.0 | 0.991333 | 0.0 | 0.0 | NaN | 13.303740 | 34.212711 | 1.210789 | 0.787026 | 34.165325 | 0.0 | 3.271109 | 0.476681 |
958249 | 2019-03-31 23:40:00 | 29021.0 | 0.995333 | 0.0 | 0.0 | NaN | 13.120920 | 34.394939 | 2.142980 | 1.582670 | 34.202522 | 0.0 | 3.163039 | 0.444219 |
958250 | 2019-03-31 23:45:00 | 29021.0 | 0.995333 | 0.0 | 0.0 | NaN | 12.879215 | 35.167400 | 1.926214 | 1.545889 | 34.233902 | 0.0 | 3.197096 | 0.475794 |
958251 | 2019-03-31 23:50:00 | 29021.0 | 0.999333 | 0.0 | 0.0 | NaN | 12.915867 | 35.359989 | 1.317695 | 0.851529 | 34.308563 | 0.0 | 2.873335 | 0.320598 |
958252 | 2019-03-31 23:55:00 | 29021.0 | 1.000000 | 0.0 | 0.0 | NaN | 13.134816 | 34.500034 | 1.043269 | 0.597816 | 34.228458 | 0.0 | 2.947993 | 0.294085 |
105120 rows × 14 columns
In [8]:
# Quick visual overview of the selected year: each plotted column on its own axes.
data2.plot(
    figsize=(12, 8),
    subplots=True,
    legend=True,
)
Out[8]:
array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >], dtype=object)
In [9]:
# Print dtypes and non-null counts per column to see where values are missing.
data2.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 105120 entries, 853133 to 958252 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 timestamp 105120 non-null object 1 Active_Energy_Delivered_Received 104221 non-null float64 2 Current_Phase_Average 104221 non-null float64 3 Active_Power 104221 non-null float64 4 Performance_Ratio 104221 non-null float64 5 Wind_Speed 0 non-null float64 6 Weather_Temperature_Celsius 105120 non-null float64 7 Weather_Relative_Humidity 105120 non-null float64 8 Global_Horizontal_Radiation 105120 non-null float64 9 Diffuse_Horizontal_Radiation 105120 non-null float64 10 Wind_Direction 105120 non-null float64 11 Weather_Daily_Rainfall 105120 non-null float64 12 Radiation_Global_Tilted 103998 non-null float64 13 Radiation_Diffuse_Tilted 103998 non-null float64 dtypes: float64(13), object(1) memory usage: 11.2+ MB
In [10]:
# Replace the verbose sensor column names with short aliases.
# The list is positional: it must stay in the same order as the 14 columns of data2.
# Assigned directly instead of looping over a one-element list, which only added
# an unused `dataset` alias to the kernel namespace.
data2.columns = [
    'time', 'AE_Power', 'Current', 'Power', 'PR', 'Wind_speed', 'Temp',
    'Humidity', 'GHI', 'DHI', 'Wind_dir', 'Rainfall', 'RGT', 'RDT',
]
In [11]:
# Show data2 again to confirm the short column names were applied.
data2
Out[11]:
time | AE_Power | Current | Power | PR | Wind_speed | Temp | Humidity | GHI | DHI | Wind_dir | Rainfall | RGT | RDT | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
853133 | 2018-04-01 00:00:00 | 18104.0 | 0.997333 | 0.0 | 0.0 | NaN | 19.779453 | 40.025826 | 3.232706 | 1.690531 | 64.372742 | 0.0 | 3.565593 | 0.742383 |
853134 | 2018-04-01 00:05:00 | 18104.0 | 0.997333 | 0.0 | 0.0 | NaN | 19.714937 | 39.605961 | 3.194991 | 1.576346 | 65.954178 | 0.0 | 3.469451 | 0.663080 |
853135 | 2018-04-01 00:10:00 | 18104.0 | 0.996000 | 0.0 | 0.0 | NaN | 19.549330 | 39.608631 | 3.070866 | 1.576157 | 65.347725 | 0.0 | 3.354114 | 0.540446 |
853136 | 2018-04-01 00:15:00 | 18104.0 | 1.000000 | 0.0 | 0.0 | NaN | 19.405870 | 39.680702 | 3.038623 | 1.482489 | 67.103271 | 0.0 | 3.365968 | 0.597973 |
853137 | 2018-04-01 00:20:00 | 18104.0 | 1.000000 | 0.0 | 0.0 | NaN | 19.387363 | 39.319881 | 2.656474 | 1.134153 | 66.430733 | 0.0 | 3.222809 | 0.530707 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
958248 | 2019-03-31 23:35:00 | 29021.0 | 0.991333 | 0.0 | 0.0 | NaN | 13.303740 | 34.212711 | 1.210789 | 0.787026 | 34.165325 | 0.0 | 3.271109 | 0.476681 |
958249 | 2019-03-31 23:40:00 | 29021.0 | 0.995333 | 0.0 | 0.0 | NaN | 13.120920 | 34.394939 | 2.142980 | 1.582670 | 34.202522 | 0.0 | 3.163039 | 0.444219 |
958250 | 2019-03-31 23:45:00 | 29021.0 | 0.995333 | 0.0 | 0.0 | NaN | 12.879215 | 35.167400 | 1.926214 | 1.545889 | 34.233902 | 0.0 | 3.197096 | 0.475794 |
958251 | 2019-03-31 23:50:00 | 29021.0 | 0.999333 | 0.0 | 0.0 | NaN | 12.915867 | 35.359989 | 1.317695 | 0.851529 | 34.308563 | 0.0 | 2.873335 | 0.320598 |
958252 | 2019-03-31 23:55:00 | 29021.0 | 1.000000 | 0.0 | 0.0 | NaN | 13.134816 | 34.500034 | 1.043269 | 0.597816 | 34.228458 | 0.0 | 2.947993 | 0.294085 |
105120 rows × 14 columns
In [12]:
df = pd.DataFrame(data2)

# Move the 'Power' column to the last position.
columns = list(df.columns)                            # current column order
columns.append(columns.pop(columns.index('Power')))   # take 'Power' out and re-add it at the end

# Re-select the columns in the new order.
df = df[columns]

# Print the result to confirm 'Power' is now the last column.
print(df)
time AE_Power Current PR Wind_speed Temp \ 853133 2018-04-01 00:00:00 18104.0 0.997333 0.0 NaN 19.779453 853134 2018-04-01 00:05:00 18104.0 0.997333 0.0 NaN 19.714937 853135 2018-04-01 00:10:00 18104.0 0.996000 0.0 NaN 19.549330 853136 2018-04-01 00:15:00 18104.0 1.000000 0.0 NaN 19.405870 853137 2018-04-01 00:20:00 18104.0 1.000000 0.0 NaN 19.387363 ... ... ... ... ... ... ... 958248 2019-03-31 23:35:00 29021.0 0.991333 0.0 NaN 13.303740 958249 2019-03-31 23:40:00 29021.0 0.995333 0.0 NaN 13.120920 958250 2019-03-31 23:45:00 29021.0 0.995333 0.0 NaN 12.879215 958251 2019-03-31 23:50:00 29021.0 0.999333 0.0 NaN 12.915867 958252 2019-03-31 23:55:00 29021.0 1.000000 0.0 NaN 13.134816 Humidity GHI DHI Wind_dir Rainfall RGT \ 853133 40.025826 3.232706 1.690531 64.372742 0.0 3.565593 853134 39.605961 3.194991 1.576346 65.954178 0.0 3.469451 853135 39.608631 3.070866 1.576157 65.347725 0.0 3.354114 853136 39.680702 3.038623 1.482489 67.103271 0.0 3.365968 853137 39.319881 2.656474 1.134153 66.430733 0.0 3.222809 ... ... ... ... ... ... ... 958248 34.212711 1.210789 0.787026 34.165325 0.0 3.271109 958249 34.394939 2.142980 1.582670 34.202522 0.0 3.163039 958250 35.167400 1.926214 1.545889 34.233902 0.0 3.197096 958251 35.359989 1.317695 0.851529 34.308563 0.0 2.873335 958252 34.500034 1.043269 0.597816 34.228458 0.0 2.947993 RDT Power 853133 0.742383 0.0 853134 0.663080 0.0 853135 0.540446 0.0 853136 0.597973 0.0 853137 0.530707 0.0 ... ... ... 958248 0.476681 0.0 958249 0.444219 0.0 958250 0.475794 0.0 958251 0.320598 0.0 958252 0.294085 0.0 [105120 rows x 14 columns]
In [13]:
# Display df to confirm the new column order (Power last).
df
Out[13]:
time | AE_Power | Current | PR | Wind_speed | Temp | Humidity | GHI | DHI | Wind_dir | Rainfall | RGT | RDT | Power | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
853133 | 2018-04-01 00:00:00 | 18104.0 | 0.997333 | 0.0 | NaN | 19.779453 | 40.025826 | 3.232706 | 1.690531 | 64.372742 | 0.0 | 3.565593 | 0.742383 | 0.0 |
853134 | 2018-04-01 00:05:00 | 18104.0 | 0.997333 | 0.0 | NaN | 19.714937 | 39.605961 | 3.194991 | 1.576346 | 65.954178 | 0.0 | 3.469451 | 0.663080 | 0.0 |
853135 | 2018-04-01 00:10:00 | 18104.0 | 0.996000 | 0.0 | NaN | 19.549330 | 39.608631 | 3.070866 | 1.576157 | 65.347725 | 0.0 | 3.354114 | 0.540446 | 0.0 |
853136 | 2018-04-01 00:15:00 | 18104.0 | 1.000000 | 0.0 | NaN | 19.405870 | 39.680702 | 3.038623 | 1.482489 | 67.103271 | 0.0 | 3.365968 | 0.597973 | 0.0 |
853137 | 2018-04-01 00:20:00 | 18104.0 | 1.000000 | 0.0 | NaN | 19.387363 | 39.319881 | 2.656474 | 1.134153 | 66.430733 | 0.0 | 3.222809 | 0.530707 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
958248 | 2019-03-31 23:35:00 | 29021.0 | 0.991333 | 0.0 | NaN | 13.303740 | 34.212711 | 1.210789 | 0.787026 | 34.165325 | 0.0 | 3.271109 | 0.476681 | 0.0 |
958249 | 2019-03-31 23:40:00 | 29021.0 | 0.995333 | 0.0 | NaN | 13.120920 | 34.394939 | 2.142980 | 1.582670 | 34.202522 | 0.0 | 3.163039 | 0.444219 | 0.0 |
958250 | 2019-03-31 23:45:00 | 29021.0 | 0.995333 | 0.0 | NaN | 12.879215 | 35.167400 | 1.926214 | 1.545889 | 34.233902 | 0.0 | 3.197096 | 0.475794 | 0.0 |
958251 | 2019-03-31 23:50:00 | 29021.0 | 0.999333 | 0.0 | NaN | 12.915867 | 35.359989 | 1.317695 | 0.851529 | 34.308563 | 0.0 | 2.873335 | 0.320598 | 0.0 |
958252 | 2019-03-31 23:55:00 | 29021.0 | 1.000000 | 0.0 | NaN | 13.134816 | 34.500034 | 1.043269 | 0.597816 | 34.228458 | 0.0 | 2.947993 | 0.294085 | 0.0 |
105120 rows × 14 columns
In [14]:
# Print dtypes and non-null counts for the reordered frame.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 105120 entries, 853133 to 958252 Data columns (total 14 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 time 105120 non-null object 1 AE_Power 104221 non-null float64 2 Current 104221 non-null float64 3 PR 104221 non-null float64 4 Wind_speed 0 non-null float64 5 Temp 105120 non-null float64 6 Humidity 105120 non-null float64 7 GHI 105120 non-null float64 8 DHI 105120 non-null float64 9 Wind_dir 105120 non-null float64 10 Rainfall 105120 non-null float64 11 RGT 103998 non-null float64 12 RDT 103998 non-null float64 13 Power 104221 non-null float64 dtypes: float64(13), object(1) memory usage: 11.2+ MB
In [15]:
# Disabled cell: exports a boolean NaN mask of data2 to Excel. Kept for reference only.
# # Print all positions that contain NaN
# nan_positions = data2.isna()
# print("Positions of NaN values:")
# print(nan_positions)
# # Save the processed DataFrame as an Excel file
# excel_file_path = 'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\processed_data.xlsx'  # define the Excel file path and file name
# nan_positions.to_excel(excel_file_path, index=False)  # save the DataFrame as an Excel file, without the index
In [16]:
# Drop the Wind_speed column (df.info() reports 0 non-null values for it).
data3 = df.drop(columns=["Wind_speed"])
In [17]:
# Plot data3 with each plotted column on its own axes.
# No matplotlib import here: pyplot is already imported as `plt` in the imports cell,
# and rebinding `plt` to the top-level `matplotlib` package would break any later
# `plt.<pyplot function>` call. DataFrame.plot needs no explicit import.
data3.plot(legend=True, subplots=True, figsize=(12, 8))
Out[17]:
array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >], dtype=object)
In [18]:
# Display data3 to confirm the Wind_speed column is gone.
data3
Out[18]:
time | AE_Power | Current | PR | Temp | Humidity | GHI | DHI | Wind_dir | Rainfall | RGT | RDT | Power | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
853133 | 2018-04-01 00:00:00 | 18104.0 | 0.997333 | 0.0 | 19.779453 | 40.025826 | 3.232706 | 1.690531 | 64.372742 | 0.0 | 3.565593 | 0.742383 | 0.0 |
853134 | 2018-04-01 00:05:00 | 18104.0 | 0.997333 | 0.0 | 19.714937 | 39.605961 | 3.194991 | 1.576346 | 65.954178 | 0.0 | 3.469451 | 0.663080 | 0.0 |
853135 | 2018-04-01 00:10:00 | 18104.0 | 0.996000 | 0.0 | 19.549330 | 39.608631 | 3.070866 | 1.576157 | 65.347725 | 0.0 | 3.354114 | 0.540446 | 0.0 |
853136 | 2018-04-01 00:15:00 | 18104.0 | 1.000000 | 0.0 | 19.405870 | 39.680702 | 3.038623 | 1.482489 | 67.103271 | 0.0 | 3.365968 | 0.597973 | 0.0 |
853137 | 2018-04-01 00:20:00 | 18104.0 | 1.000000 | 0.0 | 19.387363 | 39.319881 | 2.656474 | 1.134153 | 66.430733 | 0.0 | 3.222809 | 0.530707 | 0.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
958248 | 2019-03-31 23:35:00 | 29021.0 | 0.991333 | 0.0 | 13.303740 | 34.212711 | 1.210789 | 0.787026 | 34.165325 | 0.0 | 3.271109 | 0.476681 | 0.0 |
958249 | 2019-03-31 23:40:00 | 29021.0 | 0.995333 | 0.0 | 13.120920 | 34.394939 | 2.142980 | 1.582670 | 34.202522 | 0.0 | 3.163039 | 0.444219 | 0.0 |
958250 | 2019-03-31 23:45:00 | 29021.0 | 0.995333 | 0.0 | 12.879215 | 35.167400 | 1.926214 | 1.545889 | 34.233902 | 0.0 | 3.197096 | 0.475794 | 0.0 |
958251 | 2019-03-31 23:50:00 | 29021.0 | 0.999333 | 0.0 | 12.915867 | 35.359989 | 1.317695 | 0.851529 | 34.308563 | 0.0 | 2.873335 | 0.320598 | 0.0 |
958252 | 2019-03-31 23:55:00 | 29021.0 | 1.000000 | 0.0 | 13.134816 | 34.500034 | 1.043269 | 0.597816 | 34.228458 | 0.0 | 2.947993 | 0.294085 | 0.0 |
105120 rows × 13 columns
In [21]:
# Persist the cleaned frame (Wind_speed dropped, Power moved last) as a CSV file.
data4 = pd.DataFrame(data3)

# Output CSV path. Written as a raw string because the Windows path contains
# backslash sequences such as "\p" and "\d", which are invalid escapes in a
# normal string literal (deprecated, and a SyntaxWarning on newer Python versions).
csv_file_path = r'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data3.csv'

data4.to_csv(csv_file_path, index=False)  # write the CSV without the index column
print(f"DataFrame saved to {csv_file_path}")
DataFrame saved to D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data3.csv
In [21]:
# Re-plot data3 after saving it, with each plotted column on its own axes.
# No matplotlib import here: pyplot is already imported as `plt` in the imports cell,
# and rebinding `plt` to the top-level `matplotlib` package would break any later
# `plt.<pyplot function>` call. DataFrame.plot needs no explicit import.
data3.plot(legend=True, subplots=True, figsize=(12, 8))
Out[21]:
array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >], dtype=object)
In [ ]: