ICEEMDAN-Solar_power-forecast/数据预处理 第二步.ipynb

454 KiB
Raw Blame History

In [1]:
from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\computation\expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED
C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (
In [2]:
# Load the raw dataset into `data`.
# NOTE(review): the source is an absolute, machine-specific Windows path, so
# this cell only runs where that directory exists — consider a configurable
# data directory.
data=pd.read_csv(r'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data3.csv')
In [3]:
data
Out[3]:
time AE_Power Current PR Temp Humidity GHI DHI Wind_dir Rainfall RGT RDT Power
0 2018-4-1 0:00 18104.0 0.997333 0.0 19.779453 40.025826 3.232706 1.690531 64.372742 0.0 3.565593 0.742383 0.0
1 2018-4-1 0:05 18104.0 0.997333 0.0 19.714937 39.605961 3.194991 1.576346 65.954178 0.0 3.469451 0.663080 0.0
2 2018-4-1 0:10 18104.0 0.996000 0.0 19.549330 39.608631 3.070866 1.576157 65.347725 0.0 3.354114 0.540446 0.0
3 2018-4-1 0:15 18104.0 1.000000 0.0 19.405870 39.680702 3.038623 1.482489 67.103271 0.0 3.365968 0.597973 0.0
4 2018-4-1 0:20 18104.0 1.000000 0.0 19.387363 39.319881 2.656474 1.134153 66.430733 0.0 3.222809 0.530707 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ...
104251 2019-3-31 23:35 29021.0 0.991333 0.0 13.303740 34.212711 1.210789 0.787026 34.165325 0.0 3.271109 0.476681 0.0
104252 2019-3-31 23:40 29021.0 0.995333 0.0 13.120920 34.394939 2.142980 1.582670 34.202522 0.0 3.163039 0.444219 0.0
104253 2019-3-31 23:45 29021.0 0.995333 0.0 12.879215 35.167400 1.926214 1.545889 34.233902 0.0 3.197096 0.475794 0.0
104254 2019-3-31 23:50 29021.0 0.999333 0.0 12.915867 35.359989 1.317695 0.851529 34.308563 0.0 2.873335 0.320598 0.0
104255 2019-3-31 23:55 29021.0 1.000000 0.0 13.134816 34.500034 1.043269 0.597816 34.228458 0.0 2.947993 0.294085 0.0

104256 rows × 13 columns

In [4]:
# Work on a copy of the frame without the "time" column; `data` itself is
# left untouched.
data2 = data.drop(columns=["time"])
In [5]:
data2
Out[5]:
AE_Power Current PR Temp Humidity GHI DHI Wind_dir Rainfall RGT RDT Power
0 18104.0 0.997333 0.0 19.779453 40.025826 3.232706 1.690531 64.372742 0.0 3.565593 0.742383 0.0
1 18104.0 0.997333 0.0 19.714937 39.605961 3.194991 1.576346 65.954178 0.0 3.469451 0.663080 0.0
2 18104.0 0.996000 0.0 19.549330 39.608631 3.070866 1.576157 65.347725 0.0 3.354114 0.540446 0.0
3 18104.0 1.000000 0.0 19.405870 39.680702 3.038623 1.482489 67.103271 0.0 3.365968 0.597973 0.0
4 18104.0 1.000000 0.0 19.387363 39.319881 2.656474 1.134153 66.430733 0.0 3.222809 0.530707 0.0
... ... ... ... ... ... ... ... ... ... ... ... ...
104251 29021.0 0.991333 0.0 13.303740 34.212711 1.210789 0.787026 34.165325 0.0 3.271109 0.476681 0.0
104252 29021.0 0.995333 0.0 13.120920 34.394939 2.142980 1.582670 34.202522 0.0 3.163039 0.444219 0.0
104253 29021.0 0.995333 0.0 12.879215 35.167400 1.926214 1.545889 34.233902 0.0 3.197096 0.475794 0.0
104254 29021.0 0.999333 0.0 12.915867 35.359989 1.317695 0.851529 34.308563 0.0 2.873335 0.320598 0.0
104255 29021.0 1.000000 0.0 13.134816 34.500034 1.043269 0.597816 34.228458 0.0 2.947993 0.294085 0.0

104256 rows × 12 columns

In [6]:
# Draw every column of data2 in its own subplot (subplots=True) on a 12x8 inch
# figure for a quick visual check of each series.
data2.plot(legend=True, subplots=True, figsize=(12, 8))
Out[6]:
array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >,
       <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >],
      dtype=object)
No description has been provided for this image
In [ ]:
 
In [7]:
# Print dtypes and per-column non-null counts; columns whose count is below
# the row count contain missing values.
data2.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104256 entries, 0 to 104255
Data columns (total 12 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   AE_Power  104205 non-null  float64
 1   Current   104205 non-null  float64
 2   PR        104205 non-null  float64
 3   Temp      104256 non-null  float64
 4   Humidity  104256 non-null  float64
 5   GHI       104256 non-null  float64
 6   DHI       104256 non-null  float64
 7   Wind_dir  104256 non-null  float64
 8   Rainfall  104256 non-null  float64
 9   RGT       103982 non-null  float64
 10  RDT       103982 non-null  float64
 11  Power     104205 non-null  float64
dtypes: float64(12)
memory usage: 9.5 MB
In [11]:
# Reduce data2 to Temp, Humidity, GHI, DHI, Rainfall and Power by removing the
# six columns listed below.
# All columns are dropped in one call that starts from data2, so this cell is
# idempotent. The previous chain of separate `data3 = data3.drop(...)` cells
# raised KeyError as soon as any one of them was executed a second time.
data3 = data2.drop(
    columns=["AE_Power", "Current", "PR", "Wind_dir", "RGT", "RDT"]
)
In [17]:
data3
Out[17]:
Temp Humidity GHI DHI Rainfall Power
0 19.779453 40.025826 3.232706 1.690531 0.0 0.0
1 19.714937 39.605961 3.194991 1.576346 0.0 0.0
2 19.549330 39.608631 3.070866 1.576157 0.0 0.0
3 19.405870 39.680702 3.038623 1.482489 0.0 0.0
4 19.387363 39.319881 2.656474 1.134153 0.0 0.0
... ... ... ... ... ... ...
104251 13.303740 34.212711 1.210789 0.787026 0.0 0.0
104252 13.120920 34.394939 2.142980 1.582670 0.0 0.0
104253 12.879215 35.167400 1.926214 1.545889 0.0 0.0
104254 12.915867 35.359989 1.317695 0.851529 0.0 0.0
104255 13.134816 34.500034 1.043269 0.597816 0.0 0.0

104256 rows × 6 columns

In [18]:
# Draw each remaining column of data3 in its own subplot (subplots=True) on a
# 12x8 inch figure.
data3.plot(legend=True, subplots=True, figsize=(12, 8))
Out[18]:
array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >],
      dtype=object)
No description has been provided for this image
In [19]:
import seaborn as sns
In [20]:
# Pairwise correlation analysis of the columns in data3.
#
# `plt` and `sns` are already imported by earlier cells, so the redundant
# in-cell imports (including an unused numpy import) are not repeated here.
#
# The font overrides are passed to sns.set() through `rc`. seaborn applies its
# theme first and the `rc` mapping last, so the font list is no longer reset by
# the theme call as it was when rcParams were edited *before* sns.set().
# The former 'SimHei' assignment was dead code (overwritten on the next line)
# and has been removed.
sns.set(
    font_scale=1.5,
    rc={
        "font.sans-serif": ["SimSun", "Arial Unicode MS"],
        "axes.unicode_minus": False,
    },
)

# Create the figure at its final 10x8 inch size before drawing, instead of
# resizing the current figure after the heatmap has been laid out.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    data3.corr(),
    annot=True,       # print the coefficient inside each cell
    cmap='RdYlGn',
    linewidths=0.2,
    fmt='.2f',        # two decimals per coefficient
    ax=ax,
)
plt.show()
No description has been provided for this image
In [21]:
# Remove Humidity and Rainfall from data3 in a single call.
# NOTE(review): presumably chosen from the correlation heatmap above — confirm.
data3 = data3.drop(columns=["Humidity", "Rainfall"])
In [23]:
# data3 is already a DataFrame; the wrapper is kept so that the name `data4`
# stays defined for any cell that relies on it.
data4 = pd.DataFrame(data3)

# Destination file for the reduced dataset. The output format is CSV (the
# earlier comments called it Excel, which did not match the code).
# Raw string: the plain literal contained invalid escape sequences such as
# "\p", "\C" and "\d", which trigger a SyntaxWarning on current Python and are
# slated to become an error. The resulting path text is unchanged.
csv_file_path = r'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data7.csv'

data4.to_csv(csv_file_path, index=False)  # write CSV without the index column

print(f"DataFrame saved to {csv_file_path}")
DataFrame saved to D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data7.csv
In [ ]: