ICEEMDAN-Solar_power-forecast/数据预处理 第二步.ipynb at eac4d0f4ddcaa222f4a472bd6cf906ead3fe8c04

454 KiB

Raw Blame History

In [1]:

from math import sqrt
from numpy import concatenate
from matplotlib import pyplot
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error
from tensorflow.keras import Sequential

from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\computation\expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.3' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED
C:\Users\asus\AppData\Roaming\Python\Python39\site-packages\pandas\core\arrays\masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (

In [2]:

# Load the raw dataset from its CSV file into a DataFrame.
# NOTE(review): absolute, machine-specific path — the notebook only runs where this file exists.
data = pd.read_csv(
    r'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data3.csv'
)

In [3]:

# Show the loaded frame; the rendered output is truncated to the first and last
# rows and reports 104256 rows x 13 columns, with `time` as the first column.
data

Out[3]:

	time	AE_Power	Current	PR	Temp	Humidity	GHI	DHI	Wind_dir	Rainfall	RGT	RDT	Power
0	2018-4-1 0:00	18104.0	0.997333	0.0	19.779453	40.025826	3.232706	1.690531	64.372742	0.0	3.565593	0.742383	0.0
1	2018-4-1 0:05	18104.0	0.997333	0.0	19.714937	39.605961	3.194991	1.576346	65.954178	0.0	3.469451	0.663080	0.0
2	2018-4-1 0:10	18104.0	0.996000	0.0	19.549330	39.608631	3.070866	1.576157	65.347725	0.0	3.354114	0.540446	0.0
3	2018-4-1 0:15	18104.0	1.000000	0.0	19.405870	39.680702	3.038623	1.482489	67.103271	0.0	3.365968	0.597973	0.0
4	2018-4-1 0:20	18104.0	1.000000	0.0	19.387363	39.319881	2.656474	1.134153	66.430733	0.0	3.222809	0.530707	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
104251	2019-3-31 23:35	29021.0	0.991333	0.0	13.303740	34.212711	1.210789	0.787026	34.165325	0.0	3.271109	0.476681	0.0
104252	2019-3-31 23:40	29021.0	0.995333	0.0	13.120920	34.394939	2.142980	1.582670	34.202522	0.0	3.163039	0.444219	0.0
104253	2019-3-31 23:45	29021.0	0.995333	0.0	12.879215	35.167400	1.926214	1.545889	34.233902	0.0	3.197096	0.475794	0.0
104254	2019-3-31 23:50	29021.0	0.999333	0.0	12.915867	35.359989	1.317695	0.851529	34.308563	0.0	2.873335	0.320598	0.0
104255	2019-3-31 23:55	29021.0	1.000000	0.0	13.134816	34.500034	1.043269	0.597816	34.228458	0.0	2.947993	0.294085	0.0

104256 rows × 13 columns

In [4]:

# Keep every column except the `time` stamp; `data` itself is left untouched.
data2 = data.drop(columns=["time"])

In [5]:

# Show the frame after removing `time`; the output reports 12 remaining columns.
data2

Out[5]:

	AE_Power	Current	PR	Temp	Humidity	GHI	DHI	Wind_dir	Rainfall	RGT	RDT	Power
0	18104.0	0.997333	0.0	19.779453	40.025826	3.232706	1.690531	64.372742	0.0	3.565593	0.742383	0.0
1	18104.0	0.997333	0.0	19.714937	39.605961	3.194991	1.576346	65.954178	0.0	3.469451	0.663080	0.0
2	18104.0	0.996000	0.0	19.549330	39.608631	3.070866	1.576157	65.347725	0.0	3.354114	0.540446	0.0
3	18104.0	1.000000	0.0	19.405870	39.680702	3.038623	1.482489	67.103271	0.0	3.365968	0.597973	0.0
4	18104.0	1.000000	0.0	19.387363	39.319881	2.656474	1.134153	66.430733	0.0	3.222809	0.530707	0.0
...	...	...	...	...	...	...	...	...	...	...	...	...
104251	29021.0	0.991333	0.0	13.303740	34.212711	1.210789	0.787026	34.165325	0.0	3.271109	0.476681	0.0
104252	29021.0	0.995333	0.0	13.120920	34.394939	2.142980	1.582670	34.202522	0.0	3.163039	0.444219	0.0
104253	29021.0	0.995333	0.0	12.879215	35.167400	1.926214	1.545889	34.233902	0.0	3.197096	0.475794	0.0
104254	29021.0	0.999333	0.0	12.915867	35.359989	1.317695	0.851529	34.308563	0.0	2.873335	0.320598	0.0
104255	29021.0	1.000000	0.0	13.134816	34.500034	1.043269	0.597816	34.228458	0.0	2.947993	0.294085	0.0

104256 rows × 12 columns

In [6]:

# Plot each column of data2 in its own subplot (one Axes per column) on a 12x8 inch figure.
# The call's return value (an array of Axes) is echoed as the cell output.
data2.plot(legend=True, subplots=True, figsize=(12, 8))

Out[6]:

array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >,
       <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >],
      dtype=object)

No description has been provided for this image

In [ ]:

In [7]:

# Print dtypes and non-null counts per column.
# The output shows missing values: AE_Power, Current, PR and Power have 104205
# non-null entries out of 104256 rows, and RGT / RDT have 103982.
# NOTE(review): no visible cell fills or drops these NaNs — confirm how they are handled.
data2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104256 entries, 0 to 104255
Data columns (total 12 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   AE_Power  104205 non-null  float64
 1   Current   104205 non-null  float64
 2   PR        104205 non-null  float64
 3   Temp      104256 non-null  float64
 4   Humidity  104256 non-null  float64
 5   GHI       104256 non-null  float64
 6   DHI       104256 non-null  float64
 7   Wind_dir  104256 non-null  float64
 8   Rainfall  104256 non-null  float64
 9   RGT       103982 non-null  float64
 10  RDT       103982 non-null  float64
 11  Power     104205 non-null  float64
dtypes: float64(12)
memory usage: 9.5 MB

In [11]:

# Build data3 from data2 by removing the columns that are not used further on.
# All six columns are dropped in a single call that starts from data2, so this
# cell can be re-run safely; a chain of `data3 = data3.drop(...)` cells raises
# KeyError as soon as any one of them is executed a second time.
data3 = data2.drop(
    columns=["AE_Power", "Current", "PR", "Wind_dir", "RGT", "RDT"]
)

In [17]:

# Show the reduced frame; the output lists the six remaining columns:
# Temp, Humidity, GHI, DHI, Rainfall and Power.
data3

Out[17]:

	Temp	Humidity	GHI	DHI	Rainfall	Power
0	19.779453	40.025826	3.232706	1.690531	0.0	0.0
1	19.714937	39.605961	3.194991	1.576346	0.0	0.0
2	19.549330	39.608631	3.070866	1.576157	0.0	0.0
3	19.405870	39.680702	3.038623	1.482489	0.0	0.0
4	19.387363	39.319881	2.656474	1.134153	0.0	0.0
...	...	...	...	...	...	...
104251	13.303740	34.212711	1.210789	0.787026	0.0	0.0
104252	13.120920	34.394939	2.142980	1.582670	0.0	0.0
104253	12.879215	35.167400	1.926214	1.545889	0.0	0.0
104254	12.915867	35.359989	1.317695	0.851529	0.0	0.0
104255	13.134816	34.500034	1.043269	0.597816	0.0	0.0

104256 rows × 6 columns

In [18]:

# Plot each remaining column of data3 in its own subplot on a 12x8 inch figure.
# The call's return value (an array of Axes) is echoed as the cell output.
data3.plot(legend=True, subplots=True, figsize=(12, 8))

Out[18]:

array([<Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >, <Axes: >],
      dtype=object)

In [19]:

import seaborn as sns

In [20]:

# Pairwise correlation analysis of the columns in data3, drawn as an annotated heatmap.
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the seaborn theme FIRST. sns.set() re-applies seaborn's style parameters,
# which include `font.sans-serif`, so a font override made before this call is
# silently discarded.
sns.set(font_scale=1.5)

# Font preference for sans-serif text (CJK-capable fonts) and ASCII minus signs,
# set after the theme so that they actually take effect.
plt.rcParams["font.sans-serif"] = ["SimSun", "Arial Unicode MS"]
plt.rcParams["axes.unicode_minus"] = False

# Create the figure at its final size up front and draw into its Axes explicitly,
# instead of resizing whatever figure happens to be current after plotting.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    data3.corr(),      # correlation matrix of all columns in data3
    annot=True,        # write the coefficient inside every cell
    cmap='RdYlGn',
    linewidths=0.2,
    fmt='.2f',         # two decimals per annotation
    ax=ax,
)
plt.show()

In [21]:

# Remove Humidity and Rainfall from the feature table in one call.
# NOTE(review): presumably decided from the correlation heatmap — confirm the criterion.
data3 = data3.drop(columns=["Humidity", "Rainfall"])

In [23]:

# Build the export frame from the reduced feature table.
data4 = pd.DataFrame(data3)

# Destination path and file name of the CSV export.
# Raw string: the Windows path contains backslashes (`\p`, `\C`, `\d`, ...) that
# are invalid escape sequences in a normal string literal and trigger a
# DeprecationWarning / SyntaxWarning on current Python versions. The resulting
# path text is unchanged.
csv_file_path = r'D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data7.csv'

# Write the DataFrame as a CSV file, without the index column.
# NOTE(review): data2.info() reported missing values in `Power`; nothing visible
# in this notebook fills or drops them before the export — confirm downstream handling.
data4.to_csv(csv_file_path, index=False)

print(f"DataFrame saved to {csv_file_path}")

DataFrame saved to D:\project\小论文1-基于ICEEMDAN分解的时序高维变化的短期光伏功率预测模型\CEEMAN-PosConv1dbiLSTM-LSTM\对比模型\data7.csv

In [ ]:

454 KiB Raw Blame History Unescape Escape

454 KiB

Raw Blame History