finish pv forecast

This commit is contained in:
default 2025-02-08 07:01:05 +00:00
parent 0c073816e1
commit 61cfdc304f
8 changed files with 4452 additions and 11685 deletions

4338
pv/data/pv_original.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

BIN
pv/models/pv_pred.joblib Normal file

Binary file not shown.

View File

@ -0,0 +1,25 @@
import joblib
import numpy as np
def load_model(path: str):
    """Load a serialized prediction model from disk.

    Args:
        path (str): Filesystem path of the joblib file to load.

    Returns:
        The object deserialized by ``joblib.load``.
    """
    # Honour the caller-supplied path; the previous hard-coded relative path
    # only resolved when the process was started from the ``pv`` directory.
    # NOTE(review): joblib files are pickle-based — only load trusted files.
    return joblib.load(path)
def pv_forecast(inputs: np.ndarray, model):
    """Run the forecasting model on one input sequence or a batch of them.

    Args:
        inputs (np.ndarray): Input sequence. A 1-D sequence is promoted to a
            single-row 2-D batch; input that is already 2-D is passed to the
            model unchanged.
        model: Object exposing a ``predict`` method (e.g. the estimator
            returned by ``load_model``).

    Returns:
        Whatever ``model.predict`` returns for the batch.
    """
    # np.atleast_2d turns shape (n,) into (1, n) and leaves (1, n) alone,
    # so callers that already reshaped their input do not end up 3-D.
    batch = np.atleast_2d(np.asarray(inputs))
    return model.predict(batch)
if __name__ == '__main__':
    # Manual smoke test: load the saved model and predict on 24 random values.
    forecaster = load_model('./models/pv_pred.joblib')
    sample = np.random.randn(24)
    print(sample.shape)
    print(pv_forecast(sample, forecaster))

View File

@ -1,10 +1,12 @@
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib
from logzero import logger
def time_series_to_supervised(data, n_in=10, n_out=1,dropnan=True):
def time_series_to_supervised(data, columns, n_in=24, n_out=1,dropnan=True):
"""
:param data:作为列表或2D NumPy数组的观察序列需要
:param n_in:作为输入的滞后观察数X值可以在[1..len数据]之间可选默认为1
@ -12,9 +14,10 @@ def time_series_to_supervised(data, n_in=10, n_out=1,dropnan=True):
:param dropnan:Boolean是否删除具有NaN值的行可选的默认为True
:return:
"""
logger.info(f"正在处理训练数据size{data.shape}")
n_vars = 1 if type(data) is list else data.shape[1]
df = pd.DataFrame(data)
origNames = df.columns
origNames = columns
cols, names = list(), list()
cols.append(df.shift(0))
names += [('%s' % origNames[j]) for j in range(n_vars)]
@ -33,3 +36,38 @@ def time_series_to_supervised(data, n_in=10, n_out=1,dropnan=True):
if dropnan:
agg.dropna(inplace=True)
return agg
def train_model(train_data: pd.DataFrame, save_path: str = './models/pv_pred.joblib'):
    """Train an XGBoost regressor on a supervised table and save it to disk.

    Every column except the last is used as a feature and the last column is
    the target; adjust this selection to the business scenario and model type
    as needed.

    Args:
        train_data (pd.DataFrame): Supervised-learning table.
        save_path (str): Destination passed to ``joblib.dump``. Defaults to
            the location that was previously hard-coded.
    """
    # Feature / target column names.
    fea_cols = train_data.columns[:-1].tolist()
    out_cols = train_data.columns[-1:].tolist()
    # Logger methods take a format string followed by its arguments; passing
    # the two lists directly made the second one a stray %-format argument.
    logger.info("feature columns: %s, output columns: %s", fea_cols, out_cols)

    X = train_data[fea_cols]
    y = train_data[out_cols]

    # 80% train, then the remaining 20% is halved into validation and test.
    # NOTE(review): the split is shuffled; confirm that is intended for
    # time-ordered data.
    train_X, test_X, train_y, test_y = train_test_split(
        X, y, test_size=0.2, random_state=42)
    valid_X, test_X, valid_y, test_y = train_test_split(
        test_X, test_y, test_size=0.5, random_state=42)

    # Hyper-parameters.
    other_params = {
        'learning_rate': 0.1,
        'n_estimators': 150,
        'max_depth': 10,
        'min_child_weight': 1,
        'seed': 0,
        'subsample': 0.8,
        'colsample_bytree': 0.8,
        'gamma': 0,
        'reg_alpha': 0,
        'reg_lambda': 1,
    }
    logger.info("train shapes: X=%s, y=%s", train_X.shape, train_y.shape)

    gbm = xgb.XGBRegressor(objective='reg:squarederror', **other_params)
    # NOTE(review): passing early_stopping_rounds to fit() is deprecated in
    # newer xgboost releases — confirm against the pinned version.
    gbm.fit(
        train_X.values,
        train_y.values,
        eval_set=[(valid_X.values, valid_y.values)],
        early_stopping_rounds=20,
    )

    # Evaluate on the held-out test split.
    y_pred = gbm.predict(test_X.values)
    logger.info(f"Root Mean Squared Error on Test set: {np.sqrt(mean_squared_error(test_y, y_pred))}")
    logger.info(f"R2 score on Test set: {r2_score(test_y, y_pred)}")

    joblib.dump(gbm, save_path)
    logger.info(f"save_path: {save_path}")
if __name__ == '__main__':
    # Read the hourly PV data, build the lagged supervised table
    # (n_in=24, n_out=1) and train the model on it.
    hourly_frame = pd.read_csv('./data/pv_data_hourly.csv', index_col=0)
    supervised = time_series_to_supervised(
        hourly_frame.values, hourly_frame.columns, 24, 1)
    train_model(supervised)

View File

@ -1,58 +0,0 @@
import numpy as np
import pandas as pd
import numpy as np
from vmdpy import VMD
from skopt import gp_minimize
from skopt.space import Real, Integer, Categorical
from skopt.utils import use_named_args
# 加载数据
def do_vmd(dataframe):
    """Tune VMD parameters with Bayesian optimisation and decompose ``power``.

    Args:
        dataframe (pd.DataFrame): Table containing a ``power`` column.

    Returns:
        pd.DataFrame: ``dataframe`` with one additional ``vmd_<i>`` column per
        decomposed mode.
    """
    signal = dataframe['power'].values

    # Objective evaluated by the optimiser for one parameter combination.
    def vmd_objective(params):
        alpha, tau, K, DC, init, tol = params
        u, _u_hat, _omega = VMD(signal, alpha, tau, K, DC, init, tol)
        # Intended as a "mean bandwidth of the modes" score; swap in another
        # objective if needed.
        # NOTE(review): this actually averages the spacing between indices of
        # non-zero samples, not a spectral bandwidth — confirm the intent.
        objective = np.mean([np.diff(np.where(um != 0)) for um in u])
        return -objective  # gp_minimize minimises, so negate the score

    # Search space.
    space = [
        Real(low=2, high=100, prior='log-uniform', name='alpha'),
        Real(low=0, high=1, name='tau'),
        Integer(low=2, high=10, name='K'),
        Categorical(categories=[0, 1], name='DC'),
        Categorical(categories=[0, 1], name='init'),
        Real(low=1e-6, high=1e-2, prior='log-uniform', name='tol'),
    ]

    # Bayesian optimisation over the space above.
    @use_named_args(space)
    def objective_function(alpha, tau, K, DC, init, tol):
        return vmd_objective((alpha, tau, K, DC, init, tol))

    result = gp_minimize(objective_function, space, n_calls=50, random_state=0)

    print('最优参数:', result.x)
    print('最优目标函数值:', -result.fun)  # negate back to the original score

    # Decompose once more with the best parameters found.
    alpha_opt, tau_opt, K_opt, DC_opt, init_opt, tol_opt = result.x
    u_opt, _u_hat_opt, _omega_opt = VMD(
        signal, alpha_opt, tau_opt, K_opt, DC_opt, init_opt, tol_opt)

    # One column per mode, aligned on the input's index. Use the ``dataframe``
    # argument here: the previous code read a module-level variable that only
    # exists when this file is run as a script.
    # NOTE(review): if VMD returns fewer samples than the input, the index
    # length will not match — verify against the vmdpy implementation.
    vmd_rst = u_opt.T
    mode_columns = [f"vmd_{x}" for x in range(vmd_rst.shape[1])]
    mode_frame = pd.DataFrame.from_records(
        vmd_rst, index=dataframe.index, columns=mode_columns)
    vmd_train_data = pd.concat([dataframe, mode_frame], axis=1)
    return vmd_train_data
if __name__ == '__main__':
    # Load the hourly PV data, decompose it, and write the augmented table.
    data2vmd = pd.read_csv('./data/pv_data_hourly.csv', index_col=0)
    do_vmd(data2vmd).to_csv(
        './data/vmd_train.csv', index=False, encoding='utf-8-sig')

View File

@ -1,7 +1,13 @@
numpy==1.23
pandas==1.5.3
EMD_signal==1.6.0
vmdpy==0.2
matplotlib==3.7.0
numpy==1.22.0
pandas==1.5.3
PyEMD==1.0.0
scikit-optimize==0.10.1
scikit_learn==1.2.1
seaborn==0.13.2
xlrd==2.0.1
vmdpy==0.2
scikit-optimize==0.10.1
xgboost==1.6.0
logzero==1.7.0
flask==3.1.0

35
run.py
View File

@ -0,0 +1,35 @@
from flask import request, Flask, jsonify
from pv.pv_inference import load_model, pv_forecast
from logzero import logger
app = Flask(__name__)
gbm_pv = load_model('./pv/models/pv_pred.joblib')
# todo: 写一个flask接口
@app.route('/pv', methods=['POST'])
def run_pv_forecast():
    """Handle ``POST /pv``: forecast from a JSON body ``{"inputs": [...]}``.

    ``inputs`` must hold exactly 24 finite numeric values.

    Returns:
        A ``(response, status)`` tuple: 200 with ``{"result": [...]}`` on
        success, or 400 with ``{"error": ...}`` when the body is invalid.
    """
    # Local import: this module does not import numpy at file level.
    import numpy as np

    # request.data is the raw body as bytes, so membership tests against a
    # str key and .get() cannot work on it; parse the body as JSON instead.
    payload = request.get_json(force=True, silent=True)
    if not isinstance(payload, dict) or 'inputs' not in payload:
        return jsonify({"error": "Invalid data"}), 400

    # Validate before touching the model: numeric, finite, 24 values.
    try:
        inputs = np.asarray(payload['inputs'], dtype=float)
    except (TypeError, ValueError):
        return jsonify({"error": "inputs must be numeric"}), 400
    if inputs.size != 24 or not np.all(np.isfinite(inputs)):
        return jsonify({"error": "inputs must contain 24 finite values"}), 400

    # Hand over a flat sequence; pv_forecast builds the batch dimension.
    inputs = inputs.reshape(24)
    logger.info(f"pv history inputs: {inputs}")

    out = pv_forecast(inputs, gbm_pv)
    # ndarray is not JSON serializable; convert to plain Python lists.
    results = {"result": np.asarray(out).tolist()}
    return jsonify(results), 200
@app.route('/carbon', methods=['POST'])
def run_carbon_forecast():
    """Placeholder for ``POST /carbon``.

    TODO: wrap the other forecasts here.

    Returns:
        A ``(response, status)`` tuple with status 501 until implemented.
    """
    # A Flask view must return a response; returning None makes Flask raise
    # an error, so answer with an explicit "not implemented" status instead.
    return jsonify({"error": "Not implemented"}), 501
if __name__ == '__main__':
    # Start the Flask server on port 2467, bound to 0.0.0.0.
    # Flask documents ``port`` as an int, so pass it as one.
    app.run(host='0.0.0.0', port=2467)