finish pv forecast
This commit is contained in:
parent
0c073816e1
commit
61cfdc304f
File diff suppressed because it is too large
Load Diff
11617
pv/data/vmd_train.csv
11617
pv/data/vmd_train.csv
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -0,0 +1,25 @@
|
|||
import joblib
|
||||
import numpy as np
|
||||
|
||||
|
||||
def load_model(path: str = './models/pv_pred.joblib'):
    """Load a persisted regressor from disk.

    Args:
        path (str): filesystem path of the joblib-dumped model.

    Returns:
        The deserialized model object.
    """
    # BUG FIX: the original ignored `path` and always loaded
    # './models/pv_pred.joblib', which broke callers (e.g. run.py)
    # that pass a different location.
    return joblib.load(path)
|
||||
|
||||
def pv_forecast(inputs: np.ndarray, model):
    """Run a PV power forecast for one or more history windows.

    Args:
        inputs (np.ndarray): lagged observations — either a flat 1-D vector
            (one sample) or an already-batched 2-D array (n_samples, n_lags).
        model: fitted regressor exposing a scikit-learn style ``predict``.

    Returns:
        The model's prediction(s) for the given window(s).
    """
    arr = np.asarray(inputs)
    # The original always did model.predict([inputs]); with an input that a
    # caller had already reshaped to (1, n_lags) — run.py does this — that
    # produced an invalid 3-D batch. Accept both 1-D and 2-D shapes instead.
    if arr.ndim == 1:
        arr = arr.reshape(1, -1)
    return model.predict(arr)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Smoke test: load the trained model and forecast from a random
    # 24-hour history window.
    gbm = load_model('./models/pv_pred.joblib')
    history = np.random.randn(24)
    print(history.shape)
    prediction = pv_forecast(history, gbm)
    print(prediction)
|
|
@ -1,10 +1,12 @@
|
|||
import os
# Hide all CUDA devices so GPU-aware libraries (xgboost) run on CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import joblib
from logzero import logger
|
||||
|
||||
def time_series_to_supervised(data, n_in=10, n_out=1,dropnan=True):
|
||||
def time_series_to_supervised(data, columns, n_in=24, n_out=1,dropnan=True):
|
||||
"""
|
||||
:param data:作为列表或2D NumPy数组的观察序列。需要。
|
||||
:param n_in:作为输入的滞后观察数(X)。值可以在[1..len(数据)]之间可选。默认为1。
|
||||
|
@ -12,9 +14,10 @@ def time_series_to_supervised(data, n_in=10, n_out=1,dropnan=True):
|
|||
:param dropnan:Boolean是否删除具有NaN值的行。可选的。默认为True。
|
||||
:return:
|
||||
"""
|
||||
logger.info(f"正在处理训练数据:size:{data.shape}")
|
||||
n_vars = 1 if type(data) is list else data.shape[1]
|
||||
df = pd.DataFrame(data)
|
||||
origNames = df.columns
|
||||
origNames = columns
|
||||
cols, names = list(), list()
|
||||
cols.append(df.shift(0))
|
||||
names += [('%s' % origNames[j]) for j in range(n_vars)]
|
||||
|
@ -33,3 +36,38 @@ def time_series_to_supervised(data, n_in=10, n_out=1,dropnan=True):
|
|||
if dropnan:
|
||||
agg.dropna(inplace=True)
|
||||
return agg
|
||||
|
||||
def train_model(train_data: pd.DataFrame, save_path: str = './models/pv_pred.joblib'):
    """Train an XGBoost regressor on a supervised frame and persist it.

    The last column of *train_data* is treated as the target, all preceding
    columns as features. Data is split 80/10/10 into train/valid/test; the
    validation fold drives early stopping, the test fold is used for the
    reported RMSE / R2 metrics.

    Args:
        train_data (pd.DataFrame): supervised learning frame
            (features + target as last column).
        save_path (str): where to joblib-dump the fitted model
            (default keeps the original hard-coded location).
    """
    # Feature / target column names; adjust per business scenario.
    fea_cols = train_data.columns[:-1].tolist()
    out_cols = train_data.columns[-1:].tolist()
    # BUG FIX: logger.info(fea_cols, out_cols) passed out_cols as %-format
    # arguments for a message with no placeholders, raising a logging error.
    logger.info(f"feature cols: {fea_cols}, target cols: {out_cols}")
    X = train_data[fea_cols]
    y = train_data[out_cols]
    train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)
    valid_X, test_X, valid_y, test_y = train_test_split(test_X, test_y, test_size=0.5, random_state=42)

    # XGBoost hyper-parameters.
    other_params = {'learning_rate': 0.1, 'n_estimators': 150, 'max_depth': 10, 'min_child_weight': 1, 'seed': 0, 'subsample': 0.8, 'colsample_bytree': 0.8, 'gamma': 0, 'reg_alpha': 0, 'reg_lambda': 1}
    # Use the module logger consistently instead of a bare print.
    logger.info(f"train shapes: X={train_X.shape}, y={train_y.shape}")

    gbm = xgb.XGBRegressor(objective='reg:squarederror', **other_params)

    gbm.fit(train_X.values, train_y.values, eval_set=[(valid_X.values, valid_y.values)], early_stopping_rounds=20)

    y_pred = gbm.predict(test_X.values)

    logger.info(f"Root Mean Squared Error on Test set: {np.sqrt(mean_squared_error(test_y, y_pred))}")
    logger.info(f"R2 score on Test set: {r2_score(test_y, y_pred)}")
    joblib.dump(gbm, save_path)
    logger.info(f"save_path: {save_path}")
|
||||
|
||||
if __name__ == '__main__':
    # Build a supervised frame (24 lagged inputs -> 1-step-ahead target)
    # from the hourly PV data, then fit and persist the model.
    raw = pd.read_csv('./data/pv_data_hourly.csv', index_col=0)
    supervised = time_series_to_supervised(raw.values, raw.columns, 24, 1)
    train_model(supervised)
|
||||
|
|
|
@ -1,58 +0,0 @@
|
|||
import numpy as np
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from vmdpy import VMD
|
||||
from skopt import gp_minimize
|
||||
from skopt.space import Real, Integer, Categorical
|
||||
from skopt.utils import use_named_args
|
||||
|
||||
|
||||
# 加载数据
|
||||
def do_vmd(dataframe):
    """Decompose the 'power' column with VMD and append the modes as features.

    VMD hyper-parameters (alpha, tau, K, DC, init, tol) are tuned by
    Bayesian optimisation (skopt.gp_minimize, 50 evaluations); the objective
    is the negated mean bandwidth of the extracted modes.

    Args:
        dataframe (pd.DataFrame): must contain a 'power' column; its index
            is preserved on the returned frame.

    Returns:
        pd.DataFrame: the input frame with extra columns vmd_0 .. vmd_{K-1}.
    """
    signal = dataframe['power'].values

    def vmd_objective(params):
        alpha, tau, K, DC, init, tol = params
        u, u_hat, omega = VMD(signal, alpha, tau, K, DC, init, tol)
        # Mean mode bandwidth as the target; other objectives can be
        # substituted here as the use case requires.
        objective = np.mean([np.diff(np.where(um != 0)) for um in u])
        return -objective  # gp_minimize minimises, so negate

    # Search space for the VMD hyper-parameters.
    space = [
        Real(low=2, high=100, prior='log-uniform', name='alpha'),
        Real(low=0, high=1, name='tau'),
        Integer(low=2, high=10, name='K'),
        Categorical(categories=[0, 1], name='DC'),
        Categorical(categories=[0, 1], name='init'),
        Real(low=1e-6, high=1e-2, prior='log-uniform', name='tol')
    ]

    # Bayesian optimisation over the space above.
    @use_named_args(space)
    def objective_function(alpha, tau, K, DC, init, tol):
        return vmd_objective((alpha, tau, K, DC, init, tol))

    result = gp_minimize(objective_function, space, n_calls=50, random_state=0)

    print('最优参数:', result.x)
    print('最优目标函数值:', -result.fun)  # negate back to the raw objective value

    # Re-run VMD once with the best parameters found.
    alpha_opt, tau_opt, K_opt, DC_opt, init_opt, tol_opt = result.x
    u_opt, u_hat_opt, omega_opt = VMD(signal, alpha_opt, tau_opt, K_opt, DC_opt, init_opt, tol_opt)

    vmd_rst = u_opt.T
    # BUG FIX: the original concatenated against the module-level global
    # `data2vmd` instead of the `dataframe` argument, so the function only
    # worked when called from this file's __main__.
    vmd_train_data = pd.concat(
        [dataframe,
         pd.DataFrame.from_records(vmd_rst, index=dataframe.index,
                                   columns=[f"vmd_{x}" for x in range(vmd_rst.shape[1])])],
        axis=1)

    return vmd_train_data
|
||||
|
||||
if __name__ == '__main__':
    # Decompose the hourly PV series and persist the augmented training set.
    source = pd.read_csv('./data/pv_data_hourly.csv', index_col=0)
    augmented = do_vmd(source)
    augmented.to_csv('./data/vmd_train.csv', index=False, encoding='utf-8-sig')
|
|
@ -1,7 +1,13 @@
|
|||
EMD_signal==1.6.0
flask==3.1.0
logzero==1.7.0
matplotlib==3.7.0
numpy==1.22.0
pandas==1.5.3
PyEMD==1.0.0
scikit-optimize==0.10.1
scikit_learn==1.2.1
seaborn==0.13.2
vmdpy==0.2
xgboost==1.6.0
xlrd==2.0.1
|
35
run.py
35
run.py
|
@ -0,0 +1,35 @@
|
|||
import numpy as np
from flask import request, Flask, jsonify
from logzero import logger

from pv.pv_inference import load_model, pv_forecast
|
||||
# Flask application; the PV model is loaded once at import time so every
# request reuses the same fitted booster (path is relative to the repo root).
app = Flask(__name__)
gbm_pv = load_model('./pv/models/pv_pred.joblib')
# todo: write the Flask API endpoints
||||
@app.route('/pv', methods=['POST'])
def run_pv_forecast():
    """Forecast PV generation from the previous 24 hourly observations.

    Expects a JSON body of the form {"inputs": [<24 numbers>]}.

    Returns:
        (json, 200): {"result": [...]} with the forecast on success.
        (json, 400): {"error": "Invalid data"} on a malformed request.
    """
    # BUG FIX: request.data is raw bytes — the original membership test and
    # data.get('inputs') on it could never work. Parse the JSON body instead.
    data = request.get_json(silent=True)
    if not data or 'inputs' not in data:
        return jsonify({"error": "Invalid data"}), 400
    # Validate that `inputs` is a numeric vector of exactly 24 values.
    try:
        inputs = np.asarray(data['inputs'], dtype=float).ravel()
    except (TypeError, ValueError):
        return jsonify({"error": "Invalid data"}), 400
    if inputs.size != 24:
        return jsonify({"error": "Invalid data"}), 400
    logger.info(f"pv history inputs: {inputs}")
    out = pv_forecast(inputs, gbm_pv)
    # ndarray results are not JSON-serialisable; convert before jsonify.
    results = {"result": np.asarray(out).tolist()}
    return jsonify(results), 200
|
||||
|
||||
@app.route('/carbon', methods=['POST'])
def run_carbon_forecast():
    """Placeholder for the carbon-emission forecast endpoint.

    todo: wrap the other forecasting models here.

    NOTE(review): a Flask view that returns None raises at request time —
    implement the endpoint or return an explicit error response before
    exposing this route.
    """
    pass
|
||||
|
||||
if __name__ == '__main__':
    # BUG FIX: the port must be an int — werkzeug's server setup rejects the
    # string '2467'. NOTE(review): binding 0.0.0.0 exposes the service on all
    # interfaces; confirm that is intended for this deployment.
    app.run(host='0.0.0.0', port=2467)
|
Loading…
Reference in New Issue