288 lines
12 KiB
Python
288 lines
12 KiB
Python
import numpy as np
|
||
import pandas as pd
|
||
import xgboost as xgb
|
||
from sklearn.model_selection import train_test_split
|
||
import os
|
||
import chardet
|
||
import joblib
|
||
|
||
from sklearn.metrics import (
|
||
mean_squared_error,
|
||
mean_absolute_error,
|
||
median_absolute_error,
|
||
mean_absolute_percentage_error,
|
||
r2_score
|
||
)
|
||
|
||
# Read the input files.
# Every data/model path in this script is resolved against the working
# directory captured here, once, at import time.
current_directory = os.getcwd()
print(current_directory)


def _read_table(path, header_row):
    """Load a UTF-8 CSV whose real column names are stored in a data row.

    ``pd.read_csv`` consumes the first file line as a provisional header; the
    row at position ``header_row`` of the resulting frame holds the actual
    column names. That row is promoted to the header and only the rows after
    it are returned (values are still unparsed at this point).
    """
    raw = pd.read_csv(path, encoding='utf-8')
    raw.columns = raw.iloc[header_row, :]
    return raw.loc[header_row + 1:, :]


def _with_row_number(frame):
    """Return ``frame`` with a fresh 0-based index and a 1-based ``index`` column.

    The ``index`` column is the key on which the three tables are joined, so
    the tables are aligned purely by row position.
    """
    frame = frame.reset_index(drop=True)
    frame = frame.reset_index()
    frame['index'] = frame.index + 1
    return frame


def get_orign_data():
    """Load and join the inflow, outflow and CH4 gas tables.

    Reads ``inflowAll.csv``, ``outFlow.csv`` and ``ch4GasAll.csv`` from
    ``<current_directory>/jiawanyuce/data`` and left-joins them row by row
    onto the inflow table.

    Returns:
        DataFrame with the columns ``index`` (1-based row number), ``X_ch``,
        ``X_pr``, ``X_li``, ``X_I``, ``Q``, ``pH``, ``Nm3d-1-ch4`` and
        ``S_gas_ch4``. Per the original author's notes these are the
        carbohydrate, protein and lipid concentrations, particulate inerts,
        input flow, pH, methane flow and S_gas_ch4.

    Raises:
        FileNotFoundError: if one of the CSV files is missing.
        ValueError: if a value in the outflow or gas table is not numeric.
        KeyError: if an expected column is absent.
    """
    # Join the path components separately: the former 'jiawanyuce\data'
    # literal only worked on Windows and contained an invalid '\d' escape.
    save_directory = os.path.join(current_directory, 'jiawanyuce', 'data')
    inflow_cols = ['X_ch', 'X_pr', 'X_li', 'X_I', 'Q']

    # Inflow: column names live in the second data row.
    inflow = _read_table(os.path.join(save_directory, 'inflowAll.csv'), 1)
    inflow = _with_row_number(inflow[inflow_cols].astype('float32'))

    # Outflow: same layout. The columns it shares with the inflow and gas
    # tables are dropped so the merges below do not create _x/_y duplicates.
    outflow = _read_table(os.path.join(save_directory, 'outFlow.csv'), 1)
    outflow = outflow.apply(pd.to_numeric, errors='raise')
    outflow = outflow.drop(['X_ch', 'X_pr', 'X_li', 'X_I', 'S_gas_ch4'], axis=1)
    outflow = _with_row_number(outflow)

    # Gas table: column names live in the first data row.
    ch4gasall = _read_table(os.path.join(save_directory, 'ch4GasAll.csv'), 0)
    ch4gasall = ch4gasall.apply(pd.to_numeric, errors='raise')
    ch4gasall = ch4gasall[['甲烷流量(Nm3d-1)', '气体总流量(Nm3d-1)', 'S_gas_ch4(kg COD m-3)']]
    ch4gasall.columns = ['Nm3d-1-ch4', 'Nm3d-1-all', 'S_gas_ch4']
    ch4gasall = _with_row_number(ch4gasall)

    data = inflow.merge(outflow, how='left', on=['index'])
    data = data.merge(ch4gasall, how='left', on=['index'])

    return data[['index', 'X_ch', 'X_pr', 'X_li', 'X_I', 'Q', 'pH', 'Nm3d-1-ch4', 'S_gas_ch4']]
|
||
|
||
# Statistics computed for every rolling window, in output-column order.
_ROLLING_STATS = ("mean", "max", "min", "median", "std", "var", "skew", "kurt")


def _add_rolling_stats(data, name, window, tag):
    """Add ``<name>_<tag>_<stat>`` columns for each statistic over ``window`` rows."""
    roller = data[name].rolling(window)
    for stat in _ROLLING_STATS:
        data[f"{name}_{tag}_{stat}"] = getattr(roller, stat)()


def data_feature(data, x_cols):
    """Add lag, difference and rolling-window features for each source column.

    For every ``name`` in ``x_cols`` the following columns are appended to
    ``data`` (in this order):

    * ``name_15_first`` .. ``name_60_first``: the value 1..4 rows earlier;
    * ``name_1h_<stat>``: rolling statistics over 4 rows;
    * ``name_1_diff`` / ``name_2_diff``: difference to 1 / 2 rows earlier;
    * ``name_2h_<stat>``: rolling statistics over 8 rows;

    and, after all of the above, ``name_d_<stat>`` over 4*24 rows. ``<stat>``
    is one of mean, max, min, median, std, var, skew, kurt. The suffixes
    suggest 15-minute sampling (4 rows = 1 hour, 96 rows = 1 day); that is not
    checked here.

    Args:
        data: DataFrame containing every column listed in ``x_cols``.
            It is modified in place.
        x_cols: iterable of column names to derive features from.

    Returns:
        The same ``data`` object, with the new columns added. The leading
        rows of each feature are NaN until its window is full.
    """
    lag_suffixes = ((1, "_15_first"), (2, "_30_first"), (3, "_45_first"), (4, "_60_first"))

    for name in x_cols:
        for step, suffix in lag_suffixes:
            data[name + suffix] = data[name].shift(step)

        _add_rolling_stats(data, name, 4, "1h")

        data[name + "_1_diff"] = data[name].diff(periods=1)
        data[name + "_2_diff"] = data[name].diff(periods=2)

        _add_rolling_stats(data, name, 8, "2h")

    # Daily-window features are added in a second pass so they come after all
    # short-term features. (Original author's note: the daily averages were
    # considered for removal because the window is long.)
    # The daily median now uses the same 4*24 window as the other daily
    # statistics; it previously used rolling(4), duplicating the 1h median.
    # Models trained on the old values must be retrained.
    for name in x_cols:
        _add_rolling_stats(data, name, 4 * 24, "d")

    return data
|
||
|
||
|
||
def data_result(data,target_1):
    """Append the look-ahead target columns for ``target_1``.

    Adds ``<target_1>_1_after`` .. ``<target_1>_4_after`` to ``data`` in
    place, holding the value of ``target_1`` 1..4 rows later. The last rows
    of each new column are NaN because no later value exists.

    Returns:
        The same ``data`` object.
    """
    for horizon in (1, 2, 3, 4):
        column = target_1 + "_" + str(horizon) + "_after"
        data[column] = data[target_1].shift(-horizon)
    return data
|
||
|
||
def regression_metrics(true, pred):
    """Print regression metrics for every output column.

    For each column the MSE, RMSE, MAE, median absolute error, MAPE and R2
    score are printed (labels are in Chinese). MAPE and R2 are not symmetric,
    so the argument order matters.

    Args:
        true: ground-truth values, array-like of shape (n_samples,) or
            (n_samples, n_outputs).
        pred: predicted values with the same shape as ``true``.

    Raises:
        ValueError: if ``true`` and ``pred`` differ in shape.
    """
    print('回归模型评估指标结果:')

    # Make sure both inputs are numpy arrays.
    true = np.asarray(true)
    pred = np.asarray(pred)

    # The shapes must match exactly.
    if true.shape != pred.shape:
        raise ValueError("真实值和预测值的形状不一致: {} vs {}".format(true.shape, pred.shape))

    # A single-output problem may arrive as 1-D vectors; treat it as one
    # column instead of failing on the missing second axis.
    if true.ndim == 1:
        true = true.reshape(-1, 1)
        pred = pred.reshape(-1, 1)

    # Report the metrics column by column.
    for i in range(true.shape[1]):
        col_true = true[:, i]
        col_pred = pred[:, i]
        mse = mean_squared_error(col_true, col_pred)

        print(f'对于第 {i+1} 列:')
        print('均方误差【MSE】:', mse)
        print('均方根误差【RMSE】:', np.sqrt(mse))
        print('平均绝对误差【MAE】:', mean_absolute_error(col_true, col_pred))
        print('绝对误差中位数【MedianAE】:', median_absolute_error(col_true, col_pred))
        print('平均绝对百分比误差【MAPE】:', mean_absolute_percentage_error(col_true, col_pred))
        print("R2 【R2 score】:", r2_score(col_true, col_pred))
        print('-----------------------')
|
||
|
||
|
||
# Raw signals; every engineered feature name is derived from one of these.
_BASE_SIGNALS = ['X_ch', 'X_pr', 'X_li', 'X_I', 'Q', 'pH', 'Nm3d-1-ch4', 'S_gas_ch4']

# Rolling statistics, in the order their columns appear in the feature list.
_STAT_NAMES = ['mean', 'max', 'min', 'median', 'std', 'var', 'skew', 'kurt']

# Per-signal short-term suffixes: lags, 1h window, differences, 2h window.
_SHORT_TERM_SUFFIXES = (
    ['_15_first', '_30_first', '_45_first', '_60_first']
    + ['_1h_' + stat for stat in _STAT_NAMES]
    + ['_1_diff', '_2_diff']
    + ['_2h_' + stat for stat in _STAT_NAMES]
)

# Model input columns. The order is significant because the training matrix
# is built by selecting these columns in sequence: raw signals first, then
# all short-term features signal by signal, then all daily-window features
# signal by signal.
feature_list = (
    list(_BASE_SIGNALS)
    + [signal + suffix for signal in _BASE_SIGNALS for suffix in _SHORT_TERM_SUFFIXES]
    + [signal + '_d_' + stat for signal in _BASE_SIGNALS for stat in _STAT_NAMES]
)

# Look-ahead targets (1..4 rows ahead) for the two predicted quantities.
target_1_list = ['Nm3d-1-ch4_' + str(step) + '_after' for step in range(1, 5)]
target_2_list = ['S_gas_ch4_' + str(step) + '_after' for step in range(1, 5)]
|
||
|
||
def _train_and_save_model(train_data, target_cols, model_path):
    """Train one XGBoost regressor on ``target_cols``, print its metrics and save it.

    Args:
        train_data: DataFrame holding every column of ``feature_list`` and
            ``target_cols``.
        target_cols: list of label column names (one model output per column).
        model_path: destination of the joblib-pickled booster; its parent
            directory is created if missing.

    Returns:
        The trained booster.
    """
    X = np.array(train_data[feature_list].values)
    y = np.array(train_data[target_cols].values)

    # NOTE(review): this is a random split of rows that carry lag/rolling
    # features, so neighbouring rows can land on both sides; the printed
    # metrics are likely optimistic. Kept as in the original.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Build the DMatrix datasets.
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # Training parameters.
    params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'eta': 0.1,
        'max_depth': 3,
    }

    # Train and predict on the held-out split.
    model = xgb.train(params, dtrain, num_boost_round=100)
    predictions = model.predict(dtest)

    # regression_metrics expects (true, pred); MAPE and R2 depend on the order.
    regression_metrics(y_test, predictions)

    # Persist the model, creating the target directory on first run.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    joblib.dump(model, model_path)
    return model


if __name__ == "__main__":
    data = get_orign_data()
    x_cols = ['X_ch', 'X_pr', 'X_li', 'X_I', 'Q', 'pH', 'Nm3d-1-ch4', 'S_gas_ch4']
    target_1 = 'Nm3d-1-ch4'
    target_2 = 'S_gas_ch4'
    data = data_feature(data, x_cols)

    data = data_result(data, target_1)
    data = data_result(data, target_2)

    # Drop the warm-up rows: the longest rolling window is 4*24 = 96 rows, so
    # the first 95 rows have incomplete features. (This slice used to be
    # applied twice, discarding 95 additional valid rows.)
    data = data.iloc[95:, :]
    # Drop the last 4 rows, whose look-ahead targets are NaN.
    data = data.iloc[:-4, :]

    # Split: the final 14*24*4 rows are held out, the rest is used for training.
    holdout_rows = 14 * 24 * 4
    test_data = data.iloc[-holdout_rows:, :]
    train_data = data.iloc[:-holdout_rows, :]

    # Keep only the raw columns of the held-out rows, as sample input data.
    test_data = test_data[['index', 'X_ch', 'X_pr', 'X_li', 'X_I', 'Q', 'pH', 'Nm3d-1-ch4', 'S_gas_ch4']]
    test_data.to_csv(os.path.join(current_directory, 'jiawanyuce/data/jiawan_test.csv'), index=False)

    # Methane flow model ('Nm3d-1-ch4' targets): train and save.
    _train_and_save_model(
        train_data,
        target_1_list,
        os.path.join(current_directory, 'jiawanyuce/liuliang_model/xgb_model_liuliang.pkl'),
    )

    # Gas-phase concentration model ('S_gas_ch4' targets): train and save.
    _train_and_save_model(
        train_data,
        target_2_list,
        os.path.join(current_directory, 'jiawanyuce/qixiangnongdu_model/xgb_model_qixiangnongdu.pkl'),
    )
|
||
|
||
|
||
|
||
|