133 lines
6.4 KiB
Python
133 lines
6.4 KiB
Python
# -*-coding:utf-8 -*-
|
||
import lightgbm as lgb
|
||
import numpy as np
|
||
import pandas as pd
|
||
import json
|
||
import datetime as dt
|
||
|
||
|
||
def load_history_data(data_path:str):
    """Read the historical data CSV at ``data_path`` into a DataFrame.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv`` with default options.
    """
    return pd.read_csv(data_path)
|
||
|
||
def load_config(cfg_path:str):
    """Parse the UTF-8 encoded JSON file at ``cfg_path``.

    Args:
        cfg_path: Path of the JSON configuration file.

    Returns:
        The decoded JSON document (dict, list, ... depending on the file).
    """
    with open(cfg_path, 'r', encoding='utf-8') as cfg_file:
        raw_text = cfg_file.read()
    return json.loads(raw_text)
|
||
|
||
def load_lgb_model(model_path:str):
    """Load a LightGBM booster from a saved model file.

    Args:
        model_path: Path handed to ``lgb.Booster`` as ``model_file``.

    Returns:
        The constructed ``lgb.Booster``.
    """
    booster = lgb.Booster(model_file=model_path)
    return booster
|
||
|
||
def cal_CO2(coal_cost, ncv):
    """Estimate CO2 from coal consumption and heating value.

    Evaluates ``coal_cost * ncv * 26.37e-3 * 0.94 * 44 / 12`` left to right.

    Args:
        coal_cost: Coal consumption (scalar or array-like supporting ``*``).
        ncv: Heating value paired with ``coal_cost``.
            NOTE(review): units are not visible here; confirm against callers.

    Returns:
        The product described above, same type as the inputs' arithmetic result.
    """
    heat_input = coal_cost * ncv
    # 26.37e-3: presumably carbon content per unit of heat -- TODO confirm source.
    carbon = heat_input * 26.37e-3
    # 0.94: presumably the carbon oxidation rate -- TODO confirm source.
    oxidised = carbon * 0.94
    # 44 / 12 looks like the CO2-to-carbon molar mass ratio.
    return oxidised * 44 / 12
|
||
|
||
def cal_coal_cost_emission(coal_cost, boiler, emission_factors):
    """Scale coal consumption by a boiler-specific emission factor.

    Args:
        coal_cost: Coal consumption to scale.
        boiler: Key looked up in ``emission_factors``.
        emission_factors: Mapping from boiler key to factor; the ``"default"``
            entry is used when ``boiler`` has no entry (or maps to ``None``).

    Returns:
        ``coal_cost`` multiplied by the selected factor.
    """
    specific = emission_factors.get(boiler)
    if specific is None:
        # Unknown boiler type: fall back to the catch-all factor.
        return emission_factors.get("default") * coal_cost
    return coal_cost * specific
|
||
|
||
def cal_PM(c_smoke, boiler, emission_factors):
    """Scale a smoke quantity by a boiler-specific factor and divide by 1e6.

    Args:
        c_smoke: Smoke quantity to scale.
        boiler: Key looked up in ``emission_factors``.
        emission_factors: Mapping from boiler key to factor; the ``"default"``
            entry is used when ``boiler`` has no entry (or maps to ``None``).

    Returns:
        ``c_smoke * factor / 1e6`` for the selected factor.
    """
    specific = emission_factors.get(boiler)
    if specific is None:
        # Unknown boiler type: fall back to the catch-all factor.
        return emission_factors.get("default") * c_smoke / 1e6
    return c_smoke * specific / 1e6
|
||
|
||
def predict(his_data, input_data, model:lgb.Booster, object_cols, emission_factors):
    """Predict coal consumption and derived emissions for a single record.

    Args:
        his_data: Historical frame the new row is appended to before one-hot
            encoding, so the encoded row ends up with the columns the model
            asks for.
        input_data: Mapping with the keys read below (``time``, ``nox``,
            ``so2``, ``smoke``, ``flow``, ``caloric``, ``boiler``, ``steam``,
            ``cold``, ``pressure``, ``o2``, ``temp``, ``evaporation``,
            ``capacity``, ``lon``, ``lat``). ``time`` must match
            ``'%Y-%m-%d %H:%M:%S'``.
        model: Booster providing ``feature_name()`` and ``predict()``.
        object_cols: Columns passed to ``pd.get_dummies``.
        emission_factors: Mapping with ``co``, ``vocs``, ``pm25`` and ``pm10``
            factor tables.

    Returns:
        Dict with keys ``coal``, ``co``, ``vocs``, ``pm25``, ``pm10``, ``co2``.
    """
    feature_names = model.feature_name()
    timestamp = dt.datetime.strptime(input_data.get('time'), '%Y-%m-%d %H:%M:%S')

    r_NOx = float(input_data.get('nox'))
    r_SO2 = float(input_data.get('so2'))
    r_smoke = float(input_data.get('smoke'))
    flow = float(input_data.get('flow'))
    # Each reading multiplied by the flow value.
    c_NOx, c_SO2, c_smoke = np.multiply(flow, [r_NOx, r_SO2, r_smoke])

    caloric = float(input_data.get("caloric"))
    # NOTE(review): predict_df rescales only above 9999, this path above 1000;
    # confirm which threshold is intended.
    if caloric > 1000:
        caloric = caloric / 1000

    boiler = input_data.get('boiler')
    # The first seven entries are stored as-is; everything else is log1p-transformed.
    features = {
        "生产设备类型": boiler,
        "汽轮机类型": input_data.get('steam'),
        "冷却方式": input_data.get('cold'),
        "压力参数": input_data.get('pressure'),
        "day_of_week": timestamp.weekday(),
        "month": timestamp.month,
        "hour": timestamp.hour,
        "0_r_NOx": np.log1p(r_NOx),
        "0_r_SO2": np.log1p(r_SO2),
        "0_r_smoke": np.log1p(r_smoke),
        "0_c_NOx": np.log1p(float(c_NOx)),
        "0_c_SO2": np.log1p(float(c_SO2)),
        "0_c_smoke": np.log1p(float(c_smoke)),
        "0_flow": np.log1p(flow),
        "0_O2": np.log1p(float(input_data.get("o2"))),
        "0_temp": np.log1p(float(input_data.get("temp"))),
        "额定蒸发量_t/h": np.log1p(float(input_data.get("evaporation"))),
        "低位发热量": np.log1p(caloric),
        "单机容量(MW)": np.log1p(float(input_data.get("capacity"))),
        "lon": np.log1p(float(input_data.get("lon"))),
        "lat": np.log1p(float(input_data.get("lat"))),
    }

    # Append the new record to the history and encode them together; only the
    # last (new) row is fed to the model.
    row_frame = pd.DataFrame.from_dict(features, orient='index').T
    combined = pd.concat([his_data, row_frame])
    encoded = pd.get_dummies(combined, columns=object_cols)
    model_row = encoded[feature_names].iloc[-1].values

    # The prediction is mapped back with expm1 before use.
    coal_cost = np.expm1(model.predict([model_row])[0])

    result = {'coal': coal_cost}
    result['co'] = cal_coal_cost_emission(coal_cost, boiler, emission_factors.get('co'))
    result['vocs'] = cal_coal_cost_emission(coal_cost, boiler, emission_factors.get('vocs'))
    result['pm25'] = cal_PM(c_smoke, boiler, emission_factors.get('pm25'))
    result['pm10'] = cal_PM(c_smoke, boiler, emission_factors.get('pm10'))
    result['co2'] = cal_CO2(coal_cost, caloric)
    return result
|
||
|
||
|
||
def predict_df(his_data, input_data:pd.DataFrame, model:lgb.Booster, object_cols:list, emission_factors):
    """Predict coal consumption and derived emissions for a batch of records.

    The caller's ``input_data`` is left untouched; all transformations happen
    on copies.

    Args:
        his_data: Historical frame the new rows are appended to before one-hot
            encoding, so the encoded rows end up with the columns the model
            asks for.
        input_data: Frame with at least ``company``, ``unit``, ``time``,
            ``生产设备类型``, ``低位发热量``, ``0_flow``, ``0_r_NOx``,
            ``0_r_SO2`` and ``0_r_smoke``. Any column not in ``object_cols`` or
            (``company``, ``unit``, ``time``) is treated as numeric.
        model: Booster providing ``feature_name()`` and ``predict()``.
        object_cols: Columns passed to ``pd.get_dummies``.
        emission_factors: Mapping with ``co``, ``vocs``, ``pm25`` and ``pm10``
            factor tables.

    Returns:
        Dict of lists keyed by ``company``, ``unit``, ``time``, ``coal``,
        ``co``, ``vocs``, ``co2``, ``pm25``, ``pm10``.
    """
    feature_names = model.feature_name()
    ignore_cols = ['company', 'unit', 'time']
    boiler_col = '生产设备类型'
    lhv_col = '低位发热量'

    def _numeric_columns(frame):
        # Everything that is neither an identifier column nor a categorical one.
        return [c for c in frame.columns if c not in ignore_cols and c not in object_cols]

    # `result` keeps the caller's columns (including the raw `time`) for the
    # output; `work` is the frame that gets transformed. Both are copies, so
    # the caller can reuse its DataFrame after this call.
    result = input_data.copy()
    work = input_data.copy()

    timestamps = pd.to_datetime(work['time'])
    work['day_of_week'] = timestamps.dt.weekday
    work['month'] = timestamps.dt.month
    work['hour'] = timestamps.dt.hour
    work = work.drop(columns='time')

    for col in _numeric_columns(work):
        work[col] = work[col].astype(float)

    # Each reading multiplied by the flow value.
    flow = work['0_flow']
    work['0_c_NOx'] = work['0_r_NOx'] * flow
    work['0_c_SO2'] = work['0_r_SO2'] * flow
    work['0_c_smoke'] = work['0_r_smoke'] * flow

    # NOTE(review): predict() rescales above 1000, this path only above 9999;
    # confirm which threshold is intended.
    lhv = work[lhv_col]
    work[lhv_col] = lhv.mask(lhv > 9999, lhv / 1000)

    # NOTE(review): unless listed in object_cols, day_of_week/month/hour are
    # log1p-transformed here, while predict() stores them untransformed; confirm.
    model_frame = work.copy()
    for col in _numeric_columns(work):
        model_frame[col] = np.log1p(model_frame[col])

    total_data = pd.concat([his_data, model_frame])
    encoded = pd.get_dummies(total_data, columns=object_cols)
    # Slice by history length rather than `-n:`, which would select every row
    # (history included) when the batch is empty.
    new_rows = encoded.iloc[len(his_data):][feature_names]
    # The prediction is mapped back with expm1 before use.
    result['coal'] = np.expm1(model.predict(new_rows))

    boilers = list(work[boiler_col])
    coal = list(result['coal'])
    smoke = list(work['0_c_smoke'])

    co_factors = emission_factors.get('co')
    vocs_factors = emission_factors.get('vocs')
    pm25_factors = emission_factors.get('pm25')
    pm10_factors = emission_factors.get('pm10')

    result['co'] = [cal_coal_cost_emission(c, b, co_factors) for c, b in zip(coal, boilers)]
    result['vocs'] = [cal_coal_cost_emission(c, b, vocs_factors) for c, b in zip(coal, boilers)]
    result['pm25'] = [cal_PM(s, b, pm25_factors) for s, b in zip(smoke, boilers)]
    result['pm10'] = [cal_PM(s, b, pm10_factors) for s, b in zip(smoke, boilers)]

    # Use the rescaled heating value, as predict() does; the raw input column
    # would inflate CO2 by 1000x for rows above the rescale threshold.
    result['co2'] = cal_CO2(result['coal'].to_numpy(), work[lhv_col].to_numpy())

    out_cols = ['company', 'unit', 'time', 'coal', 'co', 'vocs', 'co2', 'pm25', 'pm10']
    return result[out_cols].to_dict(orient='list')
|
||
|
||
if __name__ == '__main__':
    # Manual smoke run: rebuild two input rows from the sample history and
    # push them through predict_df.
    history = load_history_data('../data/data_sample.csv')
    object_cols = load_config('../config/object_cols.json')
    model = load_lgb_model('../model_files/hour_best_model.txt')
    emission_factors = load_config('../config/emission_factor.json')

    # Copy the slice so the assignments below write to an independent frame
    # instead of a view of `history` (avoids SettingWithCopyWarning).
    inputs = history.iloc[:2].copy()
    # Undo the log1p transform; presumably columns from index 8 on are the
    # log-transformed ones in the sample file -- verify against the data.
    for col in inputs.columns[8:]:
        inputs[col] = np.expm1(inputs[col])
    inputs['time'] = dt.datetime.now()
    inputs['company'] = '浙江秀舟热电'
    inputs['unit'] = '1'
    print(inputs.columns)
    print(predict_df(history, inputs, model, object_cols, emission_factors))