emission_detect_model/models/lgb_predict.py

133 lines
6.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*-coding:utf-8 -*-
import lightgbm as lgb
import numpy as np
import pandas as pd
import json
import datetime as dt
def load_history_data(data_path:str):
    """Read the historical feature table from a CSV file.

    Args:
        data_path: Location of the CSV file, handed straight to
            ``pandas.read_csv``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pd.read_csv(data_path)
def load_config(cfg_path:str):
    """Load a JSON configuration file.

    Args:
        cfg_path: Path of the JSON file; it is opened as UTF-8 text.

    Returns:
        Whatever object the JSON document decodes to.
    """
    with open(cfg_path, mode='r', encoding='utf-8') as handle:
        return json.load(handle)
def load_lgb_model(model_path:str):
    """Build a LightGBM booster from a saved model file.

    Args:
        model_path: Path passed to ``lgb.Booster`` as ``model_file``.

    Returns:
        The constructed ``lgb.Booster``.
    """
    booster = lgb.Booster(model_file=model_path)
    return booster
def cal_CO2(coal_cost, ncv):
    """Convert a coal amount and its calorific value into a CO2 figure.

    The result is ``coal_cost * ncv * 26.37e-3 * 0.94 * 44 / 12``,
    evaluated left to right.

    NOTE(review): 26.37e-3 and 0.94 look like a carbon-content-per-heat
    factor and an oxidation rate, and 44/12 like the CO2-to-carbon mass
    ratio -- confirm against the accounting guideline this was taken from.

    Args:
        coal_cost: Coal consumption (scalar or array-like supporting ``*``).
        ncv: Calorific value paired with ``coal_cost``.

    Returns:
        The product described above.
    """
    heat = coal_cost * ncv
    carbon = heat * 26.37e-3 * 0.94
    # Multiply before dividing to keep the original rounding behaviour.
    return carbon * 44 / 12
def cal_coal_cost_emission(coal_cost, boiler, emission_factors):
    """Scale a coal amount by the emission factor registered for a boiler.

    Args:
        coal_cost: Coal consumption to scale.
        boiler: Key looked up in ``emission_factors``.
        emission_factors: Mapping from boiler key to factor; its
            ``"default"`` entry is used when ``boiler`` has no entry
            (or maps to ``None``).

    Returns:
        ``coal_cost`` multiplied by the selected factor.
    """
    rate = emission_factors.get(boiler)
    if rate is None:
        # Unknown boiler type: fall back to the catch-all factor.
        return emission_factors.get("default") * coal_cost
    return coal_cost * rate
def cal_PM(c_smoke, boiler, emission_factors):
    """Derive a particulate figure from a smoke amount and a boiler factor.

    Args:
        c_smoke: Smoke quantity to scale.
        boiler: Key looked up in ``emission_factors``.
        emission_factors: Mapping from boiler key to factor; its
            ``"default"`` entry is used when ``boiler`` has no entry
            (or maps to ``None``).

    Returns:
        ``c_smoke`` times the selected factor, divided by ``1e6``.
    """
    rate = emission_factors.get(boiler)
    if rate is None:
        # Unknown boiler type: fall back to the catch-all factor.
        return emission_factors.get("default") * c_smoke / 1e6
    return c_smoke * rate / 1e6
def predict(his_data, input_data, model:lgb.Booster, object_cols, emission_factors):
    """Estimate coal consumption and derived emissions for one record.

    The record is turned into a one-row frame, appended to ``his_data`` so
    that ``pandas.get_dummies`` sees the historical categories, one-hot
    encoded on ``object_cols`` and fed to ``model`` in the column order
    given by ``model.feature_name()``.

    Args:
        his_data: Historical feature frame the new row is appended to.
        input_data: Mapping read through ``.get`` for the keys ``time``
            (``'%Y-%m-%d %H:%M:%S'``), ``nox``, ``so2``, ``smoke``,
            ``flow``, ``caloric``, ``boiler``, ``steam``, ``cold``,
            ``pressure``, ``o2``, ``temp``, ``evaporation``, ``capacity``,
            ``lon`` and ``lat``.
        model: Booster whose prediction is passed through ``expm1``.
        object_cols: Columns to one-hot encode.
        emission_factors: Mapping with ``co``, ``vocs``, ``pm25`` and
            ``pm10`` factor tables.

    Returns:
        Dict with the keys ``coal``, ``co``, ``vocs``, ``pm25``, ``pm10``
        and ``co2``.
    """
    def as_float(key):
        # Every numeric field is read from the record and coerced the same way.
        return float(input_data.get(key))

    feature_names = model.feature_name()
    stamp = dt.datetime.strptime(input_data.get('time'), '%Y-%m-%d %H:%M:%S')

    r_NOx = as_float('nox')
    r_SO2 = as_float('so2')
    r_smoke = as_float('smoke')
    flow = as_float('flow')
    # Each reading multiplied by the flow value, computed in one array product.
    c_NOx, c_SO2, c_smoke = flow * np.asarray([r_NOx, r_SO2, r_smoke])

    caloric = as_float("caloric")
    # Values above 1000 are scaled down by 1000 before use.
    caloric = caloric / 1000 if caloric > 1000 else caloric

    boiler = input_data.get('boiler')
    inputs = {
        "生产设备类型": boiler,
        "汽轮机类型": input_data.get('steam'),
        "冷却方式": input_data.get('cold'),
        "压力参数": input_data.get('pressure'),
        # Calendar features are passed through without log1p.
        "day_of_week": stamp.weekday(),
        "month": stamp.month,
        "hour": stamp.hour,
        "0_r_NOx": np.log1p(r_NOx),
        "0_r_SO2": np.log1p(r_SO2),
        "0_r_smoke": np.log1p(r_smoke),
        "0_c_NOx": np.log1p(c_NOx),
        "0_c_SO2": np.log1p(c_SO2),
        "0_c_smoke": np.log1p(c_smoke),
        "0_flow": np.log1p(flow),
        "0_O2": np.log1p(as_float("o2")),
        "0_temp": np.log1p(as_float("temp")),
        "额定蒸发量_t/h": np.log1p(as_float("evaporation")),
        "低位发热量": np.log1p(caloric),
        "单机容量MW": np.log1p(as_float("capacity")),
        "lon": np.log1p(as_float("lon")),
        "lat": np.log1p(as_float("lat")),
    }

    record = pd.DataFrame.from_dict(inputs, orient='index').T
    combined = pd.concat([his_data, record])
    encoded = pd.get_dummies(combined, columns=object_cols)
    # The appended record is the last row of the encoded frame.
    row = encoded[feature_names].iloc[-1].values
    coal_cost = np.expm1(model.predict([row])[0])

    co = cal_coal_cost_emission(coal_cost, boiler, emission_factors.get('co'))
    vocs = cal_coal_cost_emission(coal_cost, boiler, emission_factors.get('vocs'))
    pm25 = cal_PM(c_smoke, boiler, emission_factors.get('pm25'))
    pm10 = cal_PM(c_smoke, boiler, emission_factors.get('pm10'))
    co2 = cal_CO2(coal_cost, caloric)
    return {'coal': coal_cost, 'co': co, 'vocs': vocs, 'pm25': pm25, 'pm10': pm10, 'co2': co2}
def predict_df(his_data, input_data:pd.DataFrame, model:lgb.Booster, object_cols:list, emission_factors):
    """Estimate coal consumption and derived emissions for a batch of rows.

    Feature engineering runs on a private copy, so ``input_data`` is left
    exactly as the caller passed it. The rows are appended to ``his_data``
    so that ``pandas.get_dummies`` sees the historical categories, one-hot
    encoded on ``object_cols`` and fed to ``model`` in the column order
    given by ``model.feature_name()``.

    NOTE(review): ``predict`` in this file rescales the calorific value when
    it exceeds 1000 and feeds ``day_of_week``/``month``/``hour`` without
    ``log1p``; this function keeps its own 9999 threshold and log-transforms
    the calendar columns -- confirm which variant matches training.

    Args:
        his_data: Historical feature frame the batch is appended to.
        input_data: Batch frame. Must contain ``company``, ``unit``,
            ``time``, ``生产设备类型``, ``低位发热量``, ``0_flow`` and the
            ``0_r_NOx``/``0_r_SO2``/``0_r_smoke`` columns; every column not
            in ``object_cols`` or ``company``/``unit``/``time`` is cast to
            float.
        model: Booster whose predictions are passed through ``expm1``.
        object_cols: Columns to one-hot encode.
        emission_factors: Mapping with ``co``, ``vocs``, ``pm25`` and
            ``pm10`` factor tables.

    Returns:
        Dict of lists keyed by ``company``, ``unit``, ``time``, ``coal``,
        ``co``, ``vocs``, ``co2``, ``pm25`` and ``pm10``. ``time`` holds the
        caller's original values. An empty batch yields empty lists and the
        model is not invoked.
    """
    feature_names = model.feature_name()
    ignore_cols = ['company', 'unit', 'time']
    boiler_col = '生产设备类型'
    ncv_col = '低位发热量'

    # `result` keeps the caller's original values for the output; `features`
    # is the scratch frame, so the argument itself is never modified.
    result = input_data.copy()
    features = input_data.copy()

    timestamps = pd.to_datetime(features['time'])
    features['day_of_week'] = timestamps.dt.weekday
    features['month'] = timestamps.dt.month
    features['hour'] = timestamps.dt.hour
    features = features.drop(columns='time')

    num_cols = [c for c in features.columns if c not in ignore_cols and c not in object_cols]
    features = features.astype({c: float for c in num_cols})

    features['0_c_NOx'] = features['0_r_NOx'] * features['0_flow']
    features['0_c_SO2'] = features['0_r_SO2'] * features['0_flow']
    features['0_c_smoke'] = features['0_r_smoke'] * features['0_flow']
    # Values above 9999 are scaled down by 1000; everything else is kept.
    features[ncv_col] = features[ncv_col].where(features[ncv_col] <= 9999, features[ncv_col] / 1000)

    # Recompute so the freshly derived 0_c_* columns are log-transformed too.
    num_cols = [c for c in features.columns if c not in ignore_cols and c not in object_cols]
    model_rows = features.copy()
    for col in num_cols:
        model_rows[col] = np.log1p(model_rows[col])

    total_data = pd.concat([his_data, model_rows])
    encoded = pd.get_dummies(total_data, columns=object_cols)
    # Slice by the history length rather than by -n: a negative-zero slice
    # would select the whole history when the batch is empty.
    batch = encoded.iloc[len(his_data):][feature_names]
    if len(batch):
        coal = np.expm1(model.predict(batch))
    else:
        coal = np.empty(0, dtype=float)
    result['coal'] = coal

    boilers = result[boiler_col]
    smoke = features['0_c_smoke']

    def per_row(func, values, table):
        # Apply a per-boiler emission helper to each (value, boiler) pair.
        return [func(value, boiler, table) for value, boiler in zip(values, boilers)]

    result['co'] = per_row(cal_coal_cost_emission, coal, emission_factors.get('co'))
    result['vocs'] = per_row(cal_coal_cost_emission, coal, emission_factors.get('vocs'))
    result['pm25'] = per_row(cal_PM, smoke, emission_factors.get('pm25'))
    result['pm10'] = per_row(cal_PM, smoke, emission_factors.get('pm10'))
    # Use the rescaled calorific value (the one the model saw), not the raw
    # input, so CO2 is not inflated by 1000 for rows that were rescaled.
    result['co2'] = [cal_CO2(c, ncv) for c, ncv in zip(coal, features[ncv_col])]

    out_cols = ['company', 'unit', 'time', 'coal', 'co', 'vocs', 'co2', 'pm25', 'pm10']
    return result[out_cols].to_dict(orient='list')
if __name__ == '__main__':
    # Manual smoke run: score the first two sample rows with the saved model.
    # All paths are relative to the current working directory.
    history = load_history_data('../data/data_sample.csv')
    object_cols = load_config('../config/object_cols.json')
    model = load_lgb_model('../model_files/hour_best_model.txt')
    emission_factors = load_config('../config/emission_factor.json')

    # Take an explicit copy: assigning columns on a bare `iloc` slice is
    # chained assignment (SettingWithCopyWarning / ambiguous write target).
    inputs = history.iloc[:2].copy()
    # Columns from position 8 onward are passed through expm1 -- presumably
    # they are stored log1p-transformed in the sample file; verify.
    for col in inputs.columns[8:]:
        inputs[col] = np.expm1(inputs[col])
    inputs['time'] = dt.datetime.now()
    inputs['company'] = '浙江秀舟热电'
    inputs['unit'] = '1'
    print(inputs.columns)
    print(predict_df(history, inputs, model, object_cols, emission_factors))