add batch prediction

This commit is contained in:
赵敬皓 2023-06-28 12:34:57 +08:00
parent 527f92cdbe
commit d3779c8f9a
6 changed files with 6624 additions and 6545 deletions

109
README.md
View File

@ -6,22 +6,26 @@
输入为json串具体内容如下 输入为json串具体内容如下
``` ```
{ {
"key": "xxxx", "key": ["company", "unit", "time", "boiler", "steam", ..., "lat"],
"data":{ "data":[
"time": "", [第一条数据顺序与key相同],
"boiler": "", [第二条数据],
... //以下省略 ...
} [第n条数据],
]
} }
``` ```
其中`key`字段可用于并发时确认请求来源,`data`字段为传入的机组和排放参数,具体如下: `key`字段表示输入数据的列名,`data`字段为传入的机组和排放值的二维数组,具体如下:
|字段|类型|含义|是否由爬虫获取| |字段|类型|含义|是否由爬虫获取|
|----|----|----|----| |----|----|----|----|
|company|str|电厂名称|否|
|unit|str|机组编号|否|
|time|str|时间,'yyyy-mm-dd hh:MM:ss'|是| |time|str|时间,'yyyy-mm-dd hh:MM:ss'|是|
|boiler|str|锅炉类型|否| |boiler|str|锅炉类型|否|
|steam|str|汽轮机类型|否| |steam|str|汽轮机类型|否|
|cold|str|冷却方式|否| |cold|str|冷却方式|否|
|pressure|str|压力参数|否| |pressure|str|压力参数|否|
|capacity|float|装机容量|否|
|nox|float|氮氧化物($NO_x$)浓度数值($mg/m^3$)|是| |nox|float|氮氧化物($NO_x$)浓度数值($mg/m^3$)|是|
|so2|float|二氧化硫($SO_2$)浓度数值($mg/m^3$)|是| |so2|float|二氧化硫($SO_2$)浓度数值($mg/m^3$)|是|
|smoke|float|烟气浓度($mg/m^3$)|是| |smoke|float|烟气浓度($mg/m^3$)|是|
@ -38,14 +42,16 @@
``` ```
{ {
"code": 200, # 若调用错误code为406 "code": 200, # 若调用错误code为406
"request_key": "xxxx",
"msg": "若code为406返回该字段", "msg": "若code为406返回该字段",
"data": {...} # 若code为200返回该字段 "data": {"company": [], "unit":[], "coal":[], ..., "pm10":[]} # 若code为200返回该字段
} }
``` ```
其中`key`字段可用于并发时确认请求来源data字段对应模型返回的当前时刻的结果,具体如下: `data`字段对应模型返回的当前时刻的结果,具体如下:
|字段|类型|含义| |字段|类型|含义|
|----|----|----| |----|----|----|
|company|str|电厂名称|
|unit|str|机组编号|
|time|str|时间,'yyyy-mm-dd hh:MM:ss'|
|coal|float|该时刻对应的煤耗(t)| |coal|float|该时刻对应的煤耗(t)|
|co|float|一氧化碳排放量(kg)| |co|float|一氧化碳排放量(kg)|
|co2|float|二氧化碳排放量(kg)| |co2|float|二氧化碳排放量(kg)|
@ -57,39 +63,67 @@
### 输入示例 ### 输入示例
``` ```
{ {
"key": "123456", "key": ["boiler",
"data": { "steam",
"time": "2023-01-02 03:04:05", "cold",
"boiler": "循环流化床锅炉", "pressure",
"steam": "凝气式", "nox",
"cold": "水冷-开式循环", "so2",
"pressure": "超超临界", "smoke",
"nox": "12", "flow",
"so2": "0.15", "o2",
"smoke": "12", "temp",
"flow": "5000000", "evaporation",
"o2": "23", "caloric",
"temp": "55", "capacity",
"evaporation": "123", "lon",
"caloric": "23", "lat",
"capacity": "234", "time",
"lon": "122", "company",
"lat": "33" "unit"],
} "data": [
["煤粉", "凝气式", "空冷-直接空冷", "超超临界", 3.68, 21.42, 3.60, 1644322.11, 6.78, 56.01, 2080.0, 12.96, 600, 118.72, 43.59, "2023-06-28 11:21:07", "浙江秀舟热电", "1"],
["W火焰炉", "凝气式", "空冷-间接空冷", "超临界", 4.68, 12.42, 2.60, 1147852.11, 9.32, 56.01, 1060.0, 12.96, 1000, 118.72, 43.59, "2023-06-28 11:21:07", "浙江秀舟热电", "2"]
]
} }
``` ```
### 输出示例(调用成功) ### 输出示例(调用成功)
``` ```
{ {
"code": 200, "code": 200,
"key": "123456",
"data": { "data": {
"coal": 31.213128645837465, "company": [
"co": 62.42625729167493, "浙江秀舟热电",
"vocs": 67.42035787500893, "浙江秀舟热电"
"pm25": 0.72, ],
"pm10": 2.7600000000000002, "unit": [
"co2": 65.24910397752144 "1",
"2"
],
"time": [
"2023-06-28 11:21:07",
"2023-06-28 11:21:07"
],
"coal": [
473.88829463709556,
444.42982231624063
],
"co": [
947.7765892741911,
888.8596446324813
],
"vocs": [
1023.5987164161264,
959.9684162030799
],
"pm25": [
0.35560232895598914,
0.28232093872217645
],
"pm10": [
1.3631422609979584,
1.0822302651016766
]
} }
} }
``` ```
@ -97,7 +131,6 @@
``` ```
{ {
"code": 406, "code": 406,
"key": "123456", "msg": "Input is None, please check!" # or other Exception
"msg": "Input is None, please check!"
} }
``` ```

17
config/columns_dict.json Normal file
View File

@ -0,0 +1,17 @@
{
"boiler": "生产设备类型",
"steam": "汽轮机类型",
"cold": "冷却方式",
"pressure": "压力参数",
"nox": "0_r_NOx",
"so2": "0_r_SO2",
"smoke": "0_r_smoke",
"flow": "0_flow",
"o2": "0_O2",
"temp": "0_temp",
"evaporation": "额定蒸发量_t/h",
"caloric": "低位发热量",
"capacity": "单机容量MW",
"lon": "lon",
"lat": "lat"
}

File diff suppressed because it is too large Load Diff

View File

@ -82,27 +82,52 @@ def predict(his_data, input_data, model:lgb.Booster, object_cols, emission_facto
return {'coal': coal_cost, 'co':co, 'vocs':vocs, 'pm25':pm25, 'pm10':pm10, 'co2':co2} return {'coal': coal_cost, 'co':co, 'vocs':vocs, 'pm25':pm25, 'pm10':pm10, 'co2':co2}
def predict_df(his_data, input_data:pd.DataFrame, model:lgb.Booster, object_cols:list, emission_factors):
    """Predict coal consumption and derived emissions for a batch of records.

    The caller's ``input_data`` is left untouched; all feature engineering is
    done on a private copy.

    Args:
        his_data: Historical rows that are concatenated in front of the new
            rows before one-hot encoding, so the categorical columns produce
            the same dummy columns the model expects.
            NOTE(review): presumably already log1p-transformed -- confirm
            against ``load_history_data``.
        input_data: One row per record. Must contain ``company``, ``unit``,
            ``time``, ``生产设备类型``, ``低位发热量``, ``0_flow`` and the
            ``0_r_NOx`` / ``0_r_SO2`` / ``0_r_smoke`` concentration columns,
            plus whatever other columns the model's feature list requires.
        model: Trained LightGBM booster; its ``feature_name()`` list selects
            and orders the columns passed to ``predict``.
        object_cols: Names of the categorical columns to one-hot encode.
        emission_factors: Mapping looked up with the keys ``co``, ``vocs``,
            ``pm25`` and ``pm10``; each value is handed to the emission
            helper functions.

    Returns:
        dict: Column name -> list of per-row values for ``company``, ``unit``,
        ``time``, ``coal``, ``co``, ``vocs``, ``pm25`` and ``pm10``.
    """
    non_feature_cols = ('company', 'unit', 'time')

    def numeric_columns(frame):
        # Everything that is neither an identifier column nor categorical.
        return [c for c in frame.columns
                if c not in non_feature_cols and c not in object_cols]

    feature_names = model.feature_name()

    # Snapshot of the raw request rows; prediction results are attached here.
    results = input_data.copy()
    # Private working copy so the caller's DataFrame is never mutated.
    features = input_data.copy()

    # Calendar features replace the raw timestamp.
    timestamps = pd.to_datetime(features['time'])
    features['day_of_week'] = timestamps.dt.weekday
    features['month'] = timestamps.dt.month
    features['hour'] = timestamps.dt.hour
    features = features.drop(columns='time')

    # Values may arrive as strings; force every numeric column to float.
    features = features.astype(dict.fromkeys(numeric_columns(features), float))

    # Concentration * flow for each monitored pollutant.
    flow = features['0_flow']
    features['0_c_NOx'] = features['0_r_NOx'] * flow
    features['0_c_SO2'] = features['0_r_SO2'] * flow
    features['0_c_smoke'] = features['0_r_smoke'] * flow

    # Values above 9999 are scaled down by 1000; others are kept as-is.
    calorific = features['低位发热量']
    features['低位发热量'] = calorific.mask(calorific > 9999, calorific / 1000)

    # log1p-transform all numeric columns (now including the 0_c_* ones).
    transformed = features.copy()
    for col in numeric_columns(transformed):
        transformed[col] = np.log1p(transformed[col])

    # Encode together with the history, then keep only the new (last) rows.
    encoded = pd.get_dummies(pd.concat([his_data, transformed]), columns=object_cols)
    n_rows = features.shape[0]
    model_inputs = encoded.iloc[-n_rows:][feature_names]
    # The prediction is mapped back with expm1 (inverse of log1p).
    results['coal'] = np.expm1(model.predict(model_inputs))

    # Coal-based emissions use the predicted coal and the raw boiler type.
    for name in ('co', 'vocs'):
        factor = emission_factors.get(name)
        results[name] = results.apply(
            lambda row: cal_coal_cost_emission(row['coal'], row['生产设备类型'], factor),
            axis=1,
        )

    # Particulate emissions use the engineered smoke column.
    for name in ('pm25', 'pm10'):
        factor = emission_factors.get(name)
        results[name] = features.apply(
            lambda row: cal_PM(row['0_c_smoke'], row['生产设备类型'], factor),
            axis=1,
        )

    # NOTE(review): co2 is computed from the *raw* 低位发热量 value and is not
    # part of the returned columns below -- confirm whether it should be
    # returned and whether it should use the rescaled calorific value.
    results['co2'] = results.apply(
        lambda row: cal_CO2(row['coal'], row['低位发热量']), axis=1
    )

    out_cols = ['company', 'unit', 'time', 'coal', 'co', 'vocs', 'pm25', 'pm10']
    return results[out_cols].to_dict(orient='list')
if __name__ == '__main__': if __name__ == '__main__':
history = load_history_data('../data/data_sample.csv') history = load_history_data('../data/data_sample.csv')
object_cols = load_config('../config/object_cols.json') object_cols = load_config('../config/object_cols.json')
model = load_lgb_model('../model_files/hour_best_model.txt') model = load_lgb_model('../model_files/hour_best_model.txt')
emission_factors = load_config('../config/emission_factor.json') emission_factors = load_config('../config/emission_factor.json')
inputs = { inputs = history.iloc[:2]
"time": "2023-01-02 03:04:05", for col in inputs.columns[8:]:
"boiler": "循环流化床锅炉", inputs[col] = np.expm1(inputs[col])
"steam": "凝气式", inputs['time'] = dt.datetime.now()
"cold": "水冷-开式循环", inputs['company'] = '浙江秀舟热电'
"pressure": "超超临界", inputs['unit'] = '1'
"nox": "12", print(inputs.columns)
"so2":"0.15", print(predict_df(history, inputs, model, object_cols, emission_factors))
"smoke":"12",
"flow":"5000000",
"o2": "23",
"temp": "55",
"evaporation": "123",
"caloric": "23",
"capacity": "234",
"lon": "122",
"lat":"33",
}
print(predict(history, inputs, model, object_cols, emission_factors))

16
run.py
View File

@ -4,17 +4,19 @@ os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
import json import json
from flask import Flask, request, make_response from flask import Flask, request, make_response
from logzero import logger from logzero import logger
import pandas as pd
current_path = os.path.dirname(os.path.abspath(__file__)) # for local current_path = os.path.dirname(os.path.abspath(__file__)) # for local
# current_path = "/app" # for docker # current_path = "/app" # for docker
logger.info(f"{current_path}") logger.info(f"{current_path}")
from models.lgb_predict import load_config, load_history_data, load_lgb_model, predict from models.lgb_predict import load_config, load_history_data, load_lgb_model, predict_df
lgb_model = load_lgb_model(model_path=f"{current_path}/model_files/hour_best_model.txt") lgb_model = load_lgb_model(model_path=f"{current_path}/model_files/hour_best_model.txt")
object_cols = load_config(f"{current_path}/config/object_cols.json") object_cols = load_config(f"{current_path}/config/object_cols.json")
history_data = load_history_data(data_path=f"{current_path}/data/data_sample.csv") history_data = load_history_data(data_path=f"{current_path}/data/data_sample.csv")
emission_factors = load_config(f"{current_path}/config/emission_factor.json") emission_factors = load_config(f"{current_path}/config/emission_factor.json")
col_dict =load_config(f"{current_path}/config/columns_dict.json")
app = Flask(__name__) app = Flask(__name__)
@ -24,13 +26,15 @@ def run_case_check():
resp_info = dict() resp_info = dict()
if request.method == "POST": if request.method == "POST":
data = request.json.get('data') data = request.json.get('data')
key = request.json.get('key') columns = request.json.get('key')
resp_info['key'] = key new_cols = [col_dict.get(x) if x in col_dict else x for x in columns]
logger.info("request key: {key}") print(new_cols)
logger.info(data) df = pd.DataFrame.from_records(data, columns=new_cols)
logger.info(f"request key: {columns}")
logger.info(f"传入{len(data)}条数据")
if data is not None and len(data) != 0: if data is not None and len(data) != 0:
try: try:
rst = predict(history_data, data, lgb_model, object_cols, emission_factors) rst = predict_df(history_data, df, lgb_model, object_cols, emission_factors)
resp_info["code"] = 200 resp_info["code"] = 200
resp_info["data"] = rst resp_info["data"] = rst
except Exception as e: except Exception as e: