151 KiB
151 KiB
In [1]:
from sklearn.multioutput import MultiOutputRegressor import xgboost as xgb import pandas as pd import numpy as np from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score from sklearn.model_selection import train_test_split
In [2]:
total_data = pd.read_excel('train_data.xlsx') total_data.tail(1)
Out[2]:
企业名称 | 机组编号 | 铭牌容量 (MW) | 机组类型 | 参数分类 | 冷凝器型式 | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | 煤种 | 所处地区 | longitude | latitude | altitude | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
5740 | 榆能榆神热电有限公司 | 2 | 350.0 | 抽凝式 | 超临界 | 间接空冷 | 25514.0 | 38.84 | 7.28 | 烟煤 | 陕西省 | 109.820265 | 38.304383 | 1151 | 0.661759 | 0.091483 |
In [3]:
total_data.columns
Out[3]:
Index(['企业名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'], dtype='object')
In [4]:
unit_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx') unit_data
Out[4]:
发电类型 | 地区 | 城市 | 企业名称 | 机组编号 | 机组状态 | 机组数量 | 单机容量(MW) | 总容量(MW) | 核心设备类型 | 汽轮机类型 | 压力参数 | 冷却方式 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 煤电 | 安徽省 | 安庆市 | 国能神皖安庆发电有限责任公司 | 1 | 在役 | 1 | 320.0 | 320.0 | 煤粉锅炉 | 凝气式 | 亚临界 | 水冷-开式循环 |
1 | 煤电 | 安徽省 | 安庆市 | 国能神皖安庆发电有限责任公司 | 2 | 在役 | 1 | 320.0 | 320.0 | 煤粉锅炉 | 凝气式 | 亚临界 | 水冷-开式循环 |
2 | 煤电 | 安徽省 | 安庆市 | 国能神皖安庆发电有限责任公司 | 3 | 在役 | 1 | 1000.0 | 1000.0 | 煤粉锅炉 | 凝气式 | 超超临界 | 水冷-闭式循环 |
3 | 煤电 | 安徽省 | 安庆市 | 国能神皖安庆发电有限责任公司 | 4 | 在役 | 1 | 1000.0 | 1000.0 | 煤粉锅炉 | 凝气式 | 超超临界 | 水冷-闭式循环 |
4 | 煤电 | 安徽省 | 安庆市 | 安徽华泰林浆纸有限公司 | 化学浆生产线 | 在役 | 1 | 40.0 | 40.0 | 煤粉锅炉 | 抽凝式 | 高压 | 水冷-闭式循环 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5317 | 煤电 | 重庆市 | 长寿区 | 中国石化集团重庆川维化工有限公司 | B4 | 在役 | 1 | 49.0 | 49.0 | 煤粉锅炉 | 抽凝式 | 高压 | 水冷-闭式循环 |
5318 | 煤电 | 重庆市 | 长寿区 | 威立雅长扬热能(重庆)有限责任公司 | 1 | 在役 | 1 | 25.0 | 25.0 | 循环流化床锅炉 | 抽凝式 | 高压 | 水冷-闭式循环 |
5319 | 煤电 | 重庆市 | 长寿区 | 威立雅长扬热能(重庆)有限责任公司 | 2 | 在役 | 1 | 25.0 | 25.0 | 循环流化床锅炉 | 抽背式 | 高压 | 水冷-闭式循环 |
5320 | 煤电 | 重庆市 | 长寿区 | 重庆恩力吉投资有限责任公司 | 2 | 在役 | 1 | 30.0 | 30.0 | 循环流化床锅炉 | 背压式 | 高压 | 其他 |
5321 | 煤电 | 重庆市 | 长寿区 | 重庆恩力吉投资有限责任公司 | 3 | 在役 | 1 | 125.0 | 125.0 | 循环流化床锅炉 | 抽凝式 | 高压 | 水冷-闭式循环 |
5322 rows × 13 columns
In [5]:
total_data.drop_duplicates(inplace=True) total_data.shape
Out[5]:
(5694, 16)
In [6]:
total_data['机组编号'] = total_data['机组编号'].astype(str) unit_data['机组编号'] = unit_data['机组编号'].astype(str)
In [7]:
total_data = total_data.merge(unit_data[['企业名称', '机组编号', '核心设备类型', '汽轮机类型', '冷却方式']], how='left', on=['企业名称', '机组编号'])
In [8]:
total_data
Out[8]:
企业名称 | 机组编号 | 铭牌容量 (MW) | 机组类型 | 参数分类 | 冷凝器型式 | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | 煤种 | 所处地区 | longitude | latitude | altitude | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | 核心设备类型 | 汽轮机类型 | 冷却方式 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21602.05000 | 26.09 | 16.80 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.586990 | 0.076843 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
1 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21926.81000 | 26.68 | 15.41 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.632859 | 0.077676 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
2 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21261.93062 | 26.46 | 15.18 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.609196 | 0.074823 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
3 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 20840.00000 | 26.43 | 14.55 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.602178 | 0.081628 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
4 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 20706.00000 | 26.43 | 14.96 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.590254 | 0.081103 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5689 | 浙江浙能电力股份有限公司台州发电厂 | 8 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21973.00000 | 37.43 | 17.12 | 烟煤 | 浙江省 | 121.465840 | 28.704623 | 73 | 0.628300 | 0.078776 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
5690 | 浙江浙能电力股份有限公司台州发电厂 | 8 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21372.00000 | 39.87 | 18.01 | 烟煤 | 浙江省 | 121.465840 | 28.704623 | 73 | 0.595019 | 0.076622 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
5691 | 浙江浙能电力股份有限公司台州发电厂 | 8 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 20856.00000 | 39.32 | 19.74 | 烟煤 | 浙江省 | 121.465840 | 28.704623 | 73 | 0.565718 | 0.074772 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
5692 | 榆能榆神热电有限公司 | 1 | 350.0 | 抽凝式 | 超临界 | 间接空冷 | 25514.00000 | 38.84 | 7.28 | 烟煤 | 陕西省 | 109.820265 | 38.304383 | 1151 | 0.664456 | 0.091482 | 煤粉锅炉 | 抽凝式 | 空冷-间接空冷 |
5693 | 榆能榆神热电有限公司 | 2 | 350.0 | 抽凝式 | 超临界 | 间接空冷 | 25514.00000 | 38.84 | 7.28 | 烟煤 | 陕西省 | 109.820265 | 38.304383 | 1151 | 0.661759 | 0.091483 | 煤粉锅炉 | 抽凝式 | 空冷-间接空冷 |
5694 rows × 19 columns
In [9]:
na_boiler_df = total_data[total_data['核心设备类型'].isna()].drop(columns=['核心设备类型', '汽轮机类型', '冷却方式']) boiler_df = total_data[~total_data['核心设备类型'].isna()].copy() na_boiler = total_data[total_data['核心设备类型'].isna()]['企业名称'].unique()
In [10]:
na_boiler_df = na_boiler_df.merge(unit_data[['企业名称', '核心设备类型']], how='left', on=['企业名称'])
In [11]:
total_data = pd.concat([boiler_df, na_boiler_df], axis=0).drop_duplicates()
In [12]:
total_data['核心设备类型'].value_counts()
Out[12]:
煤粉锅炉 5428 W火焰炉 151 循环流化床锅炉 4 Name: 核心设备类型, dtype: int64
In [13]:
num_cols = ['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude', 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'] # object_cols = ['所处地区', '类型', '机组参数', '冷却型式'] # object_cols = ['所处地区', '汽轮机类型', '参数分类', '冷凝器型式', '核心设备类型'] object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式']
In [14]:
def change_str(x): if pd.isna(x): return x if '空冷' in x: return '空冷' if '水冷' in x: return '水冷' return x
In [15]:
# total_data['冷凝器型式'] = total_data['冷凝器型式'].apply(change_str)
In [16]:
total_data = total_data[total_data['发电碳排放因子(kg/kWh)'] <= 0.9].copy()
In [17]:
use_cols = object_cols + [x for x in num_cols if '因子' not in x] use_cols
Out[17]:
['所处地区', '机组类型', '参数分类', '冷凝器型式', '铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude', 'latitude', 'altitude']
In [18]:
total_data = total_data[~total_data['供热碳排放因子(kg/MJ)'].isna()].copy()
In [19]:
total_data.shape
Out[19]:
(5685, 19)
In [20]:
total_data.groupby(['企业名称', '机组编号']).count().shape
Out[20]:
(1060, 17)
In [21]:
total_data['入炉煤低位热值(kJ/kg)'] = total_data['入炉煤低位热值(kJ/kg)'].apply(lambda x: x * 1000 if x < 100 else x * 1) total_data['燃煤灰份Aar(%)'] = total_data['燃煤灰份Aar(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1) total_data['燃煤挥发份Var(%)'] = total_data['燃煤挥发份Var(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1)
In [22]:
total_data.altitude = total_data.altitude.apply(lambda x: 0 if x < 0 else x)
In [23]:
use_data = total_data[(total_data['供热碳排放因子(kg/MJ)'] > 0.01)&(total_data['供热碳排放因子(kg/MJ)'] < 0.1)].dropna() use_data.shape
Out[23]:
(5041, 19)
In [24]:
import seaborn as sns
In [25]:
use_data['供热碳排放因子(kg/MJ)'].describe()
Out[25]:
count 5041.000000 mean 0.070824 std 0.009937 min 0.010464 25% 0.065431 50% 0.071466 75% 0.077387 max 0.099905 Name: 供热碳排放因子(kg/MJ), dtype: float64
In [26]:
train_data = use_data.groupby(use_cols)['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index()
D:\miniconda3\envs\py37\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead. """Entry point for launching an IPython kernel.
In [27]:
train_data
Out[27]:
所处地区 | 机组类型 | 参数分类 | 冷凝器型式 | 铭牌容量 (MW) | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | longitude | latitude | altitude | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 20209.00 | 25.94 | 15.34 | 121.471140 | 31.065113 | 3 | 0.623923 | 0.078064 |
1 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 20785.00 | 25.97 | 17.03 | 121.471140 | 31.065113 | 3 | 0.639474 | 0.079308 |
2 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 20796.00 | 26.00 | 13.00 | 121.471140 | 31.065113 | 3 | 0.635351 | 0.078691 |
3 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 21762.00 | 27.01 | 13.35 | 121.471140 | 31.065113 | 3 | 0.674456 | 0.085853 |
4 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 320.0 | 15829.32 | 30.85 | 4.77 | 121.601480 | 31.358794 | 2 | 0.506816 | 0.060934 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3789 | 黑龙江省 | 纯凝式 | 超高压 | 水冷 | 200.0 | 15941.21 | 23.83 | 14.73 | 126.575647 | 45.918566 | 118 | 0.500172 | 0.064200 |
3790 | 黑龙江省 | 纯凝式 | 超高压 | 水冷 | 210.0 | 15355.00 | 42.00 | 36.70 | 131.695864 | 46.580444 | 91 | 0.518301 | 0.063249 |
3791 | 黑龙江省 | 背压式 | 超高压 | 水冷-开式循环 | 200.0 | 13396.00 | 23.39 | 15.66 | 123.639146 | 47.210696 | 151 | 0.224312 | 0.053770 |
3792 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 15753.00 | 36.29 | 42.40 | 129.604803 | 44.608202 | 250 | 0.290814 | 0.068027 |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 16471.11 | 30.10 | 38.67 | 129.604803 | 44.608202 | 250 | 0.321635 | 0.067798 |
3794 rows × 13 columns
In [28]:
for col in num_cols: if '因子' not in col: train_data[col] = np.log1p(train_data[col])
In [29]:
train_data = train_data[train_data['供热碳排放因子(kg/MJ)']<=0.1].copy()
In [30]:
train_data = pd.get_dummies(train_data, columns=object_cols).dropna()
In [31]:
for col in train_data.columns: train_data[col] = train_data[col].astype(float)
In [32]:
feature_cols = [x for x in train_data.columns if '因子' not in x and '其他' not in x] target_cols = [x for x in train_data.columns if '因子' in x]
In [33]:
train_data.to_csv('./train_data_processed.csv', encoding='utf-8-sig', index=False)
In [34]:
train, test = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=42) train, valid = train_test_split(train, test_size=0.1, shuffle=True, random_state=42)
In [35]:
train_X, train_y = train[feature_cols], train[target_cols] valid_X, valid_y = valid[feature_cols], valid[target_cols] test_X, test_y = test[feature_cols], test[target_cols]
In [36]:
from sklearn.model_selection import cross_val_score from xgboost import XGBRegressor from bayes_opt import BayesianOptimization
供电建模¶
In [37]:
params_xgb = {'objective': 'reg:squarederror', 'booster': 'gbtree', 'eta': 0.01, 'max_depth': 60, 'subsample': 0.8, 'colsample_bytree': 0.9, 'min_child_weight': 60, 'seed': 42} num_boost_round = 2000
In [38]:
dtrain = xgb.DMatrix(train_X, train_y.values[:, 0]) dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 0]) watchlist = [(dtrain, 'train'), (dvalid, 'eval')] gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=200, verbose_eval=False)
In [39]:
y_pred_xgb = gb_model.predict(xgb.DMatrix(test_X)) y_true_xgb = test_y.values[:, 0]
In [40]:
MSE = mean_squared_error(y_true_xgb, y_pred_xgb) RMSE = np.sqrt(mean_squared_error(y_true_xgb, y_pred_xgb)) MAE = mean_absolute_error(y_true_xgb, y_pred_xgb) MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb) R_2 = r2_score(y_true_xgb, y_pred_xgb) print('MSE:', format(MSE, '.1E')) print('RMSE:', round(RMSE, 4)) print('MAE:', round(MAE, 4)) print('MAPE:', round(MAPE*100, 2), '%') print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差a
MSE: 9.9E-04 RMSE: 0.0315 MAE: 0.0146 MAPE: 4.39 % R_2: 0.83
In [41]:
from sklearn.model_selection import KFold
In [42]:
kf = KFold(n_splits=10, shuffle=True, random_state=666)
In [43]:
eva_list = list() for (train_index, test_index) in kf.split(train_data): train = train_data.loc[train_index] test = train_data.loc[test_index] train, valid = train_test_split(train, test_size=0.11, random_state=666) X_train, Y_train = train[feature_cols], train['发电碳排放因子(kg/kWh)'] X_valid, Y_valid = valid[feature_cols], valid['发电碳排放因子(kg/kWh)'] X_test, Y_test = valid[feature_cols], valid['发电碳排放因子(kg/kWh)'] dtrain = xgb.DMatrix(X_train, Y_train) dvalid = xgb.DMatrix(X_valid, Y_valid) watchlist = [(dvalid, 'eval')] gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=100, verbose_eval=False) y_pred = gb_model.predict(xgb.DMatrix(X_test)) y_true = Y_test.values MSE = mean_squared_error(y_true, y_pred) RMSE = np.sqrt(mean_squared_error(y_true, y_pred)) MAE = mean_absolute_error(y_true, y_pred) MAPE = mean_absolute_percentage_error(y_true, y_pred) R_2 = r2_score(y_true, y_pred) print('MSE:', format(MSE, '.1E'), end=', ') print('RMSE:', round(RMSE, 4), end=', ') print('MAE:', round(MAE, 4), end=', ') print('MAPE:', round(MAPE*100, 2), '%', end=', ') print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差 eva_list.append([MSE, RMSE, MAE, MAPE, R_2]) if R_2 > 0.94: break
MSE: 3.5E-04, RMSE: 0.0188, MAE: 0.0126, MAPE: 2.6 %, R_2: 0.9346 MSE: 9.5E-04, RMSE: 0.0308, MAE: 0.0142, MAPE: 4.28 %, R_2: 0.8446 MSE: 9.9E-04, RMSE: 0.0314, MAE: 0.0139, MAPE: 4.29 %, R_2: 0.8507 MSE: 5.0E-04, RMSE: 0.0225, MAE: 0.0126, MAPE: 2.53 %, R_2: 0.9118 MSE: 9.9E-04, RMSE: 0.0314, MAE: 0.0143, MAPE: 4.45 %, R_2: 0.8383 MSE: 3.6E-04, RMSE: 0.0191, MAE: 0.0127, MAPE: 2.57 %, R_2: 0.9298 MSE: 5.3E-04, RMSE: 0.023, MAE: 0.0143, MAPE: 3.13 %, R_2: 0.9112 MSE: 5.1E-04, RMSE: 0.0226, MAE: 0.0138, MAPE: 2.84 %, R_2: 0.9092 MSE: 3.5E-04, RMSE: 0.0187, MAE: 0.0128, MAPE: 2.63 %, R_2: 0.9371 MSE: 1.3E-03, RMSE: 0.0361, MAE: 0.015, MAPE: 6.76 %, R_2: 0.8045
In [44]:
test_X['power_pred'] = y_pred_xgb test_X['power_real'] = y_true_xgb test_X['error_rate'] = abs(test_X.power_pred - test_X.power_real) / test_X.power_real
In [45]:
X_test['power_pred'] = y_pred X_test['power_real'] = y_true X_test['error_rate'] = abs(X_test.power_pred - X_test.power_real) / X_test.power_real
D:\miniconda3\envs\py37\lib\site-packages\ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy """Entry point for launching an IPython kernel. D:\miniconda3\envs\py37\lib\site-packages\ipykernel_launcher.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
In [46]:
test_data = X_test.copy()
In [47]:
for col in num_cols: if '因子' not in col: test_data[col] = np.expm1(test_data[col])
In [48]:
test_data[test_data.error_rate > 0.05].sort_values(by='error_rate')
Out[48]:
铭牌容量 (MW) | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | longitude | latitude | altitude | 所处地区_上海市 | 所处地区_云南省 | 所处地区_内蒙古 | ... | 参数分类_高压 | 冷凝器型式_水冷 | 冷凝器型式_水冷-开式循环 | 冷凝器型式_水冷-闭式循环 | 冷凝器型式_直接空冷 | 冷凝器型式_空冷 | 冷凝器型式_间接空冷 | power_pred | power_real | error_rate | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2132 | 300.0 | 17602.00 | 23.95 | 32.830 | 118.211355 | 39.655509 | 26.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.438344 | 0.461568 | 0.050316 |
2424 | 350.0 | 15525.82 | 19.72 | 25.540 | 118.075445 | 40.812210 | 338.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.437482 | 0.460870 | 0.050747 |
1626 | 330.0 | 17997.00 | 33.00 | 35.000 | 82.892729 | 41.741365 | 1066.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.509937 | 0.537416 | 0.051130 |
319 | 350.0 | 14187.00 | 25.53 | 16.710 | 125.579363 | 43.657507 | 208.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.362236 | 0.344575 | 0.051256 |
891 | 350.0 | 19279.92 | 35.49 | 27.340 | 115.784650 | 36.881948 | 33.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.478009 | 0.454326 | 0.052129 |
2234 | 350.0 | 21697.63 | 26.43 | 12.710 | 116.860260 | 39.953617 | 27.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.579227 | 0.549844 | 0.053440 |
2304 | 300.0 | 18611.00 | 15.26 | 29.750 | 115.497149 | 38.802049 | 17.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.488333 | 0.515910 | 0.053453 |
420 | 350.0 | 14445.00 | 45.39 | 20.600 | 125.162487 | 43.784873 | 222.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.376880 | 0.357277 | 0.054869 |
1039 | 330.0 | 21095.00 | 25.80 | 20.410 | 119.276289 | 36.668747 | 75.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.562676 | 0.595438 | 0.055023 |
3090 | 165.0 | 18990.00 | 33.58 | 23.540 | 103.624731 | 36.134735 | 1545.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.516500 | 0.489478 | 0.055205 |
485 | 300.0 | 20068.81 | 25.65 | 15.040 | 117.059925 | 39.157647 | 8.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.515064 | 0.487882 | 0.055715 |
3714 | 600.0 | 16567.18 | 34.02 | 38.640 | 131.063724 | 45.766399 | 207.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.489738 | 0.518801 | 0.056019 |
2076 | 350.0 | 20141.10 | 42.64 | 15.990 | 115.113369 | 25.926232 | 102.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.536181 | 0.568241 | 0.056419 |
3304 | 600.0 | 12950.00 | 48.22 | 25.040 | 124.150700 | 42.540258 | 103.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.380260 | 0.359795 | 0.056878 |
3744 | 350.0 | 15235.27 | 23.23 | 25.780 | 128.768082 | 47.746953 | 240.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.413687 | 0.389536 | 0.061999 |
2572 | 300.0 | 19965.73 | 10.79 | 29.130 | 114.283788 | 36.128262 | 83.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.484924 | 0.455817 | 0.063858 |
3423 | 600.0 | 17981.89 | 26.88 | 17.700 | 122.123524 | 40.311935 | 2.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.464855 | 0.436654 | 0.064586 |
3770 | 600.0 | 15941.38 | 23.83 | 13.750 | 126.575647 | 45.918566 | 118.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.444540 | 0.475513 | 0.065135 |
1422 | 330.0 | 18283.00 | 13.23 | 34.680 | 112.761299 | 37.634620 | 849.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.478954 | 0.513112 | 0.066570 |
3660 | 300.0 | 16031.00 | 21.96 | 39.790 | 130.397051 | 46.805507 | 80.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.420857 | 0.394264 | 0.067449 |
2439 | 600.0 | 19736.25 | 9.21 | 29.210 | 114.437782 | 38.038867 | 76.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.552083 | 0.517015 | 0.067829 |
3365 | 300.0 | 13306.62 | 23.55 | 23.090 | 121.228525 | 41.143879 | 44.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.365339 | 0.392430 | 0.069034 |
1211 | 140.0 | 20919.00 | 19.29 | 26.120 | 115.920941 | 36.466442 | 30.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.525372 | 0.565201 | 0.070470 |
2576 | 300.0 | 21052.00 | 10.74 | 31.360 | 114.283788 | 36.128262 | 83.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.535750 | 0.576367 | 0.070472 |
3395 | 350.0 | 13278.00 | 47.67 | 20.720 | 123.821910 | 41.354877 | 147.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.362074 | 0.338230 | 0.070499 |
1732 | 200.0 | 17620.64 | 19.25 | 14.380 | 87.660577 | 43.750058 | 1010.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.466749 | 0.435941 | 0.070672 |
1244 | 150.0 | 20030.49 | 26.13 | 27.880 | 117.142424 | 35.075862 | 65.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.519800 | 0.559412 | 0.070810 |
1227 | 145.0 | 19292.66 | 19.44 | 26.570 | 117.421027 | 36.738368 | 57.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.471088 | 0.439672 | 0.071453 |
1213 | 140.0 | 21160.00 | 24.26 | 20.310 | 118.335347 | 35.017881 | 65.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.584875 | 0.630051 | 0.071702 |
2224 | 350.0 | 21061.75 | 26.06 | 14.167 | 116.860260 | 39.953617 | 27.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.547243 | 0.510040 | 0.072941 |
3381 | 300.0 | 14582.00 | 28.34 | 23.200 | 124.330739 | 40.115662 | 135.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.378266 | 0.410316 | 0.078112 |
1195 | 140.0 | 19731.00 | 22.23 | 26.550 | 118.335347 | 35.017881 | 65.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.518649 | 0.480565 | 0.079248 |
2366 | 330.0 | 20310.24 | 15.53 | 32.640 | 114.703987 | 38.014364 | 57.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.505584 | 0.551764 | 0.083696 |
1453 | 220.0 | 20434.00 | 26.70 | 27.410 | 113.232289 | 40.067556 | 1061.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.536567 | 0.493686 | 0.086859 |
1624 | 330.0 | 17470.00 | 36.17 | 27.620 | 82.892729 | 41.741365 | 1066.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.474544 | 0.525190 | 0.096434 |
3383 | 300.0 | 15736.00 | 43.85 | 37.540 | 123.817380 | 42.347201 | 98.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.428344 | 0.475805 | 0.099748 |
1128 | 350.0 | 20403.00 | 36.51 | 25.690 | 117.149304 | 36.084927 | 123.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.484347 | 0.541039 | 0.104785 |
2636 | 350.0 | 18193.00 | 16.96 | 31.720 | 112.409429 | 34.584441 | 160.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.465685 | 0.420098 | 0.108516 |
2992 | 350.0 | 23253.68 | 23.72 | 18.450 | 113.672684 | 30.918494 | 34.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.590282 | 0.663618 | 0.110509 |
881 | 165.0 | 20822.00 | 39.57 | 24.600 | 118.128354 | 37.694642 | 8.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.561782 | 0.633226 | 0.112826 |
2644 | 350.0 | 19871.76 | 21.06 | 30.780 | 113.875986 | 36.116424 | 294.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.521570 | 0.467694 | 0.115195 |
2627 | 330.0 | 20682.00 | 11.52 | 28.850 | 113.866062 | 35.248375 | 72.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.563966 | 0.637925 | 0.115937 |
3666 | 330.0 | 14813.00 | 43.74 | 14.460 | 124.613843 | 46.144809 | 154.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.403054 | 0.456362 | 0.116811 |
2215 | 315.0 | 21691.59 | 23.80 | 11.900 | 116.860260 | 39.953617 | 27.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.549092 | 0.490824 | 0.118715 |
1717 | 350.0 | 22600.03 | 28.67 | 18.680 | 76.054876 | 39.484097 | 1288.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.628243 | 0.560538 | 0.120785 |
2686 | 660.0 | 17624.15 | 31.79 | 32.040 | 115.270887 | 35.775540 | 48.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.428897 | 0.381988 | 0.122801 |
3228 | 300.0 | 13075.00 | 28.45 | 27.010 | 123.943182 | 41.899725 | 124.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.356203 | 0.317093 | 0.123338 |
2311 | 300.0 | 19779.00 | 17.49 | 31.090 | 114.525863 | 37.959933 | 63.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.505898 | 0.449974 | 0.124284 |
1064 | 330.0 | 22054.00 | 20.08 | 20.800 | 117.103149 | 36.718761 | 27.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.549820 | 0.628280 | 0.124881 |
1710 | 350.0 | 20519.75 | 24.23 | 12.710 | 76.054876 | 39.484097 | 1288.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.516507 | 0.434781 | 0.187970 |
1651 | 330.0 | 17913.00 | 35.08 | 22.070 | 87.703630 | 43.909559 | 724.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.471082 | 0.395266 | 0.191812 |
3391 | 300.0 | 13874.00 | 24.01 | 20.430 | 121.228525 | 41.143879 | 44.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.378657 | 0.470581 | 0.195341 |
3793 | 215.0 | 16471.11 | 30.10 | 38.670 | 129.604803 | 44.608202 | 250.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.428770 | 0.321635 | 0.333093 |
1022 | 330.0 | 20634.77 | 24.66 | 25.390 | 119.276289 | 36.668747 | 75.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.530251 | 0.073112 | 6.252559 |
343 | 670.0 | 14109.00 | 45.20 | 13.640 | 125.941747 | 44.106509 | 199.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.398257 | 0.038802 | 9.263788 |
55 rows × 66 columns
In [49]:
test_data.shape
Out[49]:
(376, 66)
In [50]:
power_eva_df = pd.DataFrame.from_records([y_true_xgb, y_pred_xgb]).T power_eva_df.to_csv('./发电测试结果.csv', index=False, encoding='utf-8-sig')
In [51]:
gb_model.save_model('./models/power_model.txt')
发热建模¶
In [52]:
def xgb_cv(max_depth, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma): val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth), learning_rate=learning_rate, n_estimators=2000, min_child_weight=min_child_weight, subsample=max(min(subsample, 1), 0), colsample_bytree=max(min(colsample_bytree, 1), 0), reg_alpha=max(reg_alpha, 0), gamma=gamma, objective='reg:squarederror', booster='gbtree', seed=10), X=train[feature_cols], y=train['供热碳排放因子(kg/MJ)'], scoring='r2', cv=10).max() return val
In [53]:
params_xgb = {'objective': 'reg:squarederror', 'booster': 'gbtree', 'eta': 0.005, 'max_depth': 60, 'subsample': 0.5, 'colsample_bytree': 0.9, 'min_child_weight': 30, 'seed': 666} num_boost_round = 2000
In [54]:
eva_list = list() for (train_index, test_index) in kf.split(train_data): train = train_data.loc[train_index] test = train_data.loc[test_index] train, valid = train_test_split(train, test_size=0.11, random_state=42) X_train, Y_train = train[feature_cols], train['供热碳排放因子(kg/MJ)'] X_valid, Y_valid = valid[feature_cols], valid['供热碳排放因子(kg/MJ)'] X_test, Y_test = valid[feature_cols], valid['供热碳排放因子(kg/MJ)'] dtrain = xgb.DMatrix(X_train, Y_train) dvalid = xgb.DMatrix(X_valid, Y_valid) watchlist = [(dvalid, 'eval')] gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=100, verbose_eval=False) y_pred = gb_model.predict(xgb.DMatrix(X_test)) y_true = Y_test.values MSE = mean_squared_error(y_true, y_pred) RMSE = np.sqrt(mean_squared_error(y_true, y_pred)) MAE = mean_absolute_error(y_true, y_pred) MAPE = mean_absolute_percentage_error(y_true, y_pred) R_2 = r2_score(y_true, y_pred) print('MSE:', format(MSE, '.1E'), end=', ') print('RMSE:', round(RMSE, 4), end=', ') print('MAE:', round(MAE, 4), end=', ') print('MAPE:', round(MAPE*100, 3), '%', end=', ') print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差 eva_list.append([MSE, RMSE, MAE, MAPE, R_2])
MSE: 2.7E-05, RMSE: 0.0052, MAE: 0.0025, MAPE: 4.651 %, R_2: 0.7287 MSE: 2.8E-05, RMSE: 0.0053, MAE: 0.0028, MAPE: 4.129 %, R_2: 0.7189 MSE: 2.0E-05, RMSE: 0.0045, MAE: 0.0026, MAPE: 3.629 %, R_2: 0.7839 MSE: 1.6E-05, RMSE: 0.004, MAE: 0.0025, MAPE: 3.73 %, R_2: 0.8376 MSE: 3.0E-05, RMSE: 0.0054, MAE: 0.0029, MAPE: 5.181 %, R_2: 0.7219 MSE: 1.9E-05, RMSE: 0.0044, MAE: 0.0025, MAPE: 3.849 %, R_2: 0.8013 MSE: 3.1E-05, RMSE: 0.0056, MAE: 0.0028, MAPE: 5.661 %, R_2: 0.7182 MSE: 2.3E-05, RMSE: 0.0048, MAE: 0.0026, MAPE: 4.386 %, R_2: 0.7888 MSE: 2.0E-05, RMSE: 0.0045, MAE: 0.0024, MAPE: 3.456 %, R_2: 0.8005 MSE: 2.3E-05, RMSE: 0.0048, MAE: 0.0027, MAPE: 3.897 %, R_2: 0.7742
In [55]:
dtrain = xgb.DMatrix(train_X, train_y.values[:, 1]) dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 1]) watchlist = [(dtrain, 'train'), (dvalid, 'eval')] gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist, early_stopping_rounds=200, verbose_eval=False)
In [56]:
y_pred_heat = gb_model_heat.predict(xgb.DMatrix(test_X[feature_cols])) y_true_heat = test_y.values[:, 1]
In [57]:
MSE = mean_squared_error(y_true_heat, y_pred_heat) RMSE = np.sqrt(mean_squared_error(y_true_heat, y_pred_heat)) MAE = mean_absolute_error(y_true_heat, y_pred_heat) MAPE = mean_absolute_percentage_error(y_true_heat, y_pred_heat) R_2 = r2_score(y_true_heat, y_pred_heat) print('MSE:', format(MSE, '.1E')) print('RMSE:', round(RMSE, 4)) print('MAE:', round(MAE, 4)) print('MAPE:', round(MAPE*100, 2), '%') print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差a
MSE: 1.7E-05 RMSE: 0.0041 MAE: 0.0024 MAPE: 3.61 % R_2: 0.8188
In [58]:
pd.DataFrame.from_records([y_true_heat, y_pred_heat]).T.to_csv('./供热测试结果.csv', index=False, encoding='utf-8-sig')
In [59]:
gb_model_heat.save_model('./models/heat_model.txt')
煤种标准化工程¶
In [60]:
new_values = use_data.groupby(['煤种', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean()
D:\miniconda3\envs\py37\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead. """Entry point for launching an IPython kernel.
In [61]:
new_values
Out[61]:
发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | ||||
---|---|---|---|---|---|
煤种 | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | ||
无烟煤 | 19827.00 | 11.18 | 2539.00 | 0.561424 | 0.087794 |
烟煤 | 16733.00 | 22.53 | 27.46 | 0.441511 | 0.064259 |
16740.00 | 18.99 | 37.00 | 0.487225 | 0.064535 | |
27.93 | 24.43 | 0.418457 | 0.064747 | ||
16741.00 | 26.69 | 25.92 | 0.433679 | 0.061822 | |
... | ... | ... | ... | ... | ... |
贫煤 | 21938.00 | 13.40 | 22.58 | 0.615856 | 0.099905 |
22042.72 | 12.96 | 25.69 | 0.636563 | 0.079468 | |
22149.00 | 12.43 | 25.10 | 0.629733 | 0.082772 | |
22272.51 | 11.83 | 22.97 | 0.627877 | 0.083234 | |
22475.97 | 8.90 | 23.98 | 0.620331 | 0.086574 |
3579 rows × 2 columns
In [62]:
coal_df = new_values.reset_index().drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']) coal_df
Out[62]:
煤种 | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | |
---|---|---|---|---|
0 | 无烟煤 | 19827.00 | 11.18 | 2539.00 |
1 | 烟煤 | 16733.00 | 22.53 | 27.46 |
2 | 烟煤 | 16740.00 | 18.99 | 37.00 |
3 | 烟煤 | 16740.00 | 27.93 | 24.43 |
4 | 烟煤 | 16741.00 | 26.69 | 25.92 |
... | ... | ... | ... | ... |
3574 | 贫煤 | 21938.00 | 13.40 | 22.58 |
3575 | 贫煤 | 22042.72 | 12.96 | 25.69 |
3576 | 贫煤 | 22149.00 | 12.43 | 25.10 |
3577 | 贫煤 | 22272.51 | 11.83 | 22.97 |
3578 | 贫煤 | 22475.97 | 8.90 | 23.98 |
3579 rows × 4 columns
In [63]:
coal_params_dict = dict() for coal_type in coal_df['煤种'].unique().tolist(): options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']].values coal_params_dict[coal_type] = options
In [64]:
use_data
Out[64]:
企业名称 | 机组编号 | 铭牌容量 (MW) | 机组类型 | 参数分类 | 冷凝器型式 | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | 煤种 | 所处地区 | longitude | latitude | altitude | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | 核心设备类型 | 汽轮机类型 | 冷却方式 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21602.05000 | 26.09 | 16.80 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.586990 | 0.076843 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
1 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21926.81000 | 26.68 | 15.41 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.632859 | 0.077676 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
2 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21261.93062 | 26.46 | 15.18 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.609196 | 0.074823 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
3 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 20840.00000 | 26.43 | 14.55 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.602178 | 0.081628 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
4 | 江苏利港电力有限公司 | 1 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 20706.00000 | 26.43 | 14.96 | 烟煤 | 江苏省 | 120.096620 | 31.942361 | 1 | 0.590254 | 0.081103 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
5689 | 浙江浙能电力股份有限公司台州发电厂 | 8 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21973.00000 | 37.43 | 17.12 | 烟煤 | 浙江省 | 121.465840 | 28.704623 | 73 | 0.628300 | 0.078776 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
5690 | 浙江浙能电力股份有限公司台州发电厂 | 8 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 21372.00000 | 39.87 | 18.01 | 烟煤 | 浙江省 | 121.465840 | 28.704623 | 73 | 0.595019 | 0.076622 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
5691 | 浙江浙能电力股份有限公司台州发电厂 | 8 | 350.0 | 凝气式 | 亚临界 | 水冷-开式循环 | 20856.00000 | 39.32 | 19.74 | 烟煤 | 浙江省 | 121.465840 | 28.704623 | 73 | 0.565718 | 0.074772 | 煤粉锅炉 | 凝气式 | 水冷-开式循环 |
5692 | 榆能榆神热电有限公司 | 1 | 350.0 | 抽凝式 | 超临界 | 间接空冷 | 25514.00000 | 38.84 | 7.28 | 烟煤 | 陕西省 | 109.820265 | 38.304383 | 1151 | 0.664456 | 0.091482 | 煤粉锅炉 | 抽凝式 | 空冷-间接空冷 |
5693 | 榆能榆神热电有限公司 | 2 | 350.0 | 抽凝式 | 超临界 | 间接空冷 | 25514.00000 | 38.84 | 7.28 | 烟煤 | 陕西省 | 109.820265 | 38.304383 | 1151 | 0.661759 | 0.091483 | 煤粉锅炉 | 抽凝式 | 空冷-间接空冷 |
5041 rows × 19 columns
In [65]:
new_use_data = use_data.groupby(use_cols+['煤种'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index().drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']) new_use_data
D:\miniconda3\envs\py37\lib\site-packages\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead. """Entry point for launching an IPython kernel.
Out[65]:
所处地区 | 机组类型 | 参数分类 | 冷凝器型式 | 铭牌容量 (MW) | longitude | latitude | altitude | 煤种 | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 烟煤 | 0.623923 | 0.078064 |
1 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 烟煤 | 0.639474 | 0.079308 |
2 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 烟煤 | 0.635351 | 0.078691 |
3 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 烟煤 | 0.674456 | 0.085853 |
4 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 320.0 | 121.601480 | 31.358794 | 2 | 褐煤 | 0.506816 | 0.060934 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3789 | 黑龙江省 | 纯凝式 | 超高压 | 水冷 | 200.0 | 126.575647 | 45.918566 | 118 | 褐煤 | 0.500172 | 0.064200 |
3790 | 黑龙江省 | 纯凝式 | 超高压 | 水冷 | 210.0 | 131.695864 | 46.580444 | 91 | 褐煤 | 0.518301 | 0.063249 |
3791 | 黑龙江省 | 背压式 | 超高压 | 水冷-开式循环 | 200.0 | 123.639146 | 47.210696 | 151 | 褐煤 | 0.224312 | 0.053770 |
3792 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 褐煤 | 0.290814 | 0.068027 |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 褐煤 | 0.321635 | 0.067798 |
3794 rows × 11 columns
In [66]:
new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))
In [67]:
new_use_data.drop(columns='煤种', inplace=True)
In [68]:
new_data = new_use_data.explode(column='coal_params')
In [69]:
new_data
Out[69]:
所处地区 | 机组类型 | 参数分类 | 冷凝器型式 | 铭牌容量 (MW) | longitude | latitude | altitude | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | coal_params | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 0.623923 | 0.078064 | [16733.0, 22.53, 27.46] |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 0.623923 | 0.078064 | [16740.0, 18.99, 37.0] |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 0.623923 | 0.078064 | [16740.0, 27.93, 24.43] |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 0.623923 | 0.078064 | [16741.0, 26.69, 25.92] |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.471140 | 31.065113 | 3 | 0.623923 | 0.078064 | [16741.51, 19.51, 35.62] |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 0.321635 | 0.067798 | [16723.0, 40.63, 39.94] |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 0.321635 | 0.067798 | [16725.0, 26.36, 28.51] |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 0.321635 | 0.067798 | [16725.19, 34.59, 37.71] |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 0.321635 | 0.067798 | [16725.85, 43.2, 12.0] |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 215.0 | 129.604803 | 44.608202 | 250 | 0.321635 | 0.067798 | [16729.0, 51.42, 17.33] |
8019537 rows × 11 columns
In [70]:
new_data['入炉煤低位热值(kJ/kg)'] = new_data.coal_params.apply(lambda x: x[0]).values new_data['燃煤挥发份Var(%)'] = new_data.coal_params.apply(lambda x: x[1]).values new_data['燃煤灰份Aar(%)'] = new_data.coal_params.apply(lambda x: x[2]).values
In [71]:
norm_data = new_data.drop(columns='coal_params')
In [72]:
norm_data.head()
Out[72]:
所处地区 | 机组类型 | 参数分类 | 冷凝器型式 | 铭牌容量 (MW) | longitude | latitude | altitude | 发电碳排放因子(kg/kWh) | 供热碳排放因子(kg/MJ) | 入炉煤低位热值(kJ/kg) | 燃煤挥发份Var(%) | 燃煤灰份Aar(%) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.47114 | 31.065113 | 3 | 0.623923 | 0.078064 | 16733.00 | 22.53 | 27.46 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.47114 | 31.065113 | 3 | 0.623923 | 0.078064 | 16740.00 | 18.99 | 37.00 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.47114 | 31.065113 | 3 | 0.623923 | 0.078064 | 16740.00 | 27.93 | 24.43 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.47114 | 31.065113 | 3 | 0.623923 | 0.078064 | 16741.00 | 26.69 | 25.92 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 300.0 | 121.47114 | 31.065113 | 3 | 0.623923 | 0.078064 | 16741.51 | 19.51 | 35.62 |
In [73]:
for col in num_cols: norm_data[col] = np.log1p(norm_data[col]) # total_data[col] = (total_data[col] - total_data[col].min()) / (total_data[col].max() - total_data[col].min()) norm_data_dummpy = pd.get_dummies(norm_data, columns=object_cols)
In [74]:
norm_data_dummpy.drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'], inplace=True)
In [75]:
new_xgb_data = xgb.DMatrix(norm_data_dummpy[feature_cols])
In [76]:
norm_data['power_co2_factor'] = gb_model.predict(new_xgb_data) norm_data['heat_co2_factor'] = gb_model_heat.predict(new_xgb_data)
In [77]:
normaled_data = norm_data.drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])
In [78]:
normaled_data
Out[78]:
所处地区 | 机组类型 | 参数分类 | 冷凝器型式 | 铭牌容量 (MW) | longitude | latitude | altitude | power_co2_factor | heat_co2_factor | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 5.707110 | 4.807875 | 3.467769 | 1.386294 | 0.063166 | 0.063012 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 5.707110 | 4.807875 | 3.467769 | 1.386294 | 0.062317 | 0.062422 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 5.707110 | 4.807875 | 3.467769 | 1.386294 | 0.062508 | 0.062922 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 5.707110 | 4.807875 | 3.467769 | 1.386294 | 0.062466 | 0.062950 |
0 | 上海市 | 凝气式 | 亚临界 | 水冷-开式循环 | 5.707110 | 4.807875 | 3.467769 | 1.386294 | 0.062743 | 0.063012 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 5.375278 | 4.872176 | 3.820088 | 5.525453 | 0.067768 | 0.068277 |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 5.375278 | 4.872176 | 3.820088 | 5.525453 | 0.066563 | 0.066854 |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 5.375278 | 4.872176 | 3.820088 | 5.525453 | 0.068115 | 0.068242 |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 5.375278 | 4.872176 | 3.820088 | 5.525453 | 0.066680 | 0.066995 |
3793 | 黑龙江省 | 背压式 | 超高压 | 水冷-闭式循环 | 5.375278 | 4.872176 | 3.820088 | 5.525453 | 0.067563 | 0.067869 |
8019537 rows × 10 columns
In [79]:
target_cols = ['power_co2_factor', 'heat_co2_factor']
In [80]:
save_data = normaled_data.groupby([x for x in normaled_data.columns if x not in target_cols])[target_cols].mean()
In [81]:
save_data.reset_index().to_csv('./results/去煤种化数据.csv', encoding='utf-8-sig', index=False)
In [81]: