T85_code/.ipynb_checkpoints/xgboost多任务回归-checkpoint.ipynb

2973 lines
92 KiB
Plaintext
Raw Permalink Normal View History

2023-05-11 14:18:08 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"pycharm": {
"name": "#%%\n"
},
"tags": []
},
"outputs": [],
"source": [
"import os\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"import xgboost as xgb\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.multioutput import MultiOutputRegressor"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>企业名称</th>\n",
" <th>机组编号</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>入炉煤低位热值(kJ/kg)</th>\n",
" <th>燃煤挥发份Var(%)</th>\n",
" <th>燃煤灰份Aar(%)</th>\n",
" <th>煤种</th>\n",
" <th>所处地区</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5740</th>\n",
" <td>榆能榆神热电有限公司</td>\n",
" <td>2</td>\n",
" <td>350.0</td>\n",
" <td>抽凝式</td>\n",
" <td>超临界</td>\n",
" <td>间接空冷</td>\n",
" <td>25514.0</td>\n",
" <td>38.84</td>\n",
" <td>7.28</td>\n",
" <td>烟煤</td>\n",
" <td>陕西省</td>\n",
" <td>109.820265</td>\n",
" <td>38.304383</td>\n",
" <td>1151</td>\n",
" <td>0.661759</td>\n",
" <td>0.091483</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n",
"5740 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.0 \n",
"\n",
" 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n",
"5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n",
"\n",
" 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n",
"5740 0.661759 0.091483 "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_data = pd.read_excel('train_data.xlsx')\n",
"total_data.tail(1)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['企业名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)',\n",
" '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude',\n",
" 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'],\n",
" dtype='object')"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_data.columns"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Numeric columns: the feature columns followed by the two emission-factor targets.\n",
"num_cols = [\n",
"    '铭牌容量 (MW)',\n",
"    '入炉煤低位热值(kJ/kg)',\n",
"    '燃煤挥发份Var(%)',\n",
"    '燃煤灰份Aar(%)',\n",
"    'longitude',\n",
"    'latitude',\n",
"    'altitude',\n",
"    '发电碳排放因子(kg/kWh)',\n",
"    '供热碳排放因子(kg/MJ)',\n",
"]\n",
"\n",
"# Categorical columns (one-hot encoded with pd.get_dummies further down).\n",
"# NOTE: '煤种' is not in this list; a disabled variant of the list used to include it.\n",
"object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def change_str(x):\n",
"    \"\"\"Collapse a cooling-type label into a coarse category.\n",
"\n",
"    Presumably meant for the '冷凝器型式' column; it is not applied in the\n",
"    visible cells, so confirm against the caller.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    x : object\n",
"        Raw label, normally a string. Non-string values such as NaN are tolerated.\n",
"\n",
"    Returns\n",
"    -------\n",
"    object\n",
"        '空冷' if the label contains '空冷', '水冷' if it contains '水冷',\n",
"        the input itself when it is not a string (so NaN stays NaN), and\n",
"        None for a string that matches neither keyword.\n",
"    \"\"\"\n",
"    # A missing cell is a float NaN; `in` on a float raises TypeError.\n",
"    if not isinstance(x, str):\n",
"        return x\n",
"    if '空冷' in x:\n",
"        return '空冷'\n",
"    if '水冷' in x:\n",
"        return '水冷'\n",
"    # No keyword matched: same result as before (None), now explicit.\n",
"    return None"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"total_data = total_data[total_data['发电碳排放因子(kg/kWh)'] <= 0.9].copy()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"['所处地区',\n",
" '机组类型',\n",
" '参数分类',\n",
" '冷凝器型式',\n",
" '铭牌容量 (MW)',\n",
" '入炉煤低位热值(kJ/kg)',\n",
" '燃煤挥发份Var(%)',\n",
" '燃煤灰份Aar(%)',\n",
" 'longitude',\n",
" 'latitude',\n",
" 'altitude']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"use_cols = object_cols + [x for x in num_cols if '因子' not in x]\n",
"use_cols"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"total_data = total_data[~total_data['供热碳排放因子(kg/MJ)'].isna()].copy()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(5732, 16)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(1092, 14)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_data.groupby(['企业名称', '机组编号']).count().shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Unit harmonisation, written back into total_data:\n",
"#   * heating values below 100 are multiplied by 1000 (presumably MJ/kg -> kJ/kg; confirm),\n",
"#   * ash / volatile percentages above 10000 are divided by 1000.\n",
"# NOTE(review): percentages between 100 and 10000 are left untouched (an ash value of\n",
"# 2539.00 still shows up in the coal-type table further down) - confirm the threshold.\n",
"total_data['入炉煤低位热值(kJ/kg)'] = total_data['入炉煤低位热值(kJ/kg)'].mask(lambda s: s < 100, lambda s: s * 1000)\n",
"for pct_col in ['燃煤灰份Aar(%)', '燃煤挥发份Var(%)']:\n",
"    total_data[pct_col] = total_data[pct_col].mask(lambda s: s > 10000, lambda s: s / 1000)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Negative altitudes are floored at 0; other values (and NaN) are kept as they are.\n",
"total_data['altitude'] = total_data['altitude'].clip(lower=0)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"(5629, 16)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"use_data = total_data[(total_data['供热碳排放因子(kg/MJ)'] > 0.01)&(total_data['供热碳排放因子(kg/MJ)'] < 0.1)].copy()\n",
"use_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/plain": [
"count 5629.000000\n",
"mean 0.070915\n",
"std 0.009967\n",
"min 0.010464\n",
"25% 0.065467\n",
"50% 0.071533\n",
"75% 0.077513\n",
"max 0.099905\n",
"Name: 供热碳排放因子(kg/MJ), dtype: float64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"use_data['供热碳排放因子(kg/MJ)'].describe()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Average both emission factors over rows that share identical feature values.\n",
"# The target columns are selected with a list: tuple-style selection on a GroupBy\n",
"# (gb['a', 'b']) emits a FutureWarning on older pandas and raises in pandas >= 2.0.\n",
"train_data = use_data.groupby(use_cols)[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean().reset_index()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>所处地区</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>入炉煤低位热值(kJ/kg)</th>\n",
" <th>燃煤挥发份Var(%)</th>\n",
" <th>燃煤灰份Aar(%)</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>18366.00</td>\n",
" <td>26.05</td>\n",
" <td>15.70</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>18426.00</td>\n",
" <td>26.05</td>\n",
" <td>15.70</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3</td>\n",
" <td>0.545516</td>\n",
" <td>0.072476</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>19507.00</td>\n",
" <td>26.40</td>\n",
" <td>14.95</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3</td>\n",
" <td>0.595849</td>\n",
" <td>0.064745</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>19599.00</td>\n",
" <td>26.78</td>\n",
" <td>11.58</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3</td>\n",
" <td>0.584432</td>\n",
" <td>0.068390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>20125.00</td>\n",
" <td>24.92</td>\n",
" <td>14.90</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3</td>\n",
" <td>0.605369</td>\n",
" <td>0.066996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3961</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>200.0</td>\n",
" <td>15941.21</td>\n",
" <td>23.83</td>\n",
" <td>14.73</td>\n",
" <td>126.575647</td>\n",
" <td>45.918566</td>\n",
" <td>118</td>\n",
" <td>0.500172</td>\n",
" <td>0.064200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3962</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>210.0</td>\n",
" <td>15355.00</td>\n",
" <td>42.00</td>\n",
" <td>36.70</td>\n",
" <td>131.695864</td>\n",
" <td>46.580444</td>\n",
" <td>91</td>\n",
" <td>0.518301</td>\n",
" <td>0.063249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3963</th>\n",
" <td>黑龙江省</td>\n",
" <td>背压式</td>\n",
" <td>超高压</td>\n",
" <td>水冷-开式循环</td>\n",
" <td>200.0</td>\n",
" <td>13396.00</td>\n",
" <td>23.39</td>\n",
" <td>15.66</td>\n",
" <td>123.639146</td>\n",
" <td>47.210696</td>\n",
" <td>151</td>\n",
" <td>0.224312</td>\n",
" <td>0.053770</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3964</th>\n",
" <td>黑龙江省</td>\n",
" <td>背压式</td>\n",
" <td>超高压</td>\n",
" <td>水冷-闭式循环</td>\n",
" <td>215.0</td>\n",
" <td>15753.00</td>\n",
" <td>36.29</td>\n",
" <td>42.40</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250</td>\n",
" <td>0.290814</td>\n",
" <td>0.068027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3965</th>\n",
" <td>黑龙江省</td>\n",
" <td>背压式</td>\n",
" <td>超高压</td>\n",
" <td>水冷-闭式循环</td>\n",
" <td>215.0</td>\n",
" <td>16471.11</td>\n",
" <td>30.10</td>\n",
" <td>38.67</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3966 rows × 13 columns</p>\n",
"</div>"
],
"text/plain": [
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n",
"0 上海市 供热式 亚临界 水冷 300.0 18366.00 26.05 \n",
"1 上海市 供热式 亚临界 水冷 300.0 18426.00 26.05 \n",
"2 上海市 供热式 亚临界 水冷 300.0 19507.00 26.40 \n",
"3 上海市 供热式 亚临界 水冷 300.0 19599.00 26.78 \n",
"4 上海市 供热式 亚临界 水冷 300.0 20125.00 24.92 \n",
"... ... ... ... ... ... ... ... \n",
"3961 黑龙江省 纯凝式 超高压 水冷 200.0 15941.21 23.83 \n",
"3962 黑龙江省 纯凝式 超高压 水冷 210.0 15355.00 42.00 \n",
"3963 黑龙江省 背压式 超高压 水冷-开式循环 200.0 13396.00 23.39 \n",
"3964 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 15753.00 36.29 \n",
"3965 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 16471.11 30.10 \n",
"\n",
" 燃煤灰份Aar(%) longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n",
"0 15.70 121.471140 31.065113 3 0.537574 \n",
"1 15.70 121.471140 31.065113 3 0.545516 \n",
"2 14.95 121.471140 31.065113 3 0.595849 \n",
"3 11.58 121.471140 31.065113 3 0.584432 \n",
"4 14.90 121.471140 31.065113 3 0.605369 \n",
"... ... ... ... ... ... \n",
"3961 14.73 126.575647 45.918566 118 0.500172 \n",
"3962 36.70 131.695864 46.580444 91 0.518301 \n",
"3963 15.66 123.639146 47.210696 151 0.224312 \n",
"3964 42.40 129.604803 44.608202 250 0.290814 \n",
"3965 38.67 129.604803 44.608202 250 0.321635 \n",
"\n",
" 供热碳排放因子(kg/MJ) \n",
"0 0.070992 \n",
"1 0.072476 \n",
"2 0.064745 \n",
"3 0.068390 \n",
"4 0.066996 \n",
"... ... \n",
"3961 0.064200 \n",
"3962 0.063249 \n",
"3963 0.053770 \n",
"3964 0.068027 \n",
"3965 0.067798 \n",
"\n",
"[3966 rows x 13 columns]"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# log1p-transform the numeric feature columns; the emission-factor targets\n",
"# (column names containing '因子') keep their original scale.\n",
"# NOTE: the columns are overwritten, so re-running this cell transforms them again.\n",
"feature_num_cols = [name for name in num_cols if '因子' not in name]\n",
"train_data[feature_num_cols] = np.log1p(train_data[feature_num_cols])"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"train_data = train_data[train_data['供热碳排放因子(kg/MJ)']<=0.1].copy()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"train_data = pd.get_dummies(train_data, columns=object_cols).dropna()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Cast every column of the encoded frame to float in a single call.\n",
"train_data = train_data.astype(float)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"feature_cols = [x for x in train_data.columns if '因子' not in x]\n",
"target_cols = [x for x in train_data.columns if '因子' in x]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"train_data.to_csv('./train_data_processed.csv', encoding='utf-8-sig', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Hold out 10% of the rows as the test set, then 20% of the remainder as the\n",
"# validation set (72% / 18% / 10% overall). Both splits shuffle with the same fixed seed.\n",
"split_seed = 666\n",
"train_valid, test = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=split_seed)\n",
"train, valid = train_test_split(train_valid, test_size=0.2, shuffle=True, random_state=split_seed)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"train_X, train_y = train[feature_cols], train[target_cols]\n",
"valid_X, valid_y = valid[feature_cols], valid[target_cols]\n",
"test_X, test_y = test[feature_cols], test[target_cols]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"from sklearn.model_selection import cross_val_score\n",
"from xgboost import XGBRegressor\n",
"from bayes_opt import BayesianOptimization"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### 发电建模"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# XGBoost hyper-parameters for the power-generation emission-factor model.\n",
"params_xgb = {'objective': 'reg:squarederror',\n",
"              'booster': 'gbtree',\n",
"              'eta': 0.01,\n",
"              'max_depth': 60,\n",
"              'subsample': 0.85,\n",
"              'colsample_bytree': 0.85,\n",
"              'min_child_weight': 10,\n",
"              'seed': 10}\n",
"\n",
"num_boost_round = 2000\n",
"\n",
"# Select the target by column name rather than by its position in target_cols, so a\n",
"# change in column order cannot silently swap the power and heat targets.\n",
"power_target = '发电碳排放因子(kg/kWh)'\n",
"dtrain = xgb.DMatrix(train_X, train_y[power_target].values)\n",
"dvalid = xgb.DMatrix(valid_X, valid_y[power_target].values)\n",
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
"\n",
"# Train for at most num_boost_round rounds, with early stopping after 200 rounds\n",
"# without improvement on the evaluation data.\n",
"gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
"                     early_stopping_rounds=200, verbose_eval=False)\n"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Predict on the held-out test set. The ground truth is picked by column name\n",
"# instead of by position, so it always matches the power-generation target.\n",
"y_pred_xgb = gb_model.predict(xgb.DMatrix(test_X))\n",
"y_true_xgb = test_y['发电碳排放因子(kg/kWh)'].values"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE: 5.5E-04\n",
"RMSE: 0.0235\n",
"MAE: 0.0145\n",
"MAPE: 2.99 %\n",
"R_2: 0.9011\n"
]
}
],
"source": [
"# Test-set regression metrics for the power-generation model.\n",
"MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n",
"RMSE = np.sqrt(MSE)  # same value as recomputing the MSE, without the second pass\n",
"MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n",
"MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n",
"R_2 = r2_score(y_true_xgb, y_pred_xgb)\n",
"\n",
"print('MSE:', format(MSE, '.1E'))\n",
"print('RMSE:', round(RMSE, 4))\n",
"print('MAE:', round(MAE, 4))\n",
"print('MAPE:', round(MAPE*100, 2), '%')\n",
"print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"power_eva_df = pd.DataFrame.from_records([y_true_xgb, y_pred_xgb]).T\n",
"power_eva_df.to_csv('./发电测试结果.csv', index=False, encoding='utf-8-sig')"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# save_model does not create missing directories, so make sure ./models exists first.\n",
"os.makedirs('./models', exist_ok=True)\n",
"gb_model.save_model('./models/power_model.txt')"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### 供热建模"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def xgb_cv(max_depth, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n",
"    \"\"\"Score one XGBoost hyper-parameter set by 10-fold cross-validated R^2.\n",
"\n",
"    The model is fitted on train[feature_cols] against the heat-supply emission\n",
"    factor ('供热碳排放因子(kg/MJ)'). Presumably used as the objective for\n",
"    BayesianOptimization; no call is visible in this part of the notebook.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    max_depth : float\n",
"        Tree depth; truncated to int before use.\n",
"    learning_rate, min_child_weight, gamma : float\n",
"        Passed to XGBRegressor unchanged.\n",
"    subsample, colsample_bytree : float\n",
"        Clamped into [0, 1].\n",
"    reg_alpha : float\n",
"        Clamped to be non-negative.\n",
"\n",
"    Returns\n",
"    -------\n",
"    float\n",
"        Mean R^2 over the 10 folds. The previous version returned the best\n",
"        single fold, which rewards lucky splits instead of average performance.\n",
"    \"\"\"\n",
"    model = XGBRegressor(max_depth=int(max_depth),\n",
"                         learning_rate=learning_rate,\n",
"                         n_estimators=2000,\n",
"                         min_child_weight=min_child_weight,\n",
"                         subsample=max(min(subsample, 1), 0),\n",
"                         colsample_bytree=max(min(colsample_bytree, 1), 0),\n",
"                         reg_alpha=max(reg_alpha, 0),\n",
"                         gamma=gamma,\n",
"                         objective='reg:squarederror',\n",
"                         booster='gbtree',\n",
"                         seed=10)\n",
"    fold_scores = cross_val_score(estimator=model, X=train[feature_cols], y=train['供热碳排放因子(kg/MJ)'],\n",
"                                  scoring='r2', cv=10)\n",
"    return fold_scores.mean()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# XGBoost hyper-parameters for the heat-supply emission-factor model.\n",
"# NOTE: this rebinds params_xgb, dtrain, dvalid and watchlist from the power-model cell.\n",
"params_xgb = {'objective': 'reg:squarederror',\n",
"              'booster': 'gbtree',\n",
"              'eta': 0.01,\n",
"              'max_depth': 30,\n",
"              'subsample': 0.8,\n",
"              'colsample_bytree': 0.9,\n",
"              'min_child_weight': 10,\n",
"              'seed': 108}\n",
"\n",
"num_boost_round = 2000\n",
"\n",
"# Select the target by column name rather than by its position in target_cols, so a\n",
"# change in column order cannot silently swap the power and heat targets.\n",
"heat_target = '供热碳排放因子(kg/MJ)'\n",
"dtrain = xgb.DMatrix(train_X, train_y[heat_target].values)\n",
"dvalid = xgb.DMatrix(valid_X, valid_y[heat_target].values)\n",
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
"\n",
"# Train for at most num_boost_round rounds, with early stopping after 200 rounds\n",
"# without improvement on the evaluation data.\n",
"gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
"                          early_stopping_rounds=200, verbose_eval=False)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Predict on the held-out test set. The ground truth is picked by column name\n",
"# instead of by position, so it always matches the heat-supply target.\n",
"y_pred_heat = gb_model_heat.predict(xgb.DMatrix(test_X))\n",
"y_true_heat = test_y['供热碳排放因子(kg/MJ)'].values"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"MSE: 2.9E-05\n",
"RMSE: 0.0054\n",
"MAE: 0.0024\n",
"MAPE: 5.19 %\n",
"R_2: 0.7392\n"
]
}
],
"source": [
"# Test-set regression metrics for the heat-supply model.\n",
"MSE = mean_squared_error(y_true_heat, y_pred_heat)\n",
"RMSE = np.sqrt(MSE)  # same value as recomputing the MSE, without the second pass\n",
"MAE = mean_absolute_error(y_true_heat, y_pred_heat)\n",
"MAPE = mean_absolute_percentage_error(y_true_heat, y_pred_heat)\n",
"R_2 = r2_score(y_true_heat, y_pred_heat)\n",
"\n",
"print('MSE:', format(MSE, '.1E'))\n",
"print('RMSE:', round(RMSE, 4))\n",
"print('MAE:', round(MAE, 4))\n",
"print('MAPE:', round(MAPE*100, 2), '%')\n",
"print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"pd.DataFrame.from_records([y_true_heat, y_pred_heat]).T.to_csv('./供热测试结果.csv', index=False, encoding='utf-8-sig')"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# save_model does not create missing directories, so make sure ./models exists first.\n",
"os.makedirs('./models', exist_ok=True)\n",
"gb_model_heat.save_model('./models/heat_model.txt')"
]
},
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"### 煤种标准化工程"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
}
],
"source": [
"# Mean emission factors per (coal type, heating value, volatile %, ash %) combination.\n",
"# The target columns are selected with a list: tuple-style selection on a GroupBy\n",
"# (gb['a', 'b']) emits a FutureWarning on older pandas and raises in pandas >= 2.0.\n",
"new_values = use_data.groupby(['煤种', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" </tr>\n",
" <tr>\n",
" <th>煤种</th>\n",
" <th>入炉煤低位热值(kJ/kg)</th>\n",
" <th>燃煤挥发份Var(%)</th>\n",
" <th>燃煤灰份Aar(%)</th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>无烟煤</th>\n",
" <th>19827.00</th>\n",
" <th>11.18</th>\n",
" <th>2539.00</th>\n",
" <td>0.561424</td>\n",
" <td>0.087794</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">烟煤</th>\n",
" <th>16733.00</th>\n",
" <th>22.53</th>\n",
" <th>27.46</th>\n",
" <td>0.441511</td>\n",
" <td>0.064259</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">16740.00</th>\n",
" <th>18.99</th>\n",
" <th>37.00</th>\n",
" <td>0.487225</td>\n",
" <td>0.064535</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27.93</th>\n",
" <th>24.43</th>\n",
" <td>0.418457</td>\n",
" <td>0.064747</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16741.00</th>\n",
" <th>26.69</th>\n",
" <th>25.92</th>\n",
" <td>0.433679</td>\n",
" <td>0.061822</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">贫煤</th>\n",
" <th>22149.00</th>\n",
" <th>12.43</th>\n",
" <th>25.10</th>\n",
" <td>0.629733</td>\n",
" <td>0.082772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22272.51</th>\n",
" <th>11.83</th>\n",
" <th>22.97</th>\n",
" <td>0.627877</td>\n",
" <td>0.083234</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22475.97</th>\n",
" <th>8.90</th>\n",
" <th>23.98</th>\n",
" <td>0.620331</td>\n",
" <td>0.086574</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23215.00</th>\n",
" <th>11.00</th>\n",
" <th>19.31</th>\n",
" <td>0.682221</td>\n",
" <td>0.080249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23791.00</th>\n",
" <th>11.00</th>\n",
" <th>19.31</th>\n",
" <td>0.701795</td>\n",
" <td>0.082240</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3936 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ)\n",
"煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n",
"无烟煤 19827.00 11.18 2539.00 0.561424 0.087794\n",
"烟煤 16733.00 22.53 27.46 0.441511 0.064259\n",
" 16740.00 18.99 37.00 0.487225 0.064535\n",
" 27.93 24.43 0.418457 0.064747\n",
" 16741.00 26.69 25.92 0.433679 0.061822\n",
"... ... ...\n",
"贫煤 22149.00 12.43 25.10 0.629733 0.082772\n",
" 22272.51 11.83 22.97 0.627877 0.083234\n",
" 22475.97 8.90 23.98 0.620331 0.086574\n",
" 23215.00 11.00 19.31 0.682221 0.080249\n",
" 23791.00 11.00 19.31 0.701795 0.082240\n",
"\n",
"[3936 rows x 2 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_values"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>煤种</th>\n",
" <th>入炉煤低位热值(kJ/kg)</th>\n",
" <th>燃煤挥发份Var(%)</th>\n",
" <th>燃煤灰份Aar(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>无烟煤</td>\n",
" <td>19827.00</td>\n",
" <td>11.18</td>\n",
" <td>2539.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>烟煤</td>\n",
" <td>16733.00</td>\n",
" <td>22.53</td>\n",
" <td>27.46</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>烟煤</td>\n",
" <td>16740.00</td>\n",
" <td>18.99</td>\n",
" <td>37.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>烟煤</td>\n",
" <td>16740.00</td>\n",
" <td>27.93</td>\n",
" <td>24.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>烟煤</td>\n",
" <td>16741.00</td>\n",
" <td>26.69</td>\n",
" <td>25.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3931</th>\n",
" <td>贫煤</td>\n",
" <td>22149.00</td>\n",
" <td>12.43</td>\n",
" <td>25.10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3932</th>\n",
" <td>贫煤</td>\n",
" <td>22272.51</td>\n",
" <td>11.83</td>\n",
" <td>22.97</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3933</th>\n",
" <td>贫煤</td>\n",
" <td>22475.97</td>\n",
" <td>8.90</td>\n",
" <td>23.98</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3934</th>\n",
" <td>贫煤</td>\n",
" <td>23215.00</td>\n",
" <td>11.00</td>\n",
" <td>19.31</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3935</th>\n",
" <td>贫煤</td>\n",
" <td>23791.00</td>\n",
" <td>11.00</td>\n",
" <td>19.31</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3936 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" 煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%)\n",
"0 无烟煤 19827.00 11.18 2539.00\n",
"1 烟煤 16733.00 22.53 27.46\n",
"2 烟煤 16740.00 18.99 37.00\n",
"3 烟煤 16740.00 27.93 24.43\n",
"4 烟煤 16741.00 26.69 25.92\n",
"... ... ... ... ...\n",
"3931 贫煤 22149.00 12.43 25.10\n",
"3932 贫煤 22272.51 11.83 22.97\n",
"3933 贫煤 22475.97 8.90 23.98\n",
"3934 贫煤 23215.00 11.00 19.31\n",
"3935 贫煤 23791.00 11.00 19.31\n",
"\n",
"[3936 rows x 4 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"coal_df = new_values.reset_index().drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])\n",
"coal_df"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"# Map each coal type to a 2-D array of its observed\n",
"# (heating value, volatile %, ash %) combinations taken from coal_df.\n",
"coal_params_dict = {\n",
"    coal_type: coal_df.loc[coal_df['煤种'] == coal_type,\n",
"                           ['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']].values\n",
"    for coal_type in coal_df['煤种'].unique().tolist()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>电厂名称</th>\n",
" <th>机组编号</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>入炉煤低位热值(kJ/kg)</th>\n",
" <th>燃煤挥发份Var(%)</th>\n",
" <th>燃煤灰份Aar(%)</th>\n",
" <th>煤种</th>\n",
" <th>所处地区</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>江苏利港电力有限公司</td>\n",
" <td>1</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>21602.05000</td>\n",
" <td>26.09</td>\n",
" <td>16.80</td>\n",
" <td>烟煤</td>\n",
" <td>江苏省</td>\n",
" <td>120.096620</td>\n",
" <td>31.942361</td>\n",
" <td>1.0</td>\n",
" <td>0.586990</td>\n",
" <td>0.076843</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>江苏利港电力有限公司</td>\n",
" <td>1</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>21926.81000</td>\n",
" <td>26.68</td>\n",
" <td>15.41</td>\n",
" <td>烟煤</td>\n",
" <td>江苏省</td>\n",
" <td>120.096620</td>\n",
" <td>31.942361</td>\n",
" <td>1.0</td>\n",
" <td>0.632859</td>\n",
" <td>0.077676</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>江苏利港电力有限公司</td>\n",
" <td>1</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>21261.93062</td>\n",
" <td>26.46</td>\n",
" <td>15.18</td>\n",
" <td>烟煤</td>\n",
" <td>江苏省</td>\n",
" <td>120.096620</td>\n",
" <td>31.942361</td>\n",
" <td>1.0</td>\n",
" <td>0.609196</td>\n",
" <td>0.074823</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>江苏利港电力有限公司</td>\n",
" <td>1</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>20840.00000</td>\n",
" <td>26.43</td>\n",
" <td>14.55</td>\n",
" <td>烟煤</td>\n",
" <td>江苏省</td>\n",
" <td>120.096620</td>\n",
" <td>31.942361</td>\n",
" <td>1.0</td>\n",
" <td>0.602178</td>\n",
" <td>0.081628</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>江苏利港电力有限公司</td>\n",
" <td>1</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>20706.00000</td>\n",
" <td>26.43</td>\n",
" <td>14.96</td>\n",
" <td>烟煤</td>\n",
" <td>江苏省</td>\n",
" <td>120.096620</td>\n",
" <td>31.942361</td>\n",
" <td>1.0</td>\n",
" <td>0.590254</td>\n",
" <td>0.081103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5736</th>\n",
" <td>浙江浙能电力股份有限公司台州发电厂</td>\n",
" <td>8</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>NaN</td>\n",
" <td>21973.00000</td>\n",
" <td>37.43</td>\n",
" <td>17.12</td>\n",
" <td>烟煤</td>\n",
" <td>浙江省</td>\n",
" <td>121.465840</td>\n",
" <td>28.704623</td>\n",
" <td>73.0</td>\n",
" <td>0.628300</td>\n",
" <td>0.078776</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5737</th>\n",
" <td>浙江浙能电力股份有限公司台州发电厂</td>\n",
" <td>8</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>NaN</td>\n",
" <td>21372.00000</td>\n",
" <td>39.87</td>\n",
" <td>18.01</td>\n",
" <td>烟煤</td>\n",
" <td>浙江省</td>\n",
" <td>121.465840</td>\n",
" <td>28.704623</td>\n",
" <td>73.0</td>\n",
" <td>0.595019</td>\n",
" <td>0.076622</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5738</th>\n",
" <td>浙江浙能电力股份有限公司台州发电厂</td>\n",
" <td>8</td>\n",
" <td>350.0</td>\n",
" <td>纯凝式</td>\n",
" <td>亚临界</td>\n",
" <td>NaN</td>\n",
" <td>20856.00000</td>\n",
" <td>39.32</td>\n",
" <td>19.74</td>\n",
" <td>烟煤</td>\n",
" <td>浙江省</td>\n",
" <td>121.465840</td>\n",
" <td>28.704623</td>\n",
" <td>73.0</td>\n",
" <td>0.565718</td>\n",
" <td>0.074772</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5739</th>\n",
" <td>榆能榆神热电有限公司</td>\n",
" <td>1</td>\n",
" <td>350.0</td>\n",
" <td>供热式</td>\n",
" <td>超临界</td>\n",
" <td>间接空冷</td>\n",
" <td>25514.00000</td>\n",
" <td>38.84</td>\n",
" <td>7.28</td>\n",
" <td>烟煤</td>\n",
" <td>陕西省</td>\n",
" <td>109.820265</td>\n",
" <td>38.304383</td>\n",
" <td>1151.0</td>\n",
" <td>0.664456</td>\n",
" <td>0.091482</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5740</th>\n",
" <td>榆能榆神热电有限公司</td>\n",
" <td>2</td>\n",
" <td>350.0</td>\n",
" <td>供热式</td>\n",
" <td>超临界</td>\n",
" <td>间接空冷</td>\n",
" <td>25514.00000</td>\n",
" <td>38.84</td>\n",
" <td>7.28</td>\n",
" <td>烟煤</td>\n",
" <td>陕西省</td>\n",
" <td>109.820265</td>\n",
" <td>38.304383</td>\n",
" <td>1151.0</td>\n",
" <td>0.661759</td>\n",
" <td>0.091483</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5629 rows × 16 columns</p>\n",
"</div>"
],
"text/plain": [
" 电厂名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n",
"0 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21602.05000 \n",
"1 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21926.81000 \n",
"2 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21261.93062 \n",
"3 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 20840.00000 \n",
"4 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 20706.00000 \n",
"... ... ... ... ... ... ... ... \n",
"5736 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 21973.00000 \n",
"5737 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 21372.00000 \n",
"5738 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 20856.00000 \n",
"5739 榆能榆神热电有限公司 1 350.0 供热式 超临界 间接空冷 25514.00000 \n",
"5740 榆能榆神热电有限公司 2 350.0 供热式 超临界 间接空冷 25514.00000 \n",
"\n",
" 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n",
"0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1.0 \n",
"1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1.0 \n",
"2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1.0 \n",
"3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1.0 \n",
"4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1.0 \n",
"... ... ... .. ... ... ... ... \n",
"5736 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73.0 \n",
"5737 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73.0 \n",
"5738 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73.0 \n",
"5739 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n",
"5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n",
"\n",
" 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n",
"0 0.586990 0.076843 \n",
"1 0.632859 0.077676 \n",
"2 0.609196 0.074823 \n",
"3 0.602178 0.081628 \n",
"4 0.590254 0.081103 \n",
"... ... ... \n",
"5736 0.628300 0.078776 \n",
"5737 0.595019 0.076622 \n",
"5738 0.565718 0.074772 \n",
"5739 0.664456 0.091482 \n",
"5740 0.661759 0.091483 \n",
"\n",
"[5629 rows x 16 columns]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"use_data"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>所处地区</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>煤种</th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>烟煤</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>烟煤</td>\n",
" <td>0.545516</td>\n",
" <td>0.072476</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>烟煤</td>\n",
" <td>0.595849</td>\n",
" <td>0.064745</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>烟煤</td>\n",
" <td>0.584432</td>\n",
" <td>0.068390</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>烟煤</td>\n",
" <td>0.605369</td>\n",
" <td>0.066996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3075</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>200.0</td>\n",
" <td>126.575647</td>\n",
" <td>45.918566</td>\n",
" <td>118.0</td>\n",
" <td>褐煤</td>\n",
" <td>0.500172</td>\n",
" <td>0.064200</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3076</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>200.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>褐煤</td>\n",
" <td>0.378298</td>\n",
" <td>0.069663</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3077</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>210.0</td>\n",
" <td>131.695864</td>\n",
" <td>46.580444</td>\n",
" <td>91.0</td>\n",
" <td>褐煤</td>\n",
" <td>0.518301</td>\n",
" <td>0.063249</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3078</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>褐煤</td>\n",
" <td>0.290814</td>\n",
" <td>0.068027</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>褐煤</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3080 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude 煤种 \\\n",
"0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n",
"1 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n",
"2 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n",
"3 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n",
"4 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n",
"... ... ... ... ... ... ... ... ... .. \n",
"3075 黑龙江省 纯凝式 超高压 水冷 200.0 126.575647 45.918566 118.0 褐煤 \n",
"3076 黑龙江省 纯凝式 超高压 水冷 200.0 129.604803 44.608202 250.0 褐煤 \n",
"3077 黑龙江省 纯凝式 超高压 水冷 210.0 131.695864 46.580444 91.0 褐煤 \n",
"3078 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 褐煤 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 褐煤 \n",
"\n",
" 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n",
"0 0.537574 0.070992 \n",
"1 0.545516 0.072476 \n",
"2 0.595849 0.064745 \n",
"3 0.584432 0.068390 \n",
"4 0.605369 0.066996 \n",
"... ... ... \n",
"3075 0.500172 0.064200 \n",
"3076 0.378298 0.069663 \n",
"3077 0.518301 0.063249 \n",
"3078 0.290814 0.068027 \n",
"3079 0.321635 0.067798 \n",
"\n",
"[3080 rows x 11 columns]"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_use_data = use_data.groupby(use_cols+['煤种'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index().drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])\n",
"new_use_data"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"new_use_data.drop(columns='煤种', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"new_data = new_use_data.explode(column='coal_params')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>所处地区</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" <th>coal_params</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>[16733.0, 22.53, 27.46]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>[16740.0, 18.99, 37.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>[16740.0, 27.93, 24.43]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>[16741.0, 26.69, 25.92]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.471140</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>[16741.51, 19.51, 35.62]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" <td>[16723.0, 40.63, 39.94]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" <td>[16725.0, 26.36, 28.51]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" <td>[16725.19, 34.59, 37.71]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" <td>[16725.85, 43.2, 12.0]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>215.0</td>\n",
" <td>129.604803</td>\n",
" <td>44.608202</td>\n",
" <td>250.0</td>\n",
" <td>0.321635</td>\n",
" <td>0.067798</td>\n",
" <td>[16729.0, 51.42, 17.33]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7151079 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n",
"0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n",
"... ... ... ... ... ... ... ... ... \n",
"3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n",
"\n",
" 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) coal_params \n",
"0 0.537574 0.070992 [16733.0, 22.53, 27.46] \n",
"0 0.537574 0.070992 [16740.0, 18.99, 37.0] \n",
"0 0.537574 0.070992 [16740.0, 27.93, 24.43] \n",
"0 0.537574 0.070992 [16741.0, 26.69, 25.92] \n",
"0 0.537574 0.070992 [16741.51, 19.51, 35.62] \n",
"... ... ... ... \n",
"3079 0.321635 0.067798 [16723.0, 40.63, 39.94] \n",
"3079 0.321635 0.067798 [16725.0, 26.36, 28.51] \n",
"3079 0.321635 0.067798 [16725.19, 34.59, 37.71] \n",
"3079 0.321635 0.067798 [16725.85, 43.2, 12.0] \n",
"3079 0.321635 0.067798 [16729.0, 51.42, 17.33] \n",
"\n",
"[7151079 rows x 11 columns]"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"new_data"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"new_data['入炉煤低位热值(kJ/kg)'] = new_data.coal_params.apply(lambda x: x[0]).values\n",
"new_data['燃煤挥发份Var(%)'] = new_data.coal_params.apply(lambda x: x[1]).values\n",
"new_data['燃煤灰份Aar(%)'] = new_data.coal_params.apply(lambda x: x[2]).values"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"norm_data = new_data.drop(columns='coal_params')"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>所处地区</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>发电碳排放因子(kg/kWh)</th>\n",
" <th>供热碳排放因子(kg/MJ)</th>\n",
" <th>入炉煤低位热值(kJ/kg)</th>\n",
" <th>燃煤挥发份Var(%)</th>\n",
" <th>燃煤灰份Aar(%)</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.47114</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>16733.00</td>\n",
" <td>22.53</td>\n",
" <td>27.46</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.47114</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>16740.00</td>\n",
" <td>18.99</td>\n",
" <td>37.00</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.47114</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>16740.00</td>\n",
" <td>27.93</td>\n",
" <td>24.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.47114</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>16741.00</td>\n",
" <td>26.69</td>\n",
" <td>25.92</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>300.0</td>\n",
" <td>121.47114</td>\n",
" <td>31.065113</td>\n",
" <td>3.0</td>\n",
" <td>0.537574</td>\n",
" <td>0.070992</td>\n",
" <td>16741.51</td>\n",
" <td>19.51</td>\n",
" <td>35.62</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n",
"0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n",
"0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n",
"\n",
" 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n",
"0 0.537574 0.070992 16733.00 22.53 27.46 \n",
"0 0.537574 0.070992 16740.00 18.99 37.00 \n",
"0 0.537574 0.070992 16740.00 27.93 24.43 \n",
"0 0.537574 0.070992 16741.00 26.69 25.92 \n",
"0 0.537574 0.070992 16741.51 19.51 35.62 "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"norm_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"for col in num_cols:\n",
" norm_data[col] = np.log1p(norm_data[col])\n",
" # total_data[col] = (total_data[col] - total_data[col].min()) / (total_data[col].max() - total_data[col].min())\n",
"norm_data_dummpy = pd.get_dummies(norm_data, columns=object_cols)"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"norm_data_dummpy.drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'], inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
},
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"new_xgb_data = xgb.DMatrix(norm_data_dummpy[feature_cols])"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"norm_data['power_co2_factor'] = gb_model.predict(new_xgb_data)\n",
"norm_data['heat_co2_factor'] = gb_model_heat.predict(new_xgb_data)"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"normaled_data = norm_data.drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>所处地区</th>\n",
" <th>机组类型</th>\n",
" <th>参数分类</th>\n",
" <th>冷凝器型式</th>\n",
" <th>铭牌容量 (MW)</th>\n",
" <th>longitude</th>\n",
" <th>latitude</th>\n",
" <th>altitude</th>\n",
" <th>power_co2_factor</th>\n",
" <th>heat_co2_factor</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>5.707110</td>\n",
" <td>4.807875</td>\n",
" <td>3.467769</td>\n",
" <td>1.386294</td>\n",
" <td>0.492558</td>\n",
" <td>0.064411</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>5.707110</td>\n",
" <td>4.807875</td>\n",
" <td>3.467769</td>\n",
" <td>1.386294</td>\n",
" <td>0.474082</td>\n",
" <td>0.062117</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>5.707110</td>\n",
" <td>4.807875</td>\n",
" <td>3.467769</td>\n",
" <td>1.386294</td>\n",
" <td>0.489623</td>\n",
" <td>0.063859</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>5.707110</td>\n",
" <td>4.807875</td>\n",
" <td>3.467769</td>\n",
" <td>1.386294</td>\n",
" <td>0.493615</td>\n",
" <td>0.064382</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>上海市</td>\n",
" <td>供热式</td>\n",
" <td>亚临界</td>\n",
" <td>水冷</td>\n",
" <td>5.707110</td>\n",
" <td>4.807875</td>\n",
" <td>3.467769</td>\n",
" <td>1.386294</td>\n",
" <td>0.470883</td>\n",
" <td>0.062354</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>5.375278</td>\n",
" <td>4.872176</td>\n",
" <td>3.820088</td>\n",
" <td>5.525453</td>\n",
" <td>0.388912</td>\n",
" <td>0.067787</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>5.375278</td>\n",
" <td>4.872176</td>\n",
" <td>3.820088</td>\n",
" <td>5.525453</td>\n",
" <td>0.388606</td>\n",
" <td>0.065639</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>5.375278</td>\n",
" <td>4.872176</td>\n",
" <td>3.820088</td>\n",
" <td>5.525453</td>\n",
" <td>0.380971</td>\n",
" <td>0.068147</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>5.375278</td>\n",
" <td>4.872176</td>\n",
" <td>3.820088</td>\n",
" <td>5.525453</td>\n",
" <td>0.401973</td>\n",
" <td>0.065844</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3079</th>\n",
" <td>黑龙江省</td>\n",
" <td>纯凝式</td>\n",
" <td>超高压</td>\n",
" <td>水冷</td>\n",
" <td>5.375278</td>\n",
" <td>4.872176</td>\n",
" <td>3.820088</td>\n",
" <td>5.525453</td>\n",
" <td>0.386369</td>\n",
" <td>0.065845</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>7151079 rows × 10 columns</p>\n",
"</div>"
],
"text/plain": [
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n",
"0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n",
"0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n",
"0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n",
"0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n",
"0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n",
"... ... ... ... ... ... ... ... ... \n",
"3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n",
"3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n",
"\n",
" power_co2_factor heat_co2_factor \n",
"0 0.492558 0.064411 \n",
"0 0.474082 0.062117 \n",
"0 0.489623 0.063859 \n",
"0 0.493615 0.064382 \n",
"0 0.470883 0.062354 \n",
"... ... ... \n",
"3079 0.388912 0.067787 \n",
"3079 0.388606 0.065639 \n",
"3079 0.380971 0.068147 \n",
"3079 0.401973 0.065844 \n",
"3079 0.386369 0.065845 \n",
"\n",
"[7151079 rows x 10 columns]"
]
},
"execution_count": 66,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"normaled_data"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"target_cols = ['power_co2_factor', 'heat_co2_factor']"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"save_data = normaled_data.groupby([x for x in normaled_data.columns if x not in target_cols])[target_cols].mean()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"save_data.reset_index().to_csv('./results/去煤种化数据.csv', encoding='utf-8-sig', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
}
},
"nbformat": 4,
"nbformat_minor": 4
}