1809 lines
151 KiB
Plaintext
1809 lines
151 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
},
|
||
"tags": []
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.multioutput import MultiOutputRegressor\n",
|
||
"import xgboost as xgb\n",
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score\n",
|
||
"from sklearn.model_selection import train_test_split"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n5740 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.0 38.84 \n\n 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n5740 7.28 烟煤 陕西省 109.820265 38.304383 1151 0.661759 \n\n 供热碳排放因子(kg/MJ) \n5740 0.091483 ",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>企业名称</th>\n <th>机组编号</th>\n <th>铭牌容量 (MW)</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n <th>煤种</th>\n <th>所处地区</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>5740</th>\n <td>榆能榆神热电有限公司</td>\n <td>2</td>\n <td>350.0</td>\n <td>抽凝式</td>\n <td>超临界</td>\n <td>间接空冷</td>\n <td>25514.0</td>\n <td>38.84</td>\n <td>7.28</td>\n <td>烟煤</td>\n <td>陕西省</td>\n <td>109.820265</td>\n <td>38.304383</td>\n <td>1151</td>\n <td>0.661759</td>\n <td>0.091483</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data = pd.read_excel('train_data.xlsx')\n",
|
||
"total_data.tail(1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "Index(['企业名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)',\n '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude',\n 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'],\n dtype='object')"
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data.columns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 发电类型 地区 城市 企业名称 机组编号 机组状态 机组数量 单机容量(MW) 总容量(MW) \\\n0 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 1 在役 1 320.0 320.0 \n1 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 2 在役 1 320.0 320.0 \n2 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 3 在役 1 1000.0 1000.0 \n3 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 4 在役 1 1000.0 1000.0 \n4 煤电 安徽省 安庆市 安徽华泰林浆纸有限公司 化学浆生产线 在役 1 40.0 40.0 \n... ... ... ... ... ... ... ... ... ... \n5317 煤电 重庆市 长寿区 中国石化集团重庆川维化工有限公司 B4 在役 1 49.0 49.0 \n5318 煤电 重庆市 长寿区 威立雅长扬热能(重庆)有限责任公司 1 在役 1 25.0 25.0 \n5319 煤电 重庆市 长寿区 威立雅长扬热能(重庆)有限责任公司 2 在役 1 25.0 25.0 \n5320 煤电 重庆市 长寿区 重庆恩力吉投资有限责任公司 2 在役 1 30.0 30.0 \n5321 煤电 重庆市 长寿区 重庆恩力吉投资有限责任公司 3 在役 1 125.0 125.0 \n\n 核心设备类型 汽轮机类型 压力参数 冷却方式 \n0 煤粉锅炉 凝气式 亚临界 水冷-开式循环 \n1 煤粉锅炉 凝气式 亚临界 水冷-开式循环 \n2 煤粉锅炉 凝气式 超超临界 水冷-闭式循环 \n3 煤粉锅炉 凝气式 超超临界 水冷-闭式循环 \n4 煤粉锅炉 抽凝式 高压 水冷-闭式循环 \n... ... ... ... ... \n5317 煤粉锅炉 抽凝式 高压 水冷-闭式循环 \n5318 循环流化床锅炉 抽凝式 高压 水冷-闭式循环 \n5319 循环流化床锅炉 抽背式 高压 水冷-闭式循环 \n5320 循环流化床锅炉 背压式 高压 其他 \n5321 循环流化床锅炉 抽凝式 高压 水冷-闭式循环 \n\n[5322 rows x 13 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>发电类型</th>\n <th>地区</th>\n <th>城市</th>\n <th>企业名称</th>\n <th>机组编号</th>\n <th>机组状态</th>\n <th>机组数量</th>\n <th>单机容量(MW)</th>\n <th>总容量(MW)</th>\n <th>核心设备类型</th>\n <th>汽轮机类型</th>\n <th>压力参数</th>\n <th>冷却方式</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>煤电</td>\n <td>安徽省</td>\n <td>安庆市</td>\n <td>国能神皖安庆发电有限责任公司</td>\n <td>1</td>\n <td>在役</td>\n <td>1</td>\n <td>320.0</td>\n <td>320.0</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>1</th>\n <td>煤电</td>\n <td>安徽省</td>\n <td>安庆市</td>\n <td>国能神皖安庆发电有限责任公司</td>\n <td>2</td>\n <td>在役</td>\n <td>1</td>\n <td>320.0</td>\n <td>320.0</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>2</th>\n <td>煤电</td>\n <td>安徽省</td>\n <td>安庆市</td>\n <td>国能神皖安庆发电有限责任公司</td>\n <td>3</td>\n <td>在役</td>\n <td>1</td>\n <td>1000.0</td>\n <td>1000.0</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>超超临界</td>\n <td>水冷-闭式循环</td>\n </tr>\n <tr>\n <th>3</th>\n <td>煤电</td>\n <td>安徽省</td>\n <td>安庆市</td>\n <td>国能神皖安庆发电有限责任公司</td>\n <td>4</td>\n <td>在役</td>\n <td>1</td>\n <td>1000.0</td>\n <td>1000.0</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>超超临界</td>\n <td>水冷-闭式循环</td>\n </tr>\n <tr>\n <th>4</th>\n <td>煤电</td>\n <td>安徽省</td>\n <td>安庆市</td>\n <td>安徽华泰林浆纸有限公司</td>\n <td>化学浆生产线</td>\n <td>在役</td>\n <td>1</td>\n <td>40.0</td>\n <td>40.0</td>\n <td>煤粉锅炉</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷-闭式循环</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n 
<th>5317</th>\n <td>煤电</td>\n <td>重庆市</td>\n <td>长寿区</td>\n <td>中国石化集团重庆川维化工有限公司</td>\n <td>B4</td>\n <td>在役</td>\n <td>1</td>\n <td>49.0</td>\n <td>49.0</td>\n <td>煤粉锅炉</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷-闭式循环</td>\n </tr>\n <tr>\n <th>5318</th>\n <td>煤电</td>\n <td>重庆市</td>\n <td>长寿区</td>\n <td>威立雅长扬热能(重庆)有限责任公司</td>\n <td>1</td>\n <td>在役</td>\n <td>1</td>\n <td>25.0</td>\n <td>25.0</td>\n <td>循环流化床锅炉</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷-闭式循环</td>\n </tr>\n <tr>\n <th>5319</th>\n <td>煤电</td>\n <td>重庆市</td>\n <td>长寿区</td>\n <td>威立雅长扬热能(重庆)有限责任公司</td>\n <td>2</td>\n <td>在役</td>\n <td>1</td>\n <td>25.0</td>\n <td>25.0</td>\n <td>循环流化床锅炉</td>\n <td>抽背式</td>\n <td>高压</td>\n <td>水冷-闭式循环</td>\n </tr>\n <tr>\n <th>5320</th>\n <td>煤电</td>\n <td>重庆市</td>\n <td>长寿区</td>\n <td>重庆恩力吉投资有限责任公司</td>\n <td>2</td>\n <td>在役</td>\n <td>1</td>\n <td>30.0</td>\n <td>30.0</td>\n <td>循环流化床锅炉</td>\n <td>背压式</td>\n <td>高压</td>\n <td>其他</td>\n </tr>\n <tr>\n <th>5321</th>\n <td>煤电</td>\n <td>重庆市</td>\n <td>长寿区</td>\n <td>重庆恩力吉投资有限责任公司</td>\n <td>3</td>\n <td>在役</td>\n <td>1</td>\n <td>125.0</td>\n <td>125.0</td>\n <td>循环流化床锅炉</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷-闭式循环</td>\n </tr>\n </tbody>\n</table>\n<p>5322 rows × 13 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"unit_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx')\n",
|
||
"unit_data"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "(5694, 16)"
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data = total_data.drop_duplicates()\n",
|
||
"total_data.shape"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"outputs": [],
|
||
"source": [
|
||
"total_data['机组编号'] = total_data['机组编号'].astype(str)\n",
|
||
"unit_data['机组编号'] = unit_data['机组编号'].astype(str)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"outputs": [],
|
||
"source": [
|
||
"total_data = total_data.merge(unit_data[['企业名称', '机组编号', '核心设备类型', '汽轮机类型', '冷却方式']], how='left', on=['企业名称', '机组编号'])"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n0 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21602.05000 \n1 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21926.81000 \n2 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21261.93062 \n3 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20840.00000 \n4 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20706.00000 \n... ... ... ... ... ... ... ... \n5689 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21973.00000 \n5690 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21372.00000 \n5691 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 20856.00000 \n5692 榆能榆神热电有限公司 1 350.0 抽凝式 超临界 间接空冷 25514.00000 \n5693 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.00000 \n\n 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1 \n1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1 \n2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1 \n3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1 \n4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1 \n... ... ... .. ... ... ... ... \n5689 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73 \n5690 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73 \n5691 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73 \n5692 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n5693 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 核心设备类型 汽轮机类型 冷却方式 \n0 0.586990 0.076843 煤粉锅炉 凝气式 水冷-开式循环 \n1 0.632859 0.077676 煤粉锅炉 凝气式 水冷-开式循环 \n2 0.609196 0.074823 煤粉锅炉 凝气式 水冷-开式循环 \n3 0.602178 0.081628 煤粉锅炉 凝气式 水冷-开式循环 \n4 0.590254 0.081103 煤粉锅炉 凝气式 水冷-开式循环 \n... ... ... ... ... ... \n5689 0.628300 0.078776 煤粉锅炉 凝气式 水冷-开式循环 \n5690 0.595019 0.076622 煤粉锅炉 凝气式 水冷-开式循环 \n5691 0.565718 0.074772 煤粉锅炉 凝气式 水冷-开式循环 \n5692 0.664456 0.091482 煤粉锅炉 抽凝式 空冷-间接空冷 \n5693 0.661759 0.091483 煤粉锅炉 抽凝式 空冷-间接空冷 \n\n[5694 rows x 19 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>企业名称</th>\n <th>机组编号</th>\n <th>铭牌容量 (MW)</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n <th>煤种</th>\n <th>所处地区</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n <th>核心设备类型</th>\n <th>汽轮机类型</th>\n <th>冷却方式</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21602.05000</td>\n <td>26.09</td>\n <td>16.80</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.586990</td>\n <td>0.076843</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>1</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21926.81000</td>\n <td>26.68</td>\n <td>15.41</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.632859</td>\n <td>0.077676</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>2</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21261.93062</td>\n <td>26.46</td>\n <td>15.18</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.609196</td>\n <td>0.074823</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>3</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>20840.00000</td>\n <td>26.43</td>\n <td>14.55</td>\n <td>烟煤</td>\n 
<td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.602178</td>\n <td>0.081628</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>4</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>20706.00000</td>\n <td>26.43</td>\n <td>14.96</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.590254</td>\n <td>0.081103</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>5689</th>\n <td>浙江浙能电力股份有限公司台州发电厂</td>\n <td>8</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21973.00000</td>\n <td>37.43</td>\n <td>17.12</td>\n <td>烟煤</td>\n <td>浙江省</td>\n <td>121.465840</td>\n <td>28.704623</td>\n <td>73</td>\n <td>0.628300</td>\n <td>0.078776</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>5690</th>\n <td>浙江浙能电力股份有限公司台州发电厂</td>\n <td>8</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21372.00000</td>\n <td>39.87</td>\n <td>18.01</td>\n <td>烟煤</td>\n <td>浙江省</td>\n <td>121.465840</td>\n <td>28.704623</td>\n <td>73</td>\n <td>0.595019</td>\n <td>0.076622</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>5691</th>\n <td>浙江浙能电力股份有限公司台州发电厂</td>\n <td>8</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>20856.00000</td>\n <td>39.32</td>\n <td>19.74</td>\n <td>烟煤</td>\n <td>浙江省</td>\n <td>121.465840</td>\n <td>28.704623</td>\n <td>73</td>\n <td>0.565718</td>\n <td>0.074772</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>5692</th>\n 
<td>榆能榆神热电有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>抽凝式</td>\n <td>超临界</td>\n <td>间接空冷</td>\n <td>25514.00000</td>\n <td>38.84</td>\n <td>7.28</td>\n <td>烟煤</td>\n <td>陕西省</td>\n <td>109.820265</td>\n <td>38.304383</td>\n <td>1151</td>\n <td>0.664456</td>\n <td>0.091482</td>\n <td>煤粉锅炉</td>\n <td>抽凝式</td>\n <td>空冷-间接空冷</td>\n </tr>\n <tr>\n <th>5693</th>\n <td>榆能榆神热电有限公司</td>\n <td>2</td>\n <td>350.0</td>\n <td>抽凝式</td>\n <td>超临界</td>\n <td>间接空冷</td>\n <td>25514.00000</td>\n <td>38.84</td>\n <td>7.28</td>\n <td>烟煤</td>\n <td>陕西省</td>\n <td>109.820265</td>\n <td>38.304383</td>\n <td>1151</td>\n <td>0.661759</td>\n <td>0.091483</td>\n <td>煤粉锅炉</td>\n <td>抽凝式</td>\n <td>空冷-间接空冷</td>\n </tr>\n </tbody>\n</table>\n<p>5694 rows × 19 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"outputs": [],
|
||
"source": [
|
||
"boiler_missing = total_data['核心设备类型'].isna()  # rows where the merged boiler type is absent\n",
|
||
"na_boiler_df = total_data.loc[boiler_missing].drop(columns=['核心设备类型', '汽轮机类型', '冷却方式'])\n",
|
||
"boiler_df = total_data.loc[~boiler_missing].copy()\n",
|
||
"na_boiler = total_data.loc[boiler_missing, '企业名称'].unique()"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"outputs": [],
|
||
"source": [
|
||
"# unit_data has one row per unit, so joining on the enterprise name alone would repeat every\n",
|
||
"# left row once per unit; de-duplicate the lookup so rows repeat only per distinct boiler type.\n",
|
||
"na_boiler_df = na_boiler_df.merge(unit_data[['企业名称', '核心设备类型']].drop_duplicates(), how='left', on=['企业名称'])"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"outputs": [],
|
||
"source": [
|
||
"total_data = pd.concat([boiler_df, na_boiler_df], axis=0).drop_duplicates()"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "煤粉锅炉 5428\nW火焰炉 151\n循环流化床锅炉 4\nName: 核心设备类型, dtype: int64"
|
||
},
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data['核心设备类型'].value_counts()"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"num_cols = ['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude', 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']\n",
|
||
"# object_cols = ['所处地区', '类型', '机组参数', '冷却型式']\n",
|
||
"# object_cols = ['所处地区', '汽轮机类型', '参数分类', '冷凝器型式', '核心设备类型']\n",
|
||
"object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def change_str(x):\n",
|
||
"    \"\"\"Collapse a detailed cooling label into its coarse family.\n",
|
||
"\n",
|
||
"    Strings containing '空冷' map to '空冷' and strings containing '水冷' map to\n",
|
||
"    '水冷' (checked in that order). Every other value, including NaN/None and\n",
|
||
"    any non-string, is returned unchanged.\n",
|
||
"    \"\"\"\n",
|
||
"    # Substring tests only make sense on strings; NaN, None and numbers pass through\n",
|
||
"    # instead of raising TypeError on the `in` check.\n",
|
||
"    if not isinstance(x, str):\n",
|
||
"        return x\n",
|
||
"    if '空冷' in x:\n",
|
||
"        return '空冷'\n",
|
||
"    if '水冷' in x:\n",
|
||
"        return '水冷'\n",
|
||
"    return x"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# total_data['冷凝器型式'] = total_data['冷凝器型式'].apply(change_str)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"total_data = total_data[total_data['发电碳排放因子(kg/kWh)'] <= 0.9].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "['所处地区',\n '机组类型',\n '参数分类',\n '冷凝器型式',\n '铭牌容量 (MW)',\n '入炉煤低位热值(kJ/kg)',\n '燃煤挥发份Var(%)',\n '燃煤灰份Aar(%)',\n 'longitude',\n 'latitude',\n 'altitude']"
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"use_cols = object_cols + [x for x in num_cols if '因子' not in x]\n",
|
||
"use_cols"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"total_data = total_data[~total_data['供热碳排放因子(kg/MJ)'].isna()].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "(5685, 19)"
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "(1060, 17)"
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"total_data.groupby(['企业名称', '机组编号']).count().shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Rescale outliers (presumably unit mix-ups - TODO confirm): heating values below 100 are\n",
|
||
"# multiplied by 1000; ash / volatile percentages above 10000 are divided by 1000.\n",
|
||
"heat_value = total_data['入炉煤低位热值(kJ/kg)']\n",
|
||
"total_data['入炉煤低位热值(kJ/kg)'] = heat_value.mask(heat_value < 100, heat_value * 1000)\n",
|
||
"for pct_col in ['燃煤灰份Aar(%)', '燃煤挥发份Var(%)']:\n",
|
||
"    pct = total_data[pct_col]\n",
|
||
"    total_data[pct_col] = pct.mask(pct > 10000, pct / 1000)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"total_data['altitude'] = total_data['altitude'].clip(lower=0)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "(5041, 19)"
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"use_data = total_data[(total_data['供热碳排放因子(kg/MJ)'] > 0.01)&(total_data['供热碳排放因子(kg/MJ)'] < 0.1)].dropna()\n",
|
||
"use_data.shape"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import seaborn as sns"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "count 5041.000000\nmean 0.070824\nstd 0.009937\nmin 0.010464\n25% 0.065431\n50% 0.071466\n75% 0.077387\nmax 0.099905\nName: 供热碳排放因子(kg/MJ), dtype: float64"
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"use_data['供热碳排放因子(kg/MJ)'].describe()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
|
||
" \"\"\"Entry point for launching an IPython kernel.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Select the two target columns with a list: tuple-style multi-key indexing on a GroupBy\n",
|
||
"# is deprecated (FutureWarning) and is rejected by pandas 2.x.\n",
|
||
"train_data = use_data.groupby(use_cols)[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean().reset_index()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 20209.00 25.94 \n1 上海市 凝气式 亚临界 水冷-开式循环 300.0 20785.00 25.97 \n2 上海市 凝气式 亚临界 水冷-开式循环 300.0 20796.00 26.00 \n3 上海市 凝气式 亚临界 水冷-开式循环 300.0 21762.00 27.01 \n4 上海市 凝气式 亚临界 水冷-开式循环 320.0 15829.32 30.85 \n... ... ... ... ... ... ... ... \n3789 黑龙江省 纯凝式 超高压 水冷 200.0 15941.21 23.83 \n3790 黑龙江省 纯凝式 超高压 水冷 210.0 15355.00 42.00 \n3791 黑龙江省 背压式 超高压 水冷-开式循环 200.0 13396.00 23.39 \n3792 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 15753.00 36.29 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 16471.11 30.10 \n\n 燃煤灰份Aar(%) longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n0 15.34 121.471140 31.065113 3 0.623923 \n1 17.03 121.471140 31.065113 3 0.639474 \n2 13.00 121.471140 31.065113 3 0.635351 \n3 13.35 121.471140 31.065113 3 0.674456 \n4 4.77 121.601480 31.358794 2 0.506816 \n... ... ... ... ... ... \n3789 14.73 126.575647 45.918566 118 0.500172 \n3790 36.70 131.695864 46.580444 91 0.518301 \n3791 15.66 123.639146 47.210696 151 0.224312 \n3792 42.40 129.604803 44.608202 250 0.290814 \n3793 38.67 129.604803 44.608202 250 0.321635 \n\n 供热碳排放因子(kg/MJ) \n0 0.078064 \n1 0.079308 \n2 0.078691 \n3 0.085853 \n4 0.060934 \n... ... \n3789 0.064200 \n3790 0.063249 \n3791 0.053770 \n3792 0.068027 \n3793 0.067798 \n\n[3794 rows x 13 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>20209.00</td>\n <td>25.94</td>\n <td>15.34</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n </tr>\n <tr>\n <th>1</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>20785.00</td>\n <td>25.97</td>\n <td>17.03</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.639474</td>\n <td>0.079308</td>\n </tr>\n <tr>\n <th>2</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>20796.00</td>\n <td>26.00</td>\n <td>13.00</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.635351</td>\n <td>0.078691</td>\n </tr>\n <tr>\n <th>3</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>21762.00</td>\n <td>27.01</td>\n <td>13.35</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.674456</td>\n <td>0.085853</td>\n </tr>\n <tr>\n <th>4</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>320.0</td>\n <td>15829.32</td>\n <td>30.85</td>\n <td>4.77</td>\n <td>121.601480</td>\n <td>31.358794</td>\n <td>2</td>\n <td>0.506816</td>\n <td>0.060934</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n 
<td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3789</th>\n <td>黑龙江省</td>\n <td>纯凝式</td>\n <td>超高压</td>\n <td>水冷</td>\n <td>200.0</td>\n <td>15941.21</td>\n <td>23.83</td>\n <td>14.73</td>\n <td>126.575647</td>\n <td>45.918566</td>\n <td>118</td>\n <td>0.500172</td>\n <td>0.064200</td>\n </tr>\n <tr>\n <th>3790</th>\n <td>黑龙江省</td>\n <td>纯凝式</td>\n <td>超高压</td>\n <td>水冷</td>\n <td>210.0</td>\n <td>15355.00</td>\n <td>42.00</td>\n <td>36.70</td>\n <td>131.695864</td>\n <td>46.580444</td>\n <td>91</td>\n <td>0.518301</td>\n <td>0.063249</td>\n </tr>\n <tr>\n <th>3791</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-开式循环</td>\n <td>200.0</td>\n <td>13396.00</td>\n <td>23.39</td>\n <td>15.66</td>\n <td>123.639146</td>\n <td>47.210696</td>\n <td>151</td>\n <td>0.224312</td>\n <td>0.053770</td>\n </tr>\n <tr>\n <th>3792</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>15753.00</td>\n <td>36.29</td>\n <td>42.40</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.290814</td>\n <td>0.068027</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>16471.11</td>\n <td>30.10</td>\n <td>38.67</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.321635</td>\n <td>0.067798</td>\n </tr>\n </tbody>\n</table>\n<p>3794 rows × 13 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# log1p-transform every numeric feature; columns whose name contains '因子' (the targets) are left as-is.\n",
|
||
"log_cols = [name for name in num_cols if '因子' not in name]\n",
|
||
"train_data[log_cols] = np.log1p(train_data[log_cols])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_data = train_data[train_data['供热碳排放因子(kg/MJ)']<=0.1].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_data = pd.get_dummies(train_data, columns=object_cols).dropna()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Cast every column of the frame to float in a single call.\n",
|
||
"train_data = train_data.astype(float)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"feature_cols = [x for x in train_data.columns if '因子' not in x and '其他' not in x]\n",
|
||
"target_cols = [x for x in train_data.columns if '因子' in x]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_data.to_csv('./train_data_processed.csv', encoding='utf-8-sig', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train, test = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=42)\n",
|
||
"train, valid = train_test_split(train, test_size=0.1, shuffle=True, random_state=42)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Separate feature and target columns for each of the three partitions.\n",
|
||
"train_X, valid_X, test_X = (part[feature_cols] for part in (train, valid, test))\n",
|
||
"train_y, valid_y, test_y = (part[target_cols] for part in (train, valid, test))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.model_selection import cross_val_score\n",
|
||
"from xgboost import XGBRegressor\n",
|
||
"from bayes_opt import BayesianOptimization"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### 供电建模"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Booster hyper-parameters passed to xgb.train, and the upper bound on boosting rounds.\n",
|
||
"params_xgb = dict(\n",
|
||
"    objective='reg:squarederror',\n",
|
||
"    booster='gbtree',\n",
|
||
"    eta=0.01,\n",
|
||
"    max_depth=60,\n",
|
||
"    subsample=0.8,\n",
|
||
"    colsample_bytree=0.9,\n",
|
||
"    min_child_weight=60,\n",
|
||
"    seed=42,\n",
|
||
")\n",
|
||
"\n",
|
||
"num_boost_round = 2000"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"outputs": [],
|
||
"source": [
|
||
"# Use the first target column as the label and train with early stopping.\n",
|
||
"# With several entries in evals, xgboost uses the last one ('eval') for early stopping.\n",
|
||
"dtrain = xgb.DMatrix(data=train_X, label=train_y.values[:, 0])\n",
|
||
"dvalid = xgb.DMatrix(data=valid_X, label=valid_y.values[:, 0])\n",
|
||
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
|
||
"\n",
|
||
"gb_model = xgb.train(\n",
|
||
"    params=params_xgb,\n",
|
||
"    dtrain=dtrain,\n",
|
||
"    num_boost_round=num_boost_round,\n",
|
||
"    evals=watchlist,\n",
|
||
"    early_stopping_rounds=200,\n",
|
||
"    verbose_eval=False,\n",
|
||
")"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Ground truth is the first target column of the hold-out set; predictions come from gb_model.\n",
|
||
"y_true_xgb = test_y.iloc[:, 0].values\n",
|
||
"y_pred_xgb = gb_model.predict(xgb.DMatrix(data=test_X))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 9.9E-04\n",
|
||
"RMSE: 0.0315\n",
|
||
"MAE: 0.0146\n",
|
||
"MAPE: 4.39 %\n",
|
||
"R_2: 0.83\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Regression metrics on the hold-out test set.\n",
|
||
"MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n",
|
||
"RMSE = np.sqrt(MSE)  # reuse the MSE instead of computing it a second time\n",
|
||
"MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n",
|
||
"MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n",
|
||
"R_2 = r2_score(y_true_xgb, y_pred_xgb)\n",
|
||
"\n",
|
||
"print('MSE:', format(MSE, '.1E'))\n",
|
||
"print('RMSE:', round(RMSE, 4))\n",
|
||
"print('MAE:', round(MAE, 4))\n",
|
||
"print('MAPE:', round(MAPE*100, 2), '%')\n",
|
||
"# A negative R^2 means the fit is worse than always predicting the mean.\n",
|
||
"print('R_2:', round(R_2, 4))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.model_selection import KFold"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"outputs": [],
|
||
"source": [
|
||
"# Shuffled 10-fold splitter with a fixed seed so the folds are reproducible.\n",
|
||
"kf = KFold(shuffle=True, n_splits=10, random_state=666)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 43,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 3.5E-04, RMSE: 0.0188, MAE: 0.0126, MAPE: 2.6 %, R_2: 0.9346\n",
|
||
"MSE: 9.5E-04, RMSE: 0.0308, MAE: 0.0142, MAPE: 4.28 %, R_2: 0.8446\n",
|
||
"MSE: 9.9E-04, RMSE: 0.0314, MAE: 0.0139, MAPE: 4.29 %, R_2: 0.8507\n",
|
||
"MSE: 5.0E-04, RMSE: 0.0225, MAE: 0.0126, MAPE: 2.53 %, R_2: 0.9118\n",
|
||
"MSE: 9.9E-04, RMSE: 0.0314, MAE: 0.0143, MAPE: 4.45 %, R_2: 0.8383\n",
|
||
"MSE: 3.6E-04, RMSE: 0.0191, MAE: 0.0127, MAPE: 2.57 %, R_2: 0.9298\n",
|
||
"MSE: 5.3E-04, RMSE: 0.023, MAE: 0.0143, MAPE: 3.13 %, R_2: 0.9112\n",
|
||
"MSE: 5.1E-04, RMSE: 0.0226, MAE: 0.0138, MAPE: 2.84 %, R_2: 0.9092\n",
|
||
"MSE: 3.5E-04, RMSE: 0.0187, MAE: 0.0128, MAPE: 2.63 %, R_2: 0.9371\n",
|
||
"MSE: 1.3E-03, RMSE: 0.0361, MAE: 0.015, MAPE: 6.76 %, R_2: 0.8045\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# K-fold cross-validation of the model for '发电碳排放因子(kg/kWh)'.\n",
|
||
"# Each fold's [MSE, RMSE, MAE, MAPE, R_2] is appended to eva_list.\n",
|
||
"eva_list = list()\n",
|
||
"for (train_index, test_index) in kf.split(train_data):\n",
|
||
"    # KFold.split yields positional indices, so rows are selected with .iloc rather than label-based .loc.\n",
|
||
"    train = train_data.iloc[train_index]\n",
|
||
"    test = train_data.iloc[test_index]\n",
|
||
"    # Carve a validation split out of the training folds; it only drives early stopping.\n",
|
||
"    train, valid = train_test_split(train, test_size=0.11, random_state=666)\n",
|
||
"    X_train, Y_train = train[feature_cols], train['发电碳排放因子(kg/kWh)']\n",
|
||
"    X_valid, Y_valid = valid[feature_cols], valid['发电碳排放因子(kg/kWh)']\n",
|
||
"    # Score on the held-out fold, not on the validation split that early stopping has already seen.\n",
|
||
"    X_test, Y_test = test[feature_cols], test['发电碳排放因子(kg/kWh)']\n",
|
||
"    dtrain = xgb.DMatrix(X_train, Y_train)\n",
|
||
"    dvalid = xgb.DMatrix(X_valid, Y_valid)\n",
|
||
"    watchlist = [(dvalid, 'eval')]\n",
|
||
"    gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
|
||
"                         early_stopping_rounds=100, verbose_eval=False)\n",
|
||
"    y_pred = gb_model.predict(xgb.DMatrix(X_test))\n",
|
||
"    y_true = Y_test.values\n",
|
||
"    MSE = mean_squared_error(y_true, y_pred)\n",
|
||
"    RMSE = np.sqrt(MSE)\n",
|
||
"    MAE = mean_absolute_error(y_true, y_pred)\n",
|
||
"    MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
|
||
"    R_2 = r2_score(y_true, y_pred)\n",
|
||
"    print('MSE:', format(MSE, '.1E'), end=', ')\n",
|
||
"    print('RMSE:', round(RMSE, 4), end=', ')\n",
|
||
"    print('MAE:', round(MAE, 4), end=', ')\n",
|
||
"    print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n",
|
||
"    # A negative R^2 means the fit is worse than always predicting the mean.\n",
|
||
"    print('R_2:', round(R_2, 4))\n",
|
||
"    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n",
|
||
"    # NOTE(review): the loop stops at the first fold whose R^2 exceeds 0.94, so eva_list can hold\n",
|
||
"    # fewer than 10 folds - confirm this early exit is intended.\n",
|
||
"    if R_2 > 0.94:\n",
|
||
"        break"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 44,
|
||
"outputs": [],
|
||
"source": [
|
||
"# Attach predictions, ground truth and relative error with assign(), which builds a new frame\n",
|
||
"# instead of writing into one derived by column selection (avoids SettingWithCopyWarning).\n",
|
||
"test_X = test_X.assign(power_pred=y_pred_xgb, power_real=y_true_xgb)\n",
|
||
"test_X = test_X.assign(error_rate=(test_X.power_pred - test_X.power_real).abs() / test_X.power_real)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 45,
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" \"\"\"Entry point for launching an IPython kernel.\n",
|
||
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n",
|
||
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
|
||
"Try using .loc[row_indexer,col_indexer] = value instead\n",
|
||
"\n",
|
||
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
|
||
" \n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Attach the last fold's predictions, ground truth and relative error with assign(), which builds\n",
|
||
"# a new frame instead of writing into a slice (the original raised SettingWithCopyWarning).\n",
|
||
"X_test = X_test.assign(power_pred=y_pred, power_real=y_true)\n",
|
||
"X_test = X_test.assign(error_rate=(X_test.power_pred - X_test.power_real).abs() / X_test.power_real)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 46,
|
||
"outputs": [],
|
||
"source": [
|
||
"# Take an independent copy so later edits to test_data do not touch X_test.\n",
|
||
"test_data = X_test.copy(deep=True)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 47,
|
||
"outputs": [],
|
||
"source": [
|
||
"# Apply expm1 to every numeric column except the '因子' target columns.\n",
|
||
"# NOTE(review): presumably this reverses an earlier np.log1p transform - confirm against the preprocessing cells.\n",
|
||
"for col in num_cols:\n",
|
||
"    if '因子' in col:\n",
|
||
"        continue\n",
|
||
"    test_data[col] = np.expm1(test_data[col])"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 48,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) longitude \\\n2132 300.0 17602.00 23.95 32.830 118.211355 \n2424 350.0 15525.82 19.72 25.540 118.075445 \n1626 330.0 17997.00 33.00 35.000 82.892729 \n319 350.0 14187.00 25.53 16.710 125.579363 \n891 350.0 19279.92 35.49 27.340 115.784650 \n2234 350.0 21697.63 26.43 12.710 116.860260 \n2304 300.0 18611.00 15.26 29.750 115.497149 \n420 350.0 14445.00 45.39 20.600 125.162487 \n1039 330.0 21095.00 25.80 20.410 119.276289 \n3090 165.0 18990.00 33.58 23.540 103.624731 \n485 300.0 20068.81 25.65 15.040 117.059925 \n3714 600.0 16567.18 34.02 38.640 131.063724 \n2076 350.0 20141.10 42.64 15.990 115.113369 \n3304 600.0 12950.00 48.22 25.040 124.150700 \n3744 350.0 15235.27 23.23 25.780 128.768082 \n2572 300.0 19965.73 10.79 29.130 114.283788 \n3423 600.0 17981.89 26.88 17.700 122.123524 \n3770 600.0 15941.38 23.83 13.750 126.575647 \n1422 330.0 18283.00 13.23 34.680 112.761299 \n3660 300.0 16031.00 21.96 39.790 130.397051 \n2439 600.0 19736.25 9.21 29.210 114.437782 \n3365 300.0 13306.62 23.55 23.090 121.228525 \n1211 140.0 20919.00 19.29 26.120 115.920941 \n2576 300.0 21052.00 10.74 31.360 114.283788 \n3395 350.0 13278.00 47.67 20.720 123.821910 \n1732 200.0 17620.64 19.25 14.380 87.660577 \n1244 150.0 20030.49 26.13 27.880 117.142424 \n1227 145.0 19292.66 19.44 26.570 117.421027 \n1213 140.0 21160.00 24.26 20.310 118.335347 \n2224 350.0 21061.75 26.06 14.167 116.860260 \n3381 300.0 14582.00 28.34 23.200 124.330739 \n1195 140.0 19731.00 22.23 26.550 118.335347 \n2366 330.0 20310.24 15.53 32.640 114.703987 \n1453 220.0 20434.00 26.70 27.410 113.232289 \n1624 330.0 17470.00 36.17 27.620 82.892729 \n3383 300.0 15736.00 43.85 37.540 123.817380 \n1128 350.0 20403.00 36.51 25.690 117.149304 \n2636 350.0 18193.00 16.96 31.720 112.409429 \n2992 350.0 23253.68 23.72 18.450 113.672684 \n881 165.0 20822.00 39.57 24.600 118.128354 \n2644 350.0 19871.76 21.06 30.780 113.875986 \n2627 330.0 20682.00 11.52 28.850 113.866062 
\n3666 330.0 14813.00 43.74 14.460 124.613843 \n2215 315.0 21691.59 23.80 11.900 116.860260 \n1717 350.0 22600.03 28.67 18.680 76.054876 \n2686 660.0 17624.15 31.79 32.040 115.270887 \n3228 300.0 13075.00 28.45 27.010 123.943182 \n2311 300.0 19779.00 17.49 31.090 114.525863 \n1064 330.0 22054.00 20.08 20.800 117.103149 \n1710 350.0 20519.75 24.23 12.710 76.054876 \n1651 330.0 17913.00 35.08 22.070 87.703630 \n3391 300.0 13874.00 24.01 20.430 121.228525 \n3793 215.0 16471.11 30.10 38.670 129.604803 \n1022 330.0 20634.77 24.66 25.390 119.276289 \n343 670.0 14109.00 45.20 13.640 125.941747 \n\n latitude altitude 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 ... 参数分类_高压 \\\n2132 39.655509 26.0 0.0 0.0 0.0 ... 0.0 \n2424 40.812210 338.0 0.0 0.0 0.0 ... 0.0 \n1626 41.741365 1066.0 0.0 0.0 0.0 ... 0.0 \n319 43.657507 208.0 0.0 0.0 0.0 ... 0.0 \n891 36.881948 33.0 0.0 0.0 0.0 ... 0.0 \n2234 39.953617 27.0 0.0 0.0 0.0 ... 0.0 \n2304 38.802049 17.0 0.0 0.0 0.0 ... 0.0 \n420 43.784873 222.0 0.0 0.0 0.0 ... 0.0 \n1039 36.668747 75.0 0.0 0.0 0.0 ... 0.0 \n3090 36.134735 1545.0 0.0 0.0 0.0 ... 0.0 \n485 39.157647 8.0 0.0 0.0 0.0 ... 0.0 \n3714 45.766399 207.0 0.0 0.0 0.0 ... 0.0 \n2076 25.926232 102.0 0.0 0.0 0.0 ... 0.0 \n3304 42.540258 103.0 0.0 0.0 0.0 ... 0.0 \n3744 47.746953 240.0 0.0 0.0 0.0 ... 0.0 \n2572 36.128262 83.0 0.0 0.0 0.0 ... 0.0 \n3423 40.311935 2.0 0.0 0.0 0.0 ... 0.0 \n3770 45.918566 118.0 0.0 0.0 0.0 ... 0.0 \n1422 37.634620 849.0 0.0 0.0 0.0 ... 0.0 \n3660 46.805507 80.0 0.0 0.0 0.0 ... 0.0 \n2439 38.038867 76.0 0.0 0.0 0.0 ... 0.0 \n3365 41.143879 44.0 0.0 0.0 0.0 ... 0.0 \n1211 36.466442 30.0 0.0 0.0 0.0 ... 0.0 \n2576 36.128262 83.0 0.0 0.0 0.0 ... 0.0 \n3395 41.354877 147.0 0.0 0.0 0.0 ... 0.0 \n1732 43.750058 1010.0 0.0 0.0 0.0 ... 0.0 \n1244 35.075862 65.0 0.0 0.0 0.0 ... 0.0 \n1227 36.738368 57.0 0.0 0.0 0.0 ... 0.0 \n1213 35.017881 65.0 0.0 0.0 0.0 ... 0.0 \n2224 39.953617 27.0 0.0 0.0 0.0 ... 0.0 \n3381 40.115662 135.0 0.0 0.0 0.0 ... 
0.0 \n1195 35.017881 65.0 0.0 0.0 0.0 ... 0.0 \n2366 38.014364 57.0 0.0 0.0 0.0 ... 0.0 \n1453 40.067556 1061.0 0.0 0.0 0.0 ... 0.0 \n1624 41.741365 1066.0 0.0 0.0 0.0 ... 0.0 \n3383 42.347201 98.0 0.0 0.0 0.0 ... 0.0 \n1128 36.084927 123.0 0.0 0.0 0.0 ... 0.0 \n2636 34.584441 160.0 0.0 0.0 0.0 ... 0.0 \n2992 30.918494 34.0 0.0 0.0 0.0 ... 0.0 \n881 37.694642 8.0 0.0 0.0 0.0 ... 0.0 \n2644 36.116424 294.0 0.0 0.0 0.0 ... 0.0 \n2627 35.248375 72.0 0.0 0.0 0.0 ... 0.0 \n3666 46.144809 154.0 0.0 0.0 0.0 ... 0.0 \n2215 39.953617 27.0 0.0 0.0 0.0 ... 0.0 \n1717 39.484097 1288.0 0.0 0.0 0.0 ... 0.0 \n2686 35.775540 48.0 0.0 0.0 0.0 ... 0.0 \n3228 41.899725 124.0 0.0 0.0 0.0 ... 0.0 \n2311 37.959933 63.0 0.0 0.0 0.0 ... 0.0 \n1064 36.718761 27.0 0.0 0.0 0.0 ... 0.0 \n1710 39.484097 1288.0 0.0 0.0 0.0 ... 0.0 \n1651 43.909559 724.0 0.0 0.0 0.0 ... 0.0 \n3391 41.143879 44.0 0.0 0.0 0.0 ... 0.0 \n3793 44.608202 250.0 0.0 0.0 0.0 ... 0.0 \n1022 36.668747 75.0 0.0 0.0 0.0 ... 0.0 \n343 44.106509 199.0 0.0 0.0 0.0 ... 
0.0 \n\n 冷凝器型式_水冷 冷凝器型式_水冷-开式循环 冷凝器型式_水冷-闭式循环 冷凝器型式_直接空冷 冷凝器型式_空冷 \\\n2132 1.0 0.0 0.0 0.0 0.0 \n2424 0.0 0.0 1.0 0.0 0.0 \n1626 0.0 0.0 0.0 0.0 0.0 \n319 1.0 0.0 0.0 0.0 0.0 \n891 0.0 1.0 0.0 0.0 0.0 \n2234 0.0 0.0 1.0 0.0 0.0 \n2304 0.0 0.0 1.0 0.0 0.0 \n420 0.0 0.0 1.0 0.0 0.0 \n1039 0.0 0.0 1.0 0.0 0.0 \n3090 0.0 0.0 1.0 0.0 0.0 \n485 0.0 0.0 1.0 0.0 0.0 \n3714 0.0 0.0 1.0 0.0 0.0 \n2076 0.0 0.0 1.0 0.0 0.0 \n3304 0.0 0.0 1.0 0.0 0.0 \n3744 0.0 0.0 1.0 0.0 0.0 \n2572 0.0 0.0 1.0 0.0 0.0 \n3423 0.0 1.0 0.0 0.0 0.0 \n3770 1.0 0.0 0.0 0.0 0.0 \n1422 0.0 0.0 0.0 1.0 0.0 \n3660 1.0 0.0 0.0 0.0 0.0 \n2439 0.0 0.0 0.0 1.0 0.0 \n3365 0.0 0.0 1.0 0.0 0.0 \n1211 0.0 0.0 1.0 0.0 0.0 \n2576 0.0 0.0 1.0 0.0 0.0 \n3395 0.0 0.0 1.0 0.0 0.0 \n1732 0.0 0.0 1.0 0.0 0.0 \n1244 0.0 0.0 1.0 0.0 0.0 \n1227 0.0 0.0 1.0 0.0 0.0 \n1213 0.0 0.0 1.0 0.0 0.0 \n2224 0.0 0.0 1.0 0.0 0.0 \n3381 0.0 0.0 1.0 0.0 0.0 \n1195 0.0 0.0 1.0 0.0 0.0 \n2366 0.0 0.0 1.0 0.0 0.0 \n1453 0.0 0.0 0.0 1.0 0.0 \n1624 0.0 0.0 0.0 0.0 0.0 \n3383 0.0 0.0 1.0 0.0 0.0 \n1128 0.0 0.0 1.0 0.0 0.0 \n2636 0.0 0.0 1.0 0.0 0.0 \n2992 0.0 0.0 1.0 0.0 0.0 \n881 1.0 0.0 0.0 0.0 0.0 \n2644 0.0 0.0 1.0 0.0 0.0 \n2627 0.0 0.0 1.0 0.0 0.0 \n3666 0.0 1.0 0.0 0.0 0.0 \n2215 0.0 0.0 1.0 0.0 0.0 \n1717 0.0 0.0 0.0 0.0 0.0 \n2686 0.0 0.0 1.0 0.0 0.0 \n3228 1.0 0.0 0.0 0.0 0.0 \n2311 0.0 0.0 1.0 0.0 0.0 \n1064 0.0 0.0 1.0 0.0 0.0 \n1710 0.0 0.0 0.0 0.0 0.0 \n1651 0.0 0.0 1.0 0.0 0.0 \n3391 0.0 0.0 1.0 0.0 0.0 \n3793 0.0 0.0 1.0 0.0 0.0 \n1022 0.0 0.0 1.0 0.0 0.0 \n343 0.0 0.0 1.0 0.0 0.0 \n\n 冷凝器型式_间接空冷 power_pred power_real error_rate \n2132 0.0 0.438344 0.461568 0.050316 \n2424 0.0 0.437482 0.460870 0.050747 \n1626 1.0 0.509937 0.537416 0.051130 \n319 0.0 0.362236 0.344575 0.051256 \n891 0.0 0.478009 0.454326 0.052129 \n2234 0.0 0.579227 0.549844 0.053440 \n2304 0.0 0.488333 0.515910 0.053453 \n420 0.0 0.376880 0.357277 0.054869 \n1039 0.0 0.562676 0.595438 0.055023 \n3090 0.0 0.516500 0.489478 0.055205 \n485 0.0 0.515064 
0.487882 0.055715 \n3714 0.0 0.489738 0.518801 0.056019 \n2076 0.0 0.536181 0.568241 0.056419 \n3304 0.0 0.380260 0.359795 0.056878 \n3744 0.0 0.413687 0.389536 0.061999 \n2572 0.0 0.484924 0.455817 0.063858 \n3423 0.0 0.464855 0.436654 0.064586 \n3770 0.0 0.444540 0.475513 0.065135 \n1422 0.0 0.478954 0.513112 0.066570 \n3660 0.0 0.420857 0.394264 0.067449 \n2439 0.0 0.552083 0.517015 0.067829 \n3365 0.0 0.365339 0.392430 0.069034 \n1211 0.0 0.525372 0.565201 0.070470 \n2576 0.0 0.535750 0.576367 0.070472 \n3395 0.0 0.362074 0.338230 0.070499 \n1732 0.0 0.466749 0.435941 0.070672 \n1244 0.0 0.519800 0.559412 0.070810 \n1227 0.0 0.471088 0.439672 0.071453 \n1213 0.0 0.584875 0.630051 0.071702 \n2224 0.0 0.547243 0.510040 0.072941 \n3381 0.0 0.378266 0.410316 0.078112 \n1195 0.0 0.518649 0.480565 0.079248 \n2366 0.0 0.505584 0.551764 0.083696 \n1453 0.0 0.536567 0.493686 0.086859 \n1624 1.0 0.474544 0.525190 0.096434 \n3383 0.0 0.428344 0.475805 0.099748 \n1128 0.0 0.484347 0.541039 0.104785 \n2636 0.0 0.465685 0.420098 0.108516 \n2992 0.0 0.590282 0.663618 0.110509 \n881 0.0 0.561782 0.633226 0.112826 \n2644 0.0 0.521570 0.467694 0.115195 \n2627 0.0 0.563966 0.637925 0.115937 \n3666 0.0 0.403054 0.456362 0.116811 \n2215 0.0 0.549092 0.490824 0.118715 \n1717 1.0 0.628243 0.560538 0.120785 \n2686 0.0 0.428897 0.381988 0.122801 \n3228 0.0 0.356203 0.317093 0.123338 \n2311 0.0 0.505898 0.449974 0.124284 \n1064 0.0 0.549820 0.628280 0.124881 \n1710 1.0 0.516507 0.434781 0.187970 \n1651 0.0 0.471082 0.395266 0.191812 \n3391 0.0 0.378657 0.470581 0.195341 \n3793 0.0 0.428770 0.321635 0.333093 \n1022 0.0 0.530251 0.073112 6.252559 \n343 0.0 0.398257 0.038802 9.263788 \n\n[55 rows x 66 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>铭牌容量 (MW)</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>所处地区_上海市</th>\n <th>所处地区_云南省</th>\n <th>所处地区_内蒙古</th>\n <th>...</th>\n <th>参数分类_高压</th>\n <th>冷凝器型式_水冷</th>\n <th>冷凝器型式_水冷-开式循环</th>\n <th>冷凝器型式_水冷-闭式循环</th>\n <th>冷凝器型式_直接空冷</th>\n <th>冷凝器型式_空冷</th>\n <th>冷凝器型式_间接空冷</th>\n <th>power_pred</th>\n <th>power_real</th>\n <th>error_rate</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>2132</th>\n <td>300.0</td>\n <td>17602.00</td>\n <td>23.95</td>\n <td>32.830</td>\n <td>118.211355</td>\n <td>39.655509</td>\n <td>26.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.438344</td>\n <td>0.461568</td>\n <td>0.050316</td>\n </tr>\n <tr>\n <th>2424</th>\n <td>350.0</td>\n <td>15525.82</td>\n <td>19.72</td>\n <td>25.540</td>\n <td>118.075445</td>\n <td>40.812210</td>\n <td>338.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.437482</td>\n <td>0.460870</td>\n <td>0.050747</td>\n </tr>\n <tr>\n <th>1626</th>\n <td>330.0</td>\n <td>17997.00</td>\n <td>33.00</td>\n <td>35.000</td>\n <td>82.892729</td>\n <td>41.741365</td>\n <td>1066.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.509937</td>\n <td>0.537416</td>\n <td>0.051130</td>\n </tr>\n <tr>\n <th>319</th>\n 
<td>350.0</td>\n <td>14187.00</td>\n <td>25.53</td>\n <td>16.710</td>\n <td>125.579363</td>\n <td>43.657507</td>\n <td>208.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.362236</td>\n <td>0.344575</td>\n <td>0.051256</td>\n </tr>\n <tr>\n <th>891</th>\n <td>350.0</td>\n <td>19279.92</td>\n <td>35.49</td>\n <td>27.340</td>\n <td>115.784650</td>\n <td>36.881948</td>\n <td>33.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.478009</td>\n <td>0.454326</td>\n <td>0.052129</td>\n </tr>\n <tr>\n <th>2234</th>\n <td>350.0</td>\n <td>21697.63</td>\n <td>26.43</td>\n <td>12.710</td>\n <td>116.860260</td>\n <td>39.953617</td>\n <td>27.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.579227</td>\n <td>0.549844</td>\n <td>0.053440</td>\n </tr>\n <tr>\n <th>2304</th>\n <td>300.0</td>\n <td>18611.00</td>\n <td>15.26</td>\n <td>29.750</td>\n <td>115.497149</td>\n <td>38.802049</td>\n <td>17.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.488333</td>\n <td>0.515910</td>\n <td>0.053453</td>\n </tr>\n <tr>\n <th>420</th>\n <td>350.0</td>\n <td>14445.00</td>\n <td>45.39</td>\n <td>20.600</td>\n <td>125.162487</td>\n <td>43.784873</td>\n <td>222.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.376880</td>\n <td>0.357277</td>\n <td>0.054869</td>\n </tr>\n <tr>\n <th>1039</th>\n <td>330.0</td>\n <td>21095.00</td>\n <td>25.80</td>\n 
<td>20.410</td>\n <td>119.276289</td>\n <td>36.668747</td>\n <td>75.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.562676</td>\n <td>0.595438</td>\n <td>0.055023</td>\n </tr>\n <tr>\n <th>3090</th>\n <td>165.0</td>\n <td>18990.00</td>\n <td>33.58</td>\n <td>23.540</td>\n <td>103.624731</td>\n <td>36.134735</td>\n <td>1545.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.516500</td>\n <td>0.489478</td>\n <td>0.055205</td>\n </tr>\n <tr>\n <th>485</th>\n <td>300.0</td>\n <td>20068.81</td>\n <td>25.65</td>\n <td>15.040</td>\n <td>117.059925</td>\n <td>39.157647</td>\n <td>8.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.515064</td>\n <td>0.487882</td>\n <td>0.055715</td>\n </tr>\n <tr>\n <th>3714</th>\n <td>600.0</td>\n <td>16567.18</td>\n <td>34.02</td>\n <td>38.640</td>\n <td>131.063724</td>\n <td>45.766399</td>\n <td>207.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.489738</td>\n <td>0.518801</td>\n <td>0.056019</td>\n </tr>\n <tr>\n <th>2076</th>\n <td>350.0</td>\n <td>20141.10</td>\n <td>42.64</td>\n <td>15.990</td>\n <td>115.113369</td>\n <td>25.926232</td>\n <td>102.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.536181</td>\n <td>0.568241</td>\n <td>0.056419</td>\n </tr>\n <tr>\n <th>3304</th>\n <td>600.0</td>\n <td>12950.00</td>\n <td>48.22</td>\n <td>25.040</td>\n <td>124.150700</td>\n <td>42.540258</td>\n 
<td>103.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.380260</td>\n <td>0.359795</td>\n <td>0.056878</td>\n </tr>\n <tr>\n <th>3744</th>\n <td>350.0</td>\n <td>15235.27</td>\n <td>23.23</td>\n <td>25.780</td>\n <td>128.768082</td>\n <td>47.746953</td>\n <td>240.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.413687</td>\n <td>0.389536</td>\n <td>0.061999</td>\n </tr>\n <tr>\n <th>2572</th>\n <td>300.0</td>\n <td>19965.73</td>\n <td>10.79</td>\n <td>29.130</td>\n <td>114.283788</td>\n <td>36.128262</td>\n <td>83.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.484924</td>\n <td>0.455817</td>\n <td>0.063858</td>\n </tr>\n <tr>\n <th>3423</th>\n <td>600.0</td>\n <td>17981.89</td>\n <td>26.88</td>\n <td>17.700</td>\n <td>122.123524</td>\n <td>40.311935</td>\n <td>2.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.464855</td>\n <td>0.436654</td>\n <td>0.064586</td>\n </tr>\n <tr>\n <th>3770</th>\n <td>600.0</td>\n <td>15941.38</td>\n <td>23.83</td>\n <td>13.750</td>\n <td>126.575647</td>\n <td>45.918566</td>\n <td>118.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.444540</td>\n <td>0.475513</td>\n <td>0.065135</td>\n </tr>\n <tr>\n <th>1422</th>\n <td>330.0</td>\n <td>18283.00</td>\n <td>13.23</td>\n <td>34.680</td>\n <td>112.761299</td>\n <td>37.634620</td>\n <td>849.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n 
<td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.478954</td>\n <td>0.513112</td>\n <td>0.066570</td>\n </tr>\n <tr>\n <th>3660</th>\n <td>300.0</td>\n <td>16031.00</td>\n <td>21.96</td>\n <td>39.790</td>\n <td>130.397051</td>\n <td>46.805507</td>\n <td>80.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.420857</td>\n <td>0.394264</td>\n <td>0.067449</td>\n </tr>\n <tr>\n <th>2439</th>\n <td>600.0</td>\n <td>19736.25</td>\n <td>9.21</td>\n <td>29.210</td>\n <td>114.437782</td>\n <td>38.038867</td>\n <td>76.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.552083</td>\n <td>0.517015</td>\n <td>0.067829</td>\n </tr>\n <tr>\n <th>3365</th>\n <td>300.0</td>\n <td>13306.62</td>\n <td>23.55</td>\n <td>23.090</td>\n <td>121.228525</td>\n <td>41.143879</td>\n <td>44.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.365339</td>\n <td>0.392430</td>\n <td>0.069034</td>\n </tr>\n <tr>\n <th>1211</th>\n <td>140.0</td>\n <td>20919.00</td>\n <td>19.29</td>\n <td>26.120</td>\n <td>115.920941</td>\n <td>36.466442</td>\n <td>30.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.525372</td>\n <td>0.565201</td>\n <td>0.070470</td>\n </tr>\n <tr>\n <th>2576</th>\n <td>300.0</td>\n <td>21052.00</td>\n <td>10.74</td>\n <td>31.360</td>\n <td>114.283788</td>\n <td>36.128262</td>\n <td>83.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n 
<td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.535750</td>\n <td>0.576367</td>\n <td>0.070472</td>\n </tr>\n <tr>\n <th>3395</th>\n <td>350.0</td>\n <td>13278.00</td>\n <td>47.67</td>\n <td>20.720</td>\n <td>123.821910</td>\n <td>41.354877</td>\n <td>147.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.362074</td>\n <td>0.338230</td>\n <td>0.070499</td>\n </tr>\n <tr>\n <th>1732</th>\n <td>200.0</td>\n <td>17620.64</td>\n <td>19.25</td>\n <td>14.380</td>\n <td>87.660577</td>\n <td>43.750058</td>\n <td>1010.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.466749</td>\n <td>0.435941</td>\n <td>0.070672</td>\n </tr>\n <tr>\n <th>1244</th>\n <td>150.0</td>\n <td>20030.49</td>\n <td>26.13</td>\n <td>27.880</td>\n <td>117.142424</td>\n <td>35.075862</td>\n <td>65.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.519800</td>\n <td>0.559412</td>\n <td>0.070810</td>\n </tr>\n <tr>\n <th>1227</th>\n <td>145.0</td>\n <td>19292.66</td>\n <td>19.44</td>\n <td>26.570</td>\n <td>117.421027</td>\n <td>36.738368</td>\n <td>57.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.471088</td>\n <td>0.439672</td>\n <td>0.071453</td>\n </tr>\n <tr>\n <th>1213</th>\n <td>140.0</td>\n <td>21160.00</td>\n <td>24.26</td>\n <td>20.310</td>\n <td>118.335347</td>\n <td>35.017881</td>\n <td>65.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n 
<td>0.584875</td>\n <td>0.630051</td>\n <td>0.071702</td>\n </tr>\n <tr>\n <th>2224</th>\n <td>350.0</td>\n <td>21061.75</td>\n <td>26.06</td>\n <td>14.167</td>\n <td>116.860260</td>\n <td>39.953617</td>\n <td>27.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.547243</td>\n <td>0.510040</td>\n <td>0.072941</td>\n </tr>\n <tr>\n <th>3381</th>\n <td>300.0</td>\n <td>14582.00</td>\n <td>28.34</td>\n <td>23.200</td>\n <td>124.330739</td>\n <td>40.115662</td>\n <td>135.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.378266</td>\n <td>0.410316</td>\n <td>0.078112</td>\n </tr>\n <tr>\n <th>1195</th>\n <td>140.0</td>\n <td>19731.00</td>\n <td>22.23</td>\n <td>26.550</td>\n <td>118.335347</td>\n <td>35.017881</td>\n <td>65.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.518649</td>\n <td>0.480565</td>\n <td>0.079248</td>\n </tr>\n <tr>\n <th>2366</th>\n <td>330.0</td>\n <td>20310.24</td>\n <td>15.53</td>\n <td>32.640</td>\n <td>114.703987</td>\n <td>38.014364</td>\n <td>57.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.505584</td>\n <td>0.551764</td>\n <td>0.083696</td>\n </tr>\n <tr>\n <th>1453</th>\n <td>220.0</td>\n <td>20434.00</td>\n <td>26.70</td>\n <td>27.410</td>\n <td>113.232289</td>\n <td>40.067556</td>\n <td>1061.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.536567</td>\n <td>0.493686</td>\n <td>0.086859</td>\n 
</tr>\n <tr>\n <th>1624</th>\n <td>330.0</td>\n <td>17470.00</td>\n <td>36.17</td>\n <td>27.620</td>\n <td>82.892729</td>\n <td>41.741365</td>\n <td>1066.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.474544</td>\n <td>0.525190</td>\n <td>0.096434</td>\n </tr>\n <tr>\n <th>3383</th>\n <td>300.0</td>\n <td>15736.00</td>\n <td>43.85</td>\n <td>37.540</td>\n <td>123.817380</td>\n <td>42.347201</td>\n <td>98.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.428344</td>\n <td>0.475805</td>\n <td>0.099748</td>\n </tr>\n <tr>\n <th>1128</th>\n <td>350.0</td>\n <td>20403.00</td>\n <td>36.51</td>\n <td>25.690</td>\n <td>117.149304</td>\n <td>36.084927</td>\n <td>123.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.484347</td>\n <td>0.541039</td>\n <td>0.104785</td>\n </tr>\n <tr>\n <th>2636</th>\n <td>350.0</td>\n <td>18193.00</td>\n <td>16.96</td>\n <td>31.720</td>\n <td>112.409429</td>\n <td>34.584441</td>\n <td>160.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.465685</td>\n <td>0.420098</td>\n <td>0.108516</td>\n </tr>\n <tr>\n <th>2992</th>\n <td>350.0</td>\n <td>23253.68</td>\n <td>23.72</td>\n <td>18.450</td>\n <td>113.672684</td>\n <td>30.918494</td>\n <td>34.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.590282</td>\n <td>0.663618</td>\n <td>0.110509</td>\n </tr>\n <tr>\n <th>881</th>\n <td>165.0</td>\n 
<td>20822.00</td>\n <td>39.57</td>\n <td>24.600</td>\n <td>118.128354</td>\n <td>37.694642</td>\n <td>8.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.561782</td>\n <td>0.633226</td>\n <td>0.112826</td>\n </tr>\n <tr>\n <th>2644</th>\n <td>350.0</td>\n <td>19871.76</td>\n <td>21.06</td>\n <td>30.780</td>\n <td>113.875986</td>\n <td>36.116424</td>\n <td>294.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.521570</td>\n <td>0.467694</td>\n <td>0.115195</td>\n </tr>\n <tr>\n <th>2627</th>\n <td>330.0</td>\n <td>20682.00</td>\n <td>11.52</td>\n <td>28.850</td>\n <td>113.866062</td>\n <td>35.248375</td>\n <td>72.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.563966</td>\n <td>0.637925</td>\n <td>0.115937</td>\n </tr>\n <tr>\n <th>3666</th>\n <td>330.0</td>\n <td>14813.00</td>\n <td>43.74</td>\n <td>14.460</td>\n <td>124.613843</td>\n <td>46.144809</td>\n <td>154.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.403054</td>\n <td>0.456362</td>\n <td>0.116811</td>\n </tr>\n <tr>\n <th>2215</th>\n <td>315.0</td>\n <td>21691.59</td>\n <td>23.80</td>\n <td>11.900</td>\n <td>116.860260</td>\n <td>39.953617</td>\n <td>27.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.549092</td>\n <td>0.490824</td>\n <td>0.118715</td>\n </tr>\n <tr>\n <th>1717</th>\n <td>350.0</td>\n <td>22600.03</td>\n <td>28.67</td>\n <td>18.680</td>\n 
<td>76.054876</td>\n <td>39.484097</td>\n <td>1288.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.628243</td>\n <td>0.560538</td>\n <td>0.120785</td>\n </tr>\n <tr>\n <th>2686</th>\n <td>660.0</td>\n <td>17624.15</td>\n <td>31.79</td>\n <td>32.040</td>\n <td>115.270887</td>\n <td>35.775540</td>\n <td>48.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.428897</td>\n <td>0.381988</td>\n <td>0.122801</td>\n </tr>\n <tr>\n <th>3228</th>\n <td>300.0</td>\n <td>13075.00</td>\n <td>28.45</td>\n <td>27.010</td>\n <td>123.943182</td>\n <td>41.899725</td>\n <td>124.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.356203</td>\n <td>0.317093</td>\n <td>0.123338</td>\n </tr>\n <tr>\n <th>2311</th>\n <td>300.0</td>\n <td>19779.00</td>\n <td>17.49</td>\n <td>31.090</td>\n <td>114.525863</td>\n <td>37.959933</td>\n <td>63.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.505898</td>\n <td>0.449974</td>\n <td>0.124284</td>\n </tr>\n <tr>\n <th>1064</th>\n <td>330.0</td>\n <td>22054.00</td>\n <td>20.08</td>\n <td>20.800</td>\n <td>117.103149</td>\n <td>36.718761</td>\n <td>27.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.549820</td>\n <td>0.628280</td>\n <td>0.124881</td>\n </tr>\n <tr>\n <th>1710</th>\n <td>350.0</td>\n <td>20519.75</td>\n <td>24.23</td>\n <td>12.710</td>\n <td>76.054876</td>\n <td>39.484097</td>\n <td>1288.0</td>\n 
<td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.516507</td>\n <td>0.434781</td>\n <td>0.187970</td>\n </tr>\n <tr>\n <th>1651</th>\n <td>330.0</td>\n <td>17913.00</td>\n <td>35.08</td>\n <td>22.070</td>\n <td>87.703630</td>\n <td>43.909559</td>\n <td>724.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.471082</td>\n <td>0.395266</td>\n <td>0.191812</td>\n </tr>\n <tr>\n <th>3391</th>\n <td>300.0</td>\n <td>13874.00</td>\n <td>24.01</td>\n <td>20.430</td>\n <td>121.228525</td>\n <td>41.143879</td>\n <td>44.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.378657</td>\n <td>0.470581</td>\n <td>0.195341</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>215.0</td>\n <td>16471.11</td>\n <td>30.10</td>\n <td>38.670</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.428770</td>\n <td>0.321635</td>\n <td>0.333093</td>\n </tr>\n <tr>\n <th>1022</th>\n <td>330.0</td>\n <td>20634.77</td>\n <td>24.66</td>\n <td>25.390</td>\n <td>119.276289</td>\n <td>36.668747</td>\n <td>75.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.530251</td>\n <td>0.073112</td>\n <td>6.252559</td>\n </tr>\n <tr>\n <th>343</th>\n <td>670.0</td>\n <td>14109.00</td>\n <td>45.20</td>\n <td>13.640</td>\n <td>125.941747</td>\n <td>44.106509</td>\n <td>199.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>...</td>\n 
<td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>1.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.0</td>\n <td>0.398257</td>\n <td>0.038802</td>\n <td>9.263788</td>\n </tr>\n </tbody>\n</table>\n<p>55 rows × 66 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 48,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test_data[test_data.error_rate > 0.05].sort_values(by='error_rate')"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 49,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "(376, 66)"
|
||
},
|
||
"execution_count": 49,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test_data.shape"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 50,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"power_eva_df = pd.DataFrame.from_records([y_true_xgb, y_pred_xgb]).T\n",
|
||
"power_eva_df.to_csv('./发电测试结果.csv', index=False, encoding='utf-8-sig')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 51,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"gb_model.save_model('./models/power_model.txt')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### 发热建模"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 52,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def xgb_cv(max_depth, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n",
|
||
" val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth),\n",
|
||
" learning_rate=learning_rate,\n",
|
||
" n_estimators=2000,\n",
|
||
" min_child_weight=min_child_weight,\n",
|
||
" subsample=max(min(subsample, 1), 0),\n",
|
||
" colsample_bytree=max(min(colsample_bytree, 1), 0),\n",
|
||
" reg_alpha=max(reg_alpha, 0), gamma=gamma, objective='reg:squarederror',\n",
|
||
" booster='gbtree',\n",
|
||
" seed=10), X=train[feature_cols], y=train['供热碳排放因子(kg/MJ)'], scoring='r2',\n",
|
||
"                          cv=10).mean()  # average the 10 fold scores; taking the best fold is optimistically biased\n",
|
||
" return val"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 53,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"params_xgb = {'objective': 'reg:squarederror',\n",
|
||
" 'booster': 'gbtree',\n",
|
||
" 'eta': 0.005,\n",
|
||
" 'max_depth': 60,\n",
|
||
" 'subsample': 0.5,\n",
|
||
" 'colsample_bytree': 0.9,\n",
|
||
" 'min_child_weight': 30,\n",
|
||
" 'seed': 666}\n",
|
||
"\n",
|
||
"num_boost_round = 2000"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 54,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 2.7E-05, RMSE: 0.0052, MAE: 0.0025, MAPE: 4.651 %, R_2: 0.7287\n",
|
||
"MSE: 2.8E-05, RMSE: 0.0053, MAE: 0.0028, MAPE: 4.129 %, R_2: 0.7189\n",
|
||
"MSE: 2.0E-05, RMSE: 0.0045, MAE: 0.0026, MAPE: 3.629 %, R_2: 0.7839\n",
|
||
"MSE: 1.6E-05, RMSE: 0.004, MAE: 0.0025, MAPE: 3.73 %, R_2: 0.8376\n",
|
||
"MSE: 3.0E-05, RMSE: 0.0054, MAE: 0.0029, MAPE: 5.181 %, R_2: 0.7219\n",
|
||
"MSE: 1.9E-05, RMSE: 0.0044, MAE: 0.0025, MAPE: 3.849 %, R_2: 0.8013\n",
|
||
"MSE: 3.1E-05, RMSE: 0.0056, MAE: 0.0028, MAPE: 5.661 %, R_2: 0.7182\n",
|
||
"MSE: 2.3E-05, RMSE: 0.0048, MAE: 0.0026, MAPE: 4.386 %, R_2: 0.7888\n",
|
||
"MSE: 2.0E-05, RMSE: 0.0045, MAE: 0.0024, MAPE: 3.456 %, R_2: 0.8005\n",
|
||
"MSE: 2.3E-05, RMSE: 0.0048, MAE: 0.0027, MAPE: 3.897 %, R_2: 0.7742\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"eva_list = list()\n",
|
||
"for (train_index, test_index) in kf.split(train_data):\n",
|
||
"    train = train_data.iloc[train_index]  # kf.split yields positional indices, so select by position\n",
|
||
"    test = train_data.iloc[test_index]  # kf.split yields positional indices, so select by position\n",
|
||
" train, valid = train_test_split(train, test_size=0.11, random_state=42)\n",
|
||
" X_train, Y_train = train[feature_cols], train['供热碳排放因子(kg/MJ)']\n",
|
||
" X_valid, Y_valid = valid[feature_cols], valid['供热碳排放因子(kg/MJ)']\n",
|
||
"    X_test, Y_test = test[feature_cols], test['供热碳排放因子(kg/MJ)']  # score on the held-out fold, not on the early-stopping validation set\n",
|
||
" dtrain = xgb.DMatrix(X_train, Y_train)\n",
|
||
" dvalid = xgb.DMatrix(X_valid, Y_valid)\n",
|
||
" watchlist = [(dvalid, 'eval')]\n",
|
||
" gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
|
||
" early_stopping_rounds=100, verbose_eval=False)\n",
|
||
" y_pred = gb_model.predict(xgb.DMatrix(X_test))\n",
|
||
" y_true = Y_test.values\n",
|
||
" MSE = mean_squared_error(y_true, y_pred)\n",
|
||
" RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n",
|
||
" MAE = mean_absolute_error(y_true, y_pred)\n",
|
||
" MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
|
||
" R_2 = r2_score(y_true, y_pred)\n",
|
||
" print('MSE:', format(MSE, '.1E'), end=', ')\n",
|
||
" print('RMSE:', round(RMSE, 4), end=', ')\n",
|
||
" print('MAE:', round(MAE, 4), end=', ')\n",
|
||
" print('MAPE:', round(MAPE*100, 3), '%', end=', ')\n",
|
||
" print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n",
|
||
" eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 55,
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"dtrain = xgb.DMatrix(train_X, train_y.values[:, 1])\n",
|
||
"dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 1])\n",
|
||
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
|
||
"\n",
|
||
"gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
|
||
" early_stopping_rounds=200, verbose_eval=False)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"y_pred_heat = gb_model_heat.predict(xgb.DMatrix(test_X[feature_cols]))\n",
|
||
"y_true_heat = test_y.values[:, 1]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 1.7E-05\n",
|
||
"RMSE: 0.0041\n",
|
||
"MAE: 0.0024\n",
|
||
"MAPE: 3.61 %\n",
|
||
"R_2: 0.8188\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"MSE = mean_squared_error(y_true_heat, y_pred_heat)\n",
|
||
"RMSE = np.sqrt(mean_squared_error(y_true_heat, y_pred_heat))\n",
|
||
"MAE = mean_absolute_error(y_true_heat, y_pred_heat)\n",
|
||
"MAPE = mean_absolute_percentage_error(y_true_heat, y_pred_heat)\n",
|
||
"R_2 = r2_score(y_true_heat, y_pred_heat)\n",
|
||
"print('MSE:', format(MSE, '.1E'))\n",
|
||
"print('RMSE:', round(RMSE, 4))\n",
|
||
"print('MAE:', round(MAE, 4))\n",
|
||
"print('MAPE:', round(MAPE*100, 2), '%')\n",
|
||
"print('R_2:', round(R_2, 4)) # a negative R^2 means the fit is worse than predicting the mean"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 58,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"pd.DataFrame.from_records([y_true_heat, y_pred_heat]).T.to_csv('./供热测试结果.csv', index=False, encoding='utf-8-sig')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"gb_model_heat.save_model('./models/heat_model.txt')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%% md\n"
|
||
}
|
||
},
|
||
"source": [
|
||
"### 煤种标准化工程"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
|
||
" \"\"\"Entry point for launching an IPython kernel.\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"new_values = use_data.groupby(['煤种', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean()  # select the target columns with a list; tuple-style selection is deprecated"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 61,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ)\n煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n无烟煤 19827.00 11.18 2539.00 0.561424 0.087794\n烟煤 16733.00 22.53 27.46 0.441511 0.064259\n 16740.00 18.99 37.00 0.487225 0.064535\n 27.93 24.43 0.418457 0.064747\n 16741.00 26.69 25.92 0.433679 0.061822\n... ... ...\n贫煤 21938.00 13.40 22.58 0.615856 0.099905\n 22042.72 12.96 25.69 0.636563 0.079468\n 22149.00 12.43 25.10 0.629733 0.082772\n 22272.51 11.83 22.97 0.627877 0.083234\n 22475.97 8.90 23.98 0.620331 0.086574\n\n[3579 rows x 2 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th></th>\n <th></th>\n <th></th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n </tr>\n <tr>\n <th>煤种</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n <th></th>\n <th></th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>无烟煤</th>\n <th>19827.00</th>\n <th>11.18</th>\n <th>2539.00</th>\n <td>0.561424</td>\n <td>0.087794</td>\n </tr>\n <tr>\n <th rowspan=\"4\" valign=\"top\">烟煤</th>\n <th>16733.00</th>\n <th>22.53</th>\n <th>27.46</th>\n <td>0.441511</td>\n <td>0.064259</td>\n </tr>\n <tr>\n <th rowspan=\"2\" valign=\"top\">16740.00</th>\n <th>18.99</th>\n <th>37.00</th>\n <td>0.487225</td>\n <td>0.064535</td>\n </tr>\n <tr>\n <th>27.93</th>\n <th>24.43</th>\n <td>0.418457</td>\n <td>0.064747</td>\n </tr>\n <tr>\n <th>16741.00</th>\n <th>26.69</th>\n <th>25.92</th>\n <td>0.433679</td>\n <td>0.061822</td>\n </tr>\n <tr>\n <th>...</th>\n <th>...</th>\n <th>...</th>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th rowspan=\"5\" valign=\"top\">贫煤</th>\n <th>21938.00</th>\n <th>13.40</th>\n <th>22.58</th>\n <td>0.615856</td>\n <td>0.099905</td>\n </tr>\n <tr>\n <th>22042.72</th>\n <th>12.96</th>\n <th>25.69</th>\n <td>0.636563</td>\n <td>0.079468</td>\n </tr>\n <tr>\n <th>22149.00</th>\n <th>12.43</th>\n <th>25.10</th>\n <td>0.629733</td>\n <td>0.082772</td>\n </tr>\n <tr>\n <th>22272.51</th>\n <th>11.83</th>\n <th>22.97</th>\n <td>0.627877</td>\n <td>0.083234</td>\n </tr>\n <tr>\n <th>22475.97</th>\n <th>8.90</th>\n <th>23.98</th>\n <td>0.620331</td>\n <td>0.086574</td>\n </tr>\n </tbody>\n</table>\n<p>3579 rows × 2 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 61,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_values"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 62,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%)\n0 无烟煤 19827.00 11.18 2539.00\n1 烟煤 16733.00 22.53 27.46\n2 烟煤 16740.00 18.99 37.00\n3 烟煤 16740.00 27.93 24.43\n4 烟煤 16741.00 26.69 25.92\n... ... ... ... ...\n3574 贫煤 21938.00 13.40 22.58\n3575 贫煤 22042.72 12.96 25.69\n3576 贫煤 22149.00 12.43 25.10\n3577 贫煤 22272.51 11.83 22.97\n3578 贫煤 22475.97 8.90 23.98\n\n[3579 rows x 4 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>煤种</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>无烟煤</td>\n <td>19827.00</td>\n <td>11.18</td>\n <td>2539.00</td>\n </tr>\n <tr>\n <th>1</th>\n <td>烟煤</td>\n <td>16733.00</td>\n <td>22.53</td>\n <td>27.46</td>\n </tr>\n <tr>\n <th>2</th>\n <td>烟煤</td>\n <td>16740.00</td>\n <td>18.99</td>\n <td>37.00</td>\n </tr>\n <tr>\n <th>3</th>\n <td>烟煤</td>\n <td>16740.00</td>\n <td>27.93</td>\n <td>24.43</td>\n </tr>\n <tr>\n <th>4</th>\n <td>烟煤</td>\n <td>16741.00</td>\n <td>26.69</td>\n <td>25.92</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3574</th>\n <td>贫煤</td>\n <td>21938.00</td>\n <td>13.40</td>\n <td>22.58</td>\n </tr>\n <tr>\n <th>3575</th>\n <td>贫煤</td>\n <td>22042.72</td>\n <td>12.96</td>\n <td>25.69</td>\n </tr>\n <tr>\n <th>3576</th>\n <td>贫煤</td>\n <td>22149.00</td>\n <td>12.43</td>\n <td>25.10</td>\n </tr>\n <tr>\n <th>3577</th>\n <td>贫煤</td>\n <td>22272.51</td>\n <td>11.83</td>\n <td>22.97</td>\n </tr>\n <tr>\n <th>3578</th>\n <td>贫煤</td>\n <td>22475.97</td>\n <td>8.90</td>\n <td>23.98</td>\n </tr>\n </tbody>\n</table>\n<p>3579 rows × 4 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 62,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"coal_df = new_values.reset_index().drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])\n",
|
||
"coal_df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 63,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"coal_params_dict = dict()\n",
|
||
"for coal_type in coal_df['煤种'].unique().tolist():\n",
|
||
" options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']].values\n",
|
||
" coal_params_dict[coal_type] = options"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 64,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n0 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21602.05000 \n1 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21926.81000 \n2 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21261.93062 \n3 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20840.00000 \n4 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20706.00000 \n... ... ... ... ... ... ... ... \n5689 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21973.00000 \n5690 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21372.00000 \n5691 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 20856.00000 \n5692 榆能榆神热电有限公司 1 350.0 抽凝式 超临界 间接空冷 25514.00000 \n5693 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.00000 \n\n 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1 \n1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1 \n2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1 \n3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1 \n4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1 \n... ... ... .. ... ... ... ... \n5689 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73 \n5690 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73 \n5691 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73 \n5692 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n5693 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 核心设备类型 汽轮机类型 冷却方式 \n0 0.586990 0.076843 煤粉锅炉 凝气式 水冷-开式循环 \n1 0.632859 0.077676 煤粉锅炉 凝气式 水冷-开式循环 \n2 0.609196 0.074823 煤粉锅炉 凝气式 水冷-开式循环 \n3 0.602178 0.081628 煤粉锅炉 凝气式 水冷-开式循环 \n4 0.590254 0.081103 煤粉锅炉 凝气式 水冷-开式循环 \n... ... ... ... ... ... \n5689 0.628300 0.078776 煤粉锅炉 凝气式 水冷-开式循环 \n5690 0.595019 0.076622 煤粉锅炉 凝气式 水冷-开式循环 \n5691 0.565718 0.074772 煤粉锅炉 凝气式 水冷-开式循环 \n5692 0.664456 0.091482 煤粉锅炉 抽凝式 空冷-间接空冷 \n5693 0.661759 0.091483 煤粉锅炉 抽凝式 空冷-间接空冷 \n\n[5041 rows x 19 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>企业名称</th>\n <th>机组编号</th>\n <th>铭牌容量 (MW)</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n <th>煤种</th>\n <th>所处地区</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n <th>核心设备类型</th>\n <th>汽轮机类型</th>\n <th>冷却方式</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21602.05000</td>\n <td>26.09</td>\n <td>16.80</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.586990</td>\n <td>0.076843</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>1</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21926.81000</td>\n <td>26.68</td>\n <td>15.41</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.632859</td>\n <td>0.077676</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>2</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21261.93062</td>\n <td>26.46</td>\n <td>15.18</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.609196</td>\n <td>0.074823</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>3</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>20840.00000</td>\n <td>26.43</td>\n <td>14.55</td>\n <td>烟煤</td>\n 
<td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.602178</td>\n <td>0.081628</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>4</th>\n <td>江苏利港电力有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>20706.00000</td>\n <td>26.43</td>\n <td>14.96</td>\n <td>烟煤</td>\n <td>江苏省</td>\n <td>120.096620</td>\n <td>31.942361</td>\n <td>1</td>\n <td>0.590254</td>\n <td>0.081103</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>5689</th>\n <td>浙江浙能电力股份有限公司台州发电厂</td>\n <td>8</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21973.00000</td>\n <td>37.43</td>\n <td>17.12</td>\n <td>烟煤</td>\n <td>浙江省</td>\n <td>121.465840</td>\n <td>28.704623</td>\n <td>73</td>\n <td>0.628300</td>\n <td>0.078776</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>5690</th>\n <td>浙江浙能电力股份有限公司台州发电厂</td>\n <td>8</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>21372.00000</td>\n <td>39.87</td>\n <td>18.01</td>\n <td>烟煤</td>\n <td>浙江省</td>\n <td>121.465840</td>\n <td>28.704623</td>\n <td>73</td>\n <td>0.595019</td>\n <td>0.076622</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>5691</th>\n <td>浙江浙能电力股份有限公司台州发电厂</td>\n <td>8</td>\n <td>350.0</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>20856.00000</td>\n <td>39.32</td>\n <td>19.74</td>\n <td>烟煤</td>\n <td>浙江省</td>\n <td>121.465840</td>\n <td>28.704623</td>\n <td>73</td>\n <td>0.565718</td>\n <td>0.074772</td>\n <td>煤粉锅炉</td>\n <td>凝气式</td>\n <td>水冷-开式循环</td>\n </tr>\n <tr>\n <th>5692</th>\n 
<td>榆能榆神热电有限公司</td>\n <td>1</td>\n <td>350.0</td>\n <td>抽凝式</td>\n <td>超临界</td>\n <td>间接空冷</td>\n <td>25514.00000</td>\n <td>38.84</td>\n <td>7.28</td>\n <td>烟煤</td>\n <td>陕西省</td>\n <td>109.820265</td>\n <td>38.304383</td>\n <td>1151</td>\n <td>0.664456</td>\n <td>0.091482</td>\n <td>煤粉锅炉</td>\n <td>抽凝式</td>\n <td>空冷-间接空冷</td>\n </tr>\n <tr>\n <th>5693</th>\n <td>榆能榆神热电有限公司</td>\n <td>2</td>\n <td>350.0</td>\n <td>抽凝式</td>\n <td>超临界</td>\n <td>间接空冷</td>\n <td>25514.00000</td>\n <td>38.84</td>\n <td>7.28</td>\n <td>烟煤</td>\n <td>陕西省</td>\n <td>109.820265</td>\n <td>38.304383</td>\n <td>1151</td>\n <td>0.661759</td>\n <td>0.091483</td>\n <td>煤粉锅炉</td>\n <td>抽凝式</td>\n <td>空冷-间接空冷</td>\n </tr>\n </tbody>\n</table>\n<p>5041 rows × 19 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 64,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"use_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 65,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n",
|
||
" \"\"\"Entry point for launching an IPython kernel.\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude 煤种 \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n1 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n2 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n3 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n4 上海市 凝气式 亚临界 水冷-开式循环 320.0 121.601480 31.358794 2 褐煤 \n... ... ... ... ... ... ... ... ... .. \n3789 黑龙江省 纯凝式 超高压 水冷 200.0 126.575647 45.918566 118 褐煤 \n3790 黑龙江省 纯凝式 超高压 水冷 210.0 131.695864 46.580444 91 褐煤 \n3791 黑龙江省 背压式 超高压 水冷-开式循环 200.0 123.639146 47.210696 151 褐煤 \n3792 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 褐煤 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 褐煤 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n0 0.623923 0.078064 \n1 0.639474 0.079308 \n2 0.635351 0.078691 \n3 0.674456 0.085853 \n4 0.506816 0.060934 \n... ... ... \n3789 0.500172 0.064200 \n3790 0.518301 0.063249 \n3791 0.224312 0.053770 \n3792 0.290814 0.068027 \n3793 0.321635 0.067798 \n\n[3794 rows x 11 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>煤种</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>烟煤</td>\n <td>0.623923</td>\n <td>0.078064</td>\n </tr>\n <tr>\n <th>1</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>烟煤</td>\n <td>0.639474</td>\n <td>0.079308</td>\n </tr>\n <tr>\n <th>2</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>烟煤</td>\n <td>0.635351</td>\n <td>0.078691</td>\n </tr>\n <tr>\n <th>3</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>烟煤</td>\n <td>0.674456</td>\n <td>0.085853</td>\n </tr>\n <tr>\n <th>4</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>320.0</td>\n <td>121.601480</td>\n <td>31.358794</td>\n <td>2</td>\n <td>褐煤</td>\n <td>0.506816</td>\n <td>0.060934</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3789</th>\n <td>黑龙江省</td>\n <td>纯凝式</td>\n <td>超高压</td>\n <td>水冷</td>\n <td>200.0</td>\n <td>126.575647</td>\n 
<td>45.918566</td>\n <td>118</td>\n <td>褐煤</td>\n <td>0.500172</td>\n <td>0.064200</td>\n </tr>\n <tr>\n <th>3790</th>\n <td>黑龙江省</td>\n <td>纯凝式</td>\n <td>超高压</td>\n <td>水冷</td>\n <td>210.0</td>\n <td>131.695864</td>\n <td>46.580444</td>\n <td>91</td>\n <td>褐煤</td>\n <td>0.518301</td>\n <td>0.063249</td>\n </tr>\n <tr>\n <th>3791</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-开式循环</td>\n <td>200.0</td>\n <td>123.639146</td>\n <td>47.210696</td>\n <td>151</td>\n <td>褐煤</td>\n <td>0.224312</td>\n <td>0.053770</td>\n </tr>\n <tr>\n <th>3792</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>褐煤</td>\n <td>0.290814</td>\n <td>0.068027</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>褐煤</td>\n <td>0.321635</td>\n <td>0.067798</td>\n </tr>\n </tbody>\n</table>\n<p>3794 rows × 11 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 65,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# Mean of both emission factors for every distinct (use_cols + coal type) combination.\n",
|
||
"# The two target columns are selected with a list: selecting GroupBy columns with a bare\n",
|
||
"# tuple (single brackets around several names) warns since pandas 1.0 and raises in pandas 2.0.\n",
|
||
"# NOTE(review): the final drop only succeeds if use_cols contains the three coal-property\n",
|
||
"# columns, so groups are formed per coal sample rather than per coal type - confirm this is intended.\n",
|
||
"new_use_data = (\n",
|
||
"    use_data\n",
|
||
"    .groupby(use_cols + ['煤种'])[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']]\n",
|
||
"    .mean()\n",
|
||
"    .reset_index()\n",
|
||
"    .drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])\n",
|
||
")\n",
|
||
"new_use_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 66,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Look up, for every row, the entry registered in coal_params_dict for its coal type\n",
|
||
"# (rows whose coal type is not a key receive None).\n",
|
||
"new_use_data['coal_params'] = new_use_data['煤种'].apply(coal_params_dict.get)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 67,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Remove the coal-type label in place now that its parameter list has been attached.\n",
|
||
"del new_use_data['煤种']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 68,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# One output row per element of each coal_params list; the original row index is repeated.\n",
|
||
"new_data = new_use_data.explode('coal_params')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 69,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n... ... ... ... ... ... ... ... ... \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) coal_params \n0 0.623923 0.078064 [16733.0, 22.53, 27.46] \n0 0.623923 0.078064 [16740.0, 18.99, 37.0] \n0 0.623923 0.078064 [16740.0, 27.93, 24.43] \n0 0.623923 0.078064 [16741.0, 26.69, 25.92] \n0 0.623923 0.078064 [16741.51, 19.51, 35.62] \n... ... ... ... \n3793 0.321635 0.067798 [16723.0, 40.63, 39.94] \n3793 0.321635 0.067798 [16725.0, 26.36, 28.51] \n3793 0.321635 0.067798 [16725.19, 34.59, 37.71] \n3793 0.321635 0.067798 [16725.85, 43.2, 12.0] \n3793 0.321635 0.067798 [16729.0, 51.42, 17.33] \n\n[8019537 rows x 11 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n <th>coal_params</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>[16733.0, 22.53, 27.46]</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>[16740.0, 18.99, 37.0]</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>[16740.0, 27.93, 24.43]</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>[16741.0, 26.69, 25.92]</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.471140</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>[16741.51, 19.51, 35.62]</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3793</th>\n 
<td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.321635</td>\n <td>0.067798</td>\n <td>[16723.0, 40.63, 39.94]</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.321635</td>\n <td>0.067798</td>\n <td>[16725.0, 26.36, 28.51]</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.321635</td>\n <td>0.067798</td>\n <td>[16725.19, 34.59, 37.71]</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.321635</td>\n <td>0.067798</td>\n <td>[16725.85, 43.2, 12.0]</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>215.0</td>\n <td>129.604803</td>\n <td>44.608202</td>\n <td>250</td>\n <td>0.321635</td>\n <td>0.067798</td>\n <td>[16729.0, 51.42, 17.33]</td>\n </tr>\n </tbody>\n</table>\n<p>8019537 rows × 11 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 69,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 70,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Split every coal_params triplet into its three feature columns (position 0, 1, 2).\n",
|
||
"# .values strips the index so the assignment is positional despite the repeated row labels.\n",
|
||
"for slot, feature_name in enumerate(['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']):\n",
|
||
"    new_data[feature_name] = new_data.coal_params.apply(lambda triplet: triplet[slot]).values"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 71,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# The packed triplet column is redundant once it has been unpacked; work on a copy without it.\n",
|
||
"norm_data = new_data.drop(columns=['coal_params'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 72,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n0 0.623923 0.078064 16733.00 22.53 27.46 \n0 0.623923 0.078064 16740.00 18.99 37.00 \n0 0.623923 0.078064 16740.00 27.93 24.43 \n0 0.623923 0.078064 16741.00 26.69 25.92 \n0 0.623923 0.078064 16741.51 19.51 35.62 ",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>发电碳排放因子(kg/kWh)</th>\n <th>供热碳排放因子(kg/MJ)</th>\n <th>入炉煤低位热值(kJ/kg)</th>\n <th>燃煤挥发份Var(%)</th>\n <th>燃煤灰份Aar(%)</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.47114</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>16733.00</td>\n <td>22.53</td>\n <td>27.46</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.47114</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>16740.00</td>\n <td>18.99</td>\n <td>37.00</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.47114</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>16740.00</td>\n <td>27.93</td>\n <td>24.43</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.47114</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>16741.00</td>\n <td>26.69</td>\n <td>25.92</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>300.0</td>\n <td>121.47114</td>\n <td>31.065113</td>\n <td>3</td>\n <td>0.623923</td>\n <td>0.078064</td>\n <td>16741.51</td>\n <td>19.51</td>\n <td>35.62</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||
},
|
||
"execution_count": 72,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"norm_data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 73,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Apply log(1 + x) to every column listed in num_cols, overwriting norm_data in place.\n",
|
||
"for numeric_col in num_cols:\n",
|
||
"    norm_data[numeric_col] = norm_data[numeric_col].pipe(np.log1p)\n",
|
||
"# One-hot encode the columns listed in object_cols.\n",
|
||
"norm_data_dummpy = pd.get_dummies(data=norm_data, columns=object_cols)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 74,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Strip the observed emission-factor columns from the encoded frame (in place).\n",
|
||
"observed_factor_cols = ['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']\n",
|
||
"norm_data_dummpy.drop(labels=observed_factor_cols, axis=1, inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 75,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Build the xgboost input from exactly the columns in feature_cols, in that order.\n",
|
||
"new_xgb_data = xgb.DMatrix(data=norm_data_dummpy.loc[:, feature_cols])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 76,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Store each model's prediction for the expanded rows in its own column of norm_data.\n",
|
||
"for output_col, fitted_model in (('power_co2_factor', gb_model), ('heat_co2_factor', gb_model_heat)):\n",
|
||
"    norm_data[output_col] = fitted_model.predict(new_xgb_data)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Drop the coal-property inputs and the observed factors, keeping the remaining\n",
|
||
"# descriptor columns plus the two predicted factors.\n",
|
||
"normaled_data = norm_data.drop(\n",
|
||
"    columns=[\n",
|
||
"        '入炉煤低位热值(kJ/kg)',\n",
|
||
"        '燃煤挥发份Var(%)',\n",
|
||
"        '燃煤灰份Aar(%)',\n",
|
||
"        '发电碳排放因子(kg/kWh)',\n",
|
||
"        '供热碳排放因子(kg/MJ)',\n",
|
||
"    ]\n",
|
||
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n... ... ... ... ... ... ... ... ... \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n\n power_co2_factor heat_co2_factor \n0 0.063166 0.063012 \n0 0.062317 0.062422 \n0 0.062508 0.062922 \n0 0.062466 0.062950 \n0 0.062743 0.063012 \n... ... ... \n3793 0.067768 0.068277 \n3793 0.066563 0.066854 \n3793 0.068115 0.068242 \n3793 0.066680 0.066995 \n3793 0.067563 0.067869 \n\n[8019537 rows x 10 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>power_co2_factor</th>\n <th>heat_co2_factor</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.063166</td>\n <td>0.063012</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.062317</td>\n <td>0.062422</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.062508</td>\n <td>0.062922</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.062466</td>\n <td>0.062950</td>\n </tr>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷-开式循环</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.062743</td>\n <td>0.063012</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>5.375278</td>\n <td>4.872176</td>\n <td>3.820088</td>\n <td>5.525453</td>\n <td>0.067768</td>\n 
<td>0.068277</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>5.375278</td>\n <td>4.872176</td>\n <td>3.820088</td>\n <td>5.525453</td>\n <td>0.066563</td>\n <td>0.066854</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>5.375278</td>\n <td>4.872176</td>\n <td>3.820088</td>\n <td>5.525453</td>\n <td>0.068115</td>\n <td>0.068242</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>5.375278</td>\n <td>4.872176</td>\n <td>3.820088</td>\n <td>5.525453</td>\n <td>0.066680</td>\n <td>0.066995</td>\n </tr>\n <tr>\n <th>3793</th>\n <td>黑龙江省</td>\n <td>背压式</td>\n <td>超高压</td>\n <td>水冷-闭式循环</td>\n <td>5.375278</td>\n <td>4.872176</td>\n <td>3.820088</td>\n <td>5.525453</td>\n <td>0.067563</td>\n <td>0.067869</td>\n </tr>\n </tbody>\n</table>\n<p>8019537 rows × 10 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 78,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"normaled_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 79,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Names of the two predicted-factor columns that get averaged below.\n",
|
||
"target_cols = [\n",
|
||
"    'power_co2_factor',\n",
|
||
"    'heat_co2_factor',\n",
|
||
"]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Group by every non-target column and average the two predicted factors within each group.\n",
|
||
"save_data = normaled_data.groupby(normaled_data.columns.drop(target_cols).tolist())[target_cols].mean()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# to_csv does not create missing parent directories, so make sure ./results exists;\n",
|
||
"# otherwise a run from a fresh checkout fails here, after the expensive prediction step.\n",
|
||
"# The import is local to this cell because it is the only place that needs it.\n",
|
||
"import os\n",
|
||
"\n",
|
||
"os.makedirs('./results', exist_ok=True)\n",
|
||
"# Group keys are turned back into ordinary columns before writing; utf-8-sig prepends a BOM.\n",
|
||
"save_data.reset_index().to_csv('./results/去煤种化数据.csv', encoding='utf-8-sig', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 81,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.7.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
} |