From 8237af04c7dacbed6d643afe4c6ab2ca40d16c7b Mon Sep 17 00:00:00 2001 From: zhaojinghao Date: Thu, 11 May 2023 14:18:08 +0800 Subject: [PATCH] add code --- .../data_extract-checkpoint.ipynb | 682 ++++ .../get_altitude-checkpoint.ipynb | 137 + .../xgboost多任务回归-checkpoint.ipynb | 2972 ++++++++++++++++ .../两张表特征对齐-checkpoint.ipynb | 759 ++++ ...attention+LSTM对天数据建模-checkpoint.ipynb | 737 ++++ ...煤种标准化的数据建模及预测-checkpoint.ipynb | 2299 ++++++++++++ .../特征分组建模_lightgbm-checkpoint.ipynb | 3156 +++++++++++++++++ data_extract.ipynb | 892 +++++ evaluation.ipynb | 144 + extract_plant.ipynb | 149 + get_altitude.ipynb | 291 ++ xgboost多任务回归.ipynb | 1809 ++++++++++ 两张表特征对齐.ipynb | 591 +++ 基于attention+LSTM对天数据建模.ipynb | 737 ++++ 基于煤种标准化的数据建模及预测.ipynb | 1536 ++++++++ 特征分组建模_lightgbm.ipynb | 3155 ++++++++++++++++ 省际测试.ipynb | 235 ++ 计算碳排放.ipynb | 701 ++++ 18 files changed, 20982 insertions(+) create mode 100644 .ipynb_checkpoints/data_extract-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/get_altitude-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/xgboost多任务回归-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/两张表特征对齐-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/基于attention+LSTM对天数据建模-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/基于煤种标准化的数据建模及预测-checkpoint.ipynb create mode 100644 .ipynb_checkpoints/特征分组建模_lightgbm-checkpoint.ipynb create mode 100644 data_extract.ipynb create mode 100644 evaluation.ipynb create mode 100644 extract_plant.ipynb create mode 100644 get_altitude.ipynb create mode 100644 xgboost多任务回归.ipynb create mode 100644 两张表特征对齐.ipynb create mode 100644 基于attention+LSTM对天数据建模.ipynb create mode 100644 基于煤种标准化的数据建模及预测.ipynb create mode 100644 特征分组建模_lightgbm.ipynb create mode 100644 省际测试.ipynb create mode 100644 计算碳排放.ipynb diff --git a/.ipynb_checkpoints/data_extract-checkpoint.ipynb b/.ipynb_checkpoints/data_extract-checkpoint.ipynb new file mode 100644 index 0000000..e47808f --- /dev/null +++ 
b/.ipynb_checkpoints/data_extract-checkpoint.ipynb @@ -0,0 +1,682 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "635" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "file_path = [x for x in os.listdir('./new_data/') if x.endswith('xls') and '经济性' in x]\n", + "len(file_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "data = pd.read_excel(f'./new_data/{file_path[0]}', header=[3,4,5])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['序号',\n", + " '机组编号',\n", + " '时间',\n", + " '发电量(万kWh)',\n", + " '供电量(万kWh)',\n", + " '标煤量(t)',\n", + " '发电用标煤量(t)',\n", + " '供热用标煤量(t)',\n", + " '利用小时(h)',\n", + " '平均负荷(MW)',\n", + " '出力系数(%)',\n", + " '出力系数(%)',\n", + " '出力系数(%)',\n", + " '工业供热量',\n", + " '工业热电比(%)',\n", + " '采暖供热量',\n", + " '采暖热电比(%)',\n", + " '总热电比(%)',\n", + " '总供热量',\n", + " '供热煤耗',\n", + " '给水泵汽轮机总耗热量',\n", + " '供电煤耗gce/(kWh)',\n", + " '综合厂用电率(%)',\n", + " '发电厂用电率(%)',\n", + " '供热厂用电率(%)',\n", + " '非生产厂用电率(%)',\n", + " '最新THA工况二类修正后汽机热耗率kJ/KWh',\n", + " '点火用油(kg/万kWh)',\n", + " '助燃用油(kg/万kWh)',\n", + " '主蒸汽压力(MPa)',\n", + " '主蒸汽温度(℃)',\n", + " '再热蒸汽温度(℃)',\n", + " '高加投入率(%)',\n", + " '给水温度(℃)',\n", + " '真空严密性V(Pa/min)',\n", + " '真空度(%)',\n", + " '凝汽器端差(℃)',\n", + " '凝结水过冷度(℃)',\n", + " '发电补给水率(%)',\n", + " '发电综合耗水率',\n", + " '排烟温度(℃)',\n", + " '飞灰含碳量(%)',\n", + " '空预器漏风率(%)',\n", + " 
'过热器减温水量(t/h)',\n", + " '再热器减温水量(t/h)',\n", + " '入厂煤低位热值(kJ/kg)',\n", + " '入炉煤低位热值(kJ/kg)',\n", + " '燃煤挥发份Var(%)',\n", + " '燃煤灰份Aar(%)',\n", + " '燃煤低位热值Qar,net(kJ/kg)',\n", + " '燃煤硫份Sar(%)',\n", + " '锅炉专业主要辅机耗电率 送风机耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 引风机耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 一次风机耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 炉水泵耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 给煤机耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 磨煤机耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 电除尘器耗电率(%)',\n", + " '锅炉专业主要辅机耗电率 除灰系统耗电率(%)',\n", + " '汽机专业主要辅机耗电率 凝结水泵耗电率(%)',\n", + " '汽机专业主要辅机耗电率 前置泵耗电率(%)',\n", + " '汽机专业主要辅机耗电率 电动给水泵耗电率(%)',\n", + " '汽机专业主要辅机耗电率 循环水泵耗电率(%)',\n", + " '汽机专业主要辅机耗电率 空冷风机耗电率(%)',\n", + " '汽机专业主要辅机耗电率 热网循环水泵耗电率(%)',\n", + " '环保专业耗电率 脱硫系统耗电率(%)',\n", + " '环保专业耗电率 脱销系统耗电率(%)',\n", + " '输煤专业耗电率输煤系统耗电率(%)',\n", + " '化学系统耗电率 (%)',\n", + " '化学系统耗电率 (%)']" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = [''.join([x for x in y if 'Unnamed' not in x]) for y in data.columns]\n", + "cols" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data_list = list()\n", + "for file in file_path:\n", + " data = pd.read_excel(f'./new_data/{file}', header=[3,4,5])\n", + " data.columns = cols\n", + " plant = file.split('-')[0]\n", + " data['时间'] = data['时间'].astype(str)\n", + " use_data = data[~data['时间'].str.contains('半年')].copy()\n", + " use_data['电厂名称'] = plant\n", + " data_list.append(use_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
序号机组编号时间发电量(万kWh)供电量(万kWh)标煤量(t)发电用标煤量(t)供热用标煤量(t)利用小时(h)平均负荷(MW)...汽机专业主要辅机耗电率 电动给水泵耗电率(%)汽机专业主要辅机耗电率 循环水泵耗电率(%)汽机专业主要辅机耗电率 空冷风机耗电率(%)汽机专业主要辅机耗电率 热网循环水泵耗电率(%)环保专业耗电率 脱硫系统耗电率(%)环保专业耗电率 脱销系统耗电率(%)输煤专业耗电率输煤系统耗电率(%)化学系统耗电率 (%)化学系统耗电率 (%)电厂名称
011200890133.898438NaNNaNNaNNaN7210.709961113.171409...2.220.220NaNNaN1.1000.00.02NaNNaN万方发电厂(焦作爱依斯万方电力有限公司)
122200891543.898438NaNNaNNaNNaN7323.509766112.801718...2.280.220NaNNaN1.0000.00.02NaNNaN万方发电厂(焦作爱依斯万方电力有限公司)
231200788968.703125NaNNaNNaNNaN3550.750000109.667990...0.000.000NaNNaN0.0000.00.00NaNNaN万方发电厂(焦作爱依斯万方电力有限公司)
342200792342.750000NaNNaNNaNNaN3693.709961112.702716...0.000.000NaNNaN0.0000.00.00NaNNaN万方发电厂(焦作爱依斯万方电力有限公司)
0112021154638.030000146317.63597224.38398069.77199154.614418.229429204.300000...0.000.9360.00.311.0180.00.050.02NaN三河发电有限责任公司
..................................................................
101112005131498.000000NaNNaNNaNNaN6574.899902171.999827...0.000.000NaNNaN0.0000.00.00NaNNaN黔桂发电有限责任公司
111222005143572.000000NaNNaNNaNNaN7178.589844171.555440...0.000.000NaNNaN0.0000.00.00NaNNaN黔桂发电有限责任公司
121332005137886.000000NaNNaNNaNNaN6894.310059174.126182...0.000.000NaNNaN0.0000.00.00NaNNaN黔桂发电有限责任公司
131442005154531.000000NaNNaNNaNNaN7726.549805184.107277...0.000.000NaNNaN0.0000.00.00NaNNaN黔桂发电有限责任公司
141552005155969.000000NaNNaNNaNNaN7798.459961184.192211...0.000.000NaNNaN0.0000.00.00NaNNaN黔桂发电有限责任公司
\n", + "

15758 rows × 71 columns

\n", + "
" + ], + "text/plain": [ + " 序号 机组编号 时间 发电量(万kWh) 供电量(万kWh) 标煤量(t) 发电用标煤量(t) 供热用标煤量(t) \\\n", + "0 1 1 2008 90133.898438 NaN NaN NaN NaN \n", + "1 2 2 2008 91543.898438 NaN NaN NaN NaN \n", + "2 3 1 2007 88968.703125 NaN NaN NaN NaN \n", + "3 4 2 2007 92342.750000 NaN NaN NaN NaN \n", + "0 1 1 2021 154638.030000 146317.63 597224.38 398069.77 199154.61 \n", + ".. .. ... ... ... ... ... ... ... \n", + "10 11 1 2005 131498.000000 NaN NaN NaN NaN \n", + "11 12 2 2005 143572.000000 NaN NaN NaN NaN \n", + "12 13 3 2005 137886.000000 NaN NaN NaN NaN \n", + "13 14 4 2005 154531.000000 NaN NaN NaN NaN \n", + "14 15 5 2005 155969.000000 NaN NaN NaN NaN \n", + "\n", + " 利用小时(h) 平均负荷(MW) ... 汽机专业主要辅机耗电率 电动给水泵耗电率(%) \\\n", + "0 7210.709961 113.171409 ... 2.22 \n", + "1 7323.509766 112.801718 ... 2.28 \n", + "2 3550.750000 109.667990 ... 0.00 \n", + "3 3693.709961 112.702716 ... 0.00 \n", + "0 4418.229429 204.300000 ... 0.00 \n", + ".. ... ... ... ... \n", + "10 6574.899902 171.999827 ... 0.00 \n", + "11 7178.589844 171.555440 ... 0.00 \n", + "12 6894.310059 174.126182 ... 0.00 \n", + "13 7726.549805 184.107277 ... 0.00 \n", + "14 7798.459961 184.192211 ... 0.00 \n", + "\n", + " 汽机专业主要辅机耗电率 循环水泵耗电率(%) 汽机专业主要辅机耗电率 空冷风机耗电率(%) 汽机专业主要辅机耗电率 热网循环水泵耗电率(%) \\\n", + "0 0.220 NaN NaN \n", + "1 0.220 NaN NaN \n", + "2 0.000 NaN NaN \n", + "3 0.000 NaN NaN \n", + "0 0.936 0.0 0.31 \n", + ".. ... ... ... \n", + "10 0.000 NaN NaN \n", + "11 0.000 NaN NaN \n", + "12 0.000 NaN NaN \n", + "13 0.000 NaN NaN \n", + "14 0.000 NaN NaN \n", + "\n", + " 环保专业耗电率 脱硫系统耗电率(%) 环保专业耗电率 脱销系统耗电率(%) 输煤专业耗电率输煤系统耗电率(%) 化学系统耗电率 (%) \\\n", + "0 1.100 0.0 0.02 NaN \n", + "1 1.000 0.0 0.02 NaN \n", + "2 0.000 0.0 0.00 NaN \n", + "3 0.000 0.0 0.00 NaN \n", + "0 1.018 0.0 0.05 0.02 \n", + ".. ... ... ... ... 
\n", + "10 0.000 0.0 0.00 NaN \n", + "11 0.000 0.0 0.00 NaN \n", + "12 0.000 0.0 0.00 NaN \n", + "13 0.000 0.0 0.00 NaN \n", + "14 0.000 0.0 0.00 NaN \n", + "\n", + " 化学系统耗电率 (%) 电厂名称 \n", + "0 NaN 万方发电厂(焦作爱依斯万方电力有限公司) \n", + "1 NaN 万方发电厂(焦作爱依斯万方电力有限公司) \n", + "2 NaN 万方发电厂(焦作爱依斯万方电力有限公司) \n", + "3 NaN 万方发电厂(焦作爱依斯万方电力有限公司) \n", + "0 NaN 三河发电有限责任公司 \n", + ".. ... ... \n", + "10 NaN 黔桂发电有限责任公司 \n", + "11 NaN 黔桂发电有限责任公司 \n", + "12 NaN 黔桂发电有限责任公司 \n", + "13 NaN 黔桂发电有限责任公司 \n", + "14 NaN 黔桂发电有限责任公司 \n", + "\n", + "[15758 rows x 71 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.concat(data_list, axis=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### 机组信息" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['中国中信集团公司-32020-电厂机组数据查询-20220707.xls',\n", + " '中国中煤能源集团有限公司-34027-电厂机组数据查询-20220707.xls',\n", + " '中国华电集团有限公司-21060-电厂机组数据查询-20220707.xls',\n", + " '中国华能集团有限公司-17021-电厂机组数据查询-20220707.xls',\n", + " '中国大唐集团有限公司-61005-电厂机组数据查询-20220707.xls']" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_data_files = [x for x in os.listdir('./new_data/') if '电厂机组' in x]\n", + "unit_data_files[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['序号',\n", + " '企业编码',\n", + " '电厂名称',\n", + " '简称',\n", + " '机组编号',\n", + " '铭牌容量 (MW)',\n", + " '投产时间',\n", + " '机组类型',\n", + " '参数分类',\n", + " '所处地区',\n", + " '机组产地',\n", + " '锅炉制造厂家',\n", + " '汽轮机制造厂家',\n", + " '发电机制造厂家',\n", + " '主变压器制造厂家',\n", + " '二级公司',\n", + " '所属集团',\n", + " '所属电网',\n", + " '所属电网']" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_samples = pd.read_excel(f'./new_data/{unit_data_files[0]}', header=[3,4])\n", + 
"unit_cols = [''.join([x for x in y if 'Unnamed' not in x]) for y in unit_samples.columns]\n", + "unit_cols" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "unit_list = list()\n", + "for file in unit_data_files:\n", + " data = pd.read_excel(f'./new_data/{file}', header=[3,4])\n", + " data.columns = cols\n", + " plant = file.split('-')[0]\n", + " data['时间'] = data['时间'].astype(str)\n", + " use_data = data[~data['时间'].str.contains('半年')].copy()\n", + " use_data['电厂名称'] = plant\n", + " unit_list.append(use_data)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/.ipynb_checkpoints/get_altitude-checkpoint.ipynb b/.ipynb_checkpoints/get_altitude-checkpoint.ipynb new file mode 100644 index 0000000..876ef39 --- /dev/null +++ b/.ipynb_checkpoints/get_altitude-checkpoint.ipynb @@ -0,0 +1,137 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "888d089c-a9c8-4d2d-af74-dff1a8ccfefd", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "from typing import List\n", + "import requests\n", + "import pandas as pd\n", + "\n", + "\n", + "class GetElevation:\n", + "\n", + " @classmethod\n", + " def __SendQuery(cls, latLngString: str) -> json:\n", + " query = ('https://api.opentopodata.org/v1/mapzen?locations={}&interpolation=bilinear'.format(latLngString))\n", + " res = requests.get(query).json()\n", + " if res[\"status\"] != \"OK\":\n", + " raise Exception(res[\"error\"])\n", + " return res\n", + "\n", + " def 
GetSingleElevation(self, latitude: float, longitude: float) -> float:\n", + " \"\"\"\n", + " 获取单个高程,输入经纬度格式为数值类型,返回值为高程float类型\n", + " :param latitude: 纬度\n", + " :param longitude: 经度\n", + " :return: 高程\n", + " \"\"\"\n", + " if latitude < -90 or latitude > 90:\n", + " raise Exception(\"纬度的范围应在-90-90之间!请检查数据源!\")\n", + " latLngString = str(latitude) + \",\" + str(longitude)\n", + " res = self.__SendQuery(latLngString)\n", + " elevation = res[\"results\"][0][\"elevation\"]\n", + " return elevation\n", + "\n", + " def GetMultiElevation(self, latitude: List[float], longitude: List[float]) -> List[float]:\n", + " \"\"\"\n", + " 获取数组类型的高程,输入经纬度格式为经度数组和纬度数组,返回值为高程数组\n", + " :param latitude:纬度数组\n", + " :param longitude:经度数组\n", + " :return:高程数组\n", + " \"\"\"\n", + " if len(latitude) != len(longitude):\n", + " raise Exception(\"纬度数组和经度数组长度不一致!请检查数据源!\")\n", + " for lat in latitude:\n", + " if lat < -90 or lat > 90:\n", + " raise Exception(\"纬度的范围应在-90-90之间!请检查数据源!\")\n", + " elevationList = []\n", + " hundredNums = len(latitude) // 100\n", + " # 查询整百的高程\n", + " for i in range(hundredNums):\n", + " latLngString = \"\"\n", + " for idx in range(100 * i, 100 * (i + 1)):\n", + " latLngString += (str(latitude[idx]) + \",\" + str(longitude[idx]) + \"|\")\n", + " res = self.__SendQuery(latLngString)\n", + " for idx in range(100):\n", + " elevationList.append(res[\"results\"][idx][\"elevation\"])\n", + " time.sleep(1)\n", + " # 查询剩余的不到100的高程\n", + " latLngString = \"\"\n", + " for i in range(hundredNums * 100, len(latitude)):\n", + " latLngString += (str(latitude[i]) + \",\" + str(longitude[i]) + \"|\")\n", + " res = self.__SendQuery(latLngString)\n", + " for i in range(len(latitude) - hundredNums * 100):\n", + " elevationList.append(res[\"results\"][i][\"elevation\"])\n", + " return elevationList\n", + "\n", + " def ExportToXlsx(self, latLongDf: pd.DataFrame, elevationList: List[float], outputPath: str) -> None:\n", + " \"\"\"\n", + " 如果用户可以传入一个DataFrame数据,可以将返回得到的高程拼接并输出\n", + 
" :param latLongDf: DataFrame数据\n", + " :param elevationList: 高程数组\n", + " :param outputPath: 输出路径\n", + " :return: 无返回值\n", + " \"\"\"\n", + " latLongDf[\"elevation\"] = elevationList\n", + " latLongDf.to_excel(outputPath, index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "2a226b08-0c92-483e-b590-29a39dce6298", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data = pd.read_excel('./lat_lon.xlsx')\n", + "data.columns = ['plant', 'longitude', 'latitude']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "91afb581-1994-47c4-85ca-d3ea3e13e95d", + "metadata": {}, + "outputs": [], + "source": [ + "multiEle = ele.GetMultiElevation(data[\"latitude\"], data[\"longitude\"])\n", + "print(multiEle)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/xgboost多任务回归-checkpoint.ipynb b/.ipynb_checkpoints/xgboost多任务回归-checkpoint.ipynb new file mode 100644 index 0000000..a810857 --- /dev/null +++ b/.ipynb_checkpoints/xgboost多任务回归-checkpoint.ipynb @@ -0,0 +1,2972 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sklearn.multioutput import MultiOutputRegressor\n", + "import xgboost as xgb\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": 
"code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
企业名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
5740榆能榆神热电有限公司2350.0抽凝式超临界间接空冷25514.038.847.28烟煤陕西省109.82026538.30438311510.6617590.091483
\n", + "
" + ], + "text/plain": [ + " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n", + "5740 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.0 \n", + "\n", + " 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n", + "5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n", + "\n", + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", + "5740 0.661759 0.091483 " + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data = pd.read_excel('train_data.xlsx')\n", + "total_data.tail(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['企业名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)',\n", + " '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude',\n", + " 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'],\n", + " dtype='object')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "num_cols = ['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude', 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']\n", + "# object_cols = ['所处地区', '类型', '机组参数', '冷却型式']\n", + "object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式']\n", + "# object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式', '煤种']" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def change_str(x):\n", + " if '空冷' in x:\n", + " return '空冷'\n", + " if '水冷' in x:\n", + " return '水冷'" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, 
+ "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data = total_data[total_data['发电碳排放因子(kg/kWh)'] <= 0.9].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['所处地区',\n", + " '机组类型',\n", + " '参数分类',\n", + " '冷凝器型式',\n", + " '铭牌容量 (MW)',\n", + " '入炉煤低位热值(kJ/kg)',\n", + " '燃煤挥发份Var(%)',\n", + " '燃煤灰份Aar(%)',\n", + " 'longitude',\n", + " 'latitude',\n", + " 'altitude']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_cols = object_cols + [x for x in num_cols if '因子' not in x]\n", + "use_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data = total_data[~total_data['供热碳排放因子(kg/MJ)'].isna()].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(5732, 16)" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1092, 14)" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.groupby(['企业名称', '机组编号']).count().shape" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { 
+ "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data['入炉煤低位热值(kJ/kg)'] = total_data['入炉煤低位热值(kJ/kg)'].apply(lambda x: x * 1000 if x < 100 else x * 1)\n", + "total_data['燃煤灰份Aar(%)'] = total_data['燃煤灰份Aar(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1)\n", + "total_data['燃煤挥发份Var(%)'] = total_data['燃煤挥发份Var(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data.altitude = total_data.altitude.apply(lambda x: 0 if x < 0 else x)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(5629, 16)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data = total_data[(total_data['供热碳排放因子(kg/MJ)'] > 0.01)&(total_data['供热碳排放因子(kg/MJ)'] < 0.1)].copy()\n", + "use_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 5629.000000\n", + "mean 0.070915\n", + "std 0.009967\n", + "min 0.010464\n", + "25% 0.065467\n", + "50% 0.071533\n", + "75% 0.077513\n", + "max 0.099905\n", + "Name: 供热碳排放因子(kg/MJ), dtype: float64" + ] + }, + "execution_count": 21, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data['供热碳排放因子(kg/MJ)'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "train_data = use_data.groupby(use_cols)['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0上海市供热式亚临界水冷300.018366.0026.0515.70121.47114031.06511330.5375740.070992
1上海市供热式亚临界水冷300.018426.0026.0515.70121.47114031.06511330.5455160.072476
2上海市供热式亚临界水冷300.019507.0026.4014.95121.47114031.06511330.5958490.064745
3上海市供热式亚临界水冷300.019599.0026.7811.58121.47114031.06511330.5844320.068390
4上海市供热式亚临界水冷300.020125.0024.9214.90121.47114031.06511330.6053690.066996
..........................................
3961黑龙江省纯凝式超高压水冷200.015941.2123.8314.73126.57564745.9185661180.5001720.064200
3962黑龙江省纯凝式超高压水冷210.015355.0042.0036.70131.69586446.580444910.5183010.063249
3963黑龙江省背压式超高压水冷-开式循环200.013396.0023.3915.66123.63914647.2106961510.2243120.053770
3964黑龙江省背压式超高压水冷-闭式循环215.015753.0036.2942.40129.60480344.6082022500.2908140.068027
3965黑龙江省背压式超高压水冷-闭式循环215.016471.1130.1038.67129.60480344.6082022500.3216350.067798
\n", + "

3966 rows × 13 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n", + "0 上海市 供热式 亚临界 水冷 300.0 18366.00 26.05 \n", + "1 上海市 供热式 亚临界 水冷 300.0 18426.00 26.05 \n", + "2 上海市 供热式 亚临界 水冷 300.0 19507.00 26.40 \n", + "3 上海市 供热式 亚临界 水冷 300.0 19599.00 26.78 \n", + "4 上海市 供热式 亚临界 水冷 300.0 20125.00 24.92 \n", + "... ... ... ... ... ... ... ... \n", + "3961 黑龙江省 纯凝式 超高压 水冷 200.0 15941.21 23.83 \n", + "3962 黑龙江省 纯凝式 超高压 水冷 210.0 15355.00 42.00 \n", + "3963 黑龙江省 背压式 超高压 水冷-开式循环 200.0 13396.00 23.39 \n", + "3964 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 15753.00 36.29 \n", + "3965 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 16471.11 30.10 \n", + "\n", + " 燃煤灰份Aar(%) longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n", + "0 15.70 121.471140 31.065113 3 0.537574 \n", + "1 15.70 121.471140 31.065113 3 0.545516 \n", + "2 14.95 121.471140 31.065113 3 0.595849 \n", + "3 11.58 121.471140 31.065113 3 0.584432 \n", + "4 14.90 121.471140 31.065113 3 0.605369 \n", + "... ... ... ... ... ... \n", + "3961 14.73 126.575647 45.918566 118 0.500172 \n", + "3962 36.70 131.695864 46.580444 91 0.518301 \n", + "3963 15.66 123.639146 47.210696 151 0.224312 \n", + "3964 42.40 129.604803 44.608202 250 0.290814 \n", + "3965 38.67 129.604803 44.608202 250 0.321635 \n", + "\n", + " 供热碳排放因子(kg/MJ) \n", + "0 0.070992 \n", + "1 0.072476 \n", + "2 0.064745 \n", + "3 0.068390 \n", + "4 0.066996 \n", + "... ... 
\n", + "3961 0.064200 \n", + "3962 0.063249 \n", + "3963 0.053770 \n", + "3964 0.068027 \n", + "3965 0.067798 \n", + "\n", + "[3966 rows x 13 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " if '因子' not in col:\n", + " train_data[col] = np.log1p(train_data[col])" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data = train_data[train_data['供热碳排放因子(kg/MJ)']<=0.1].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data = pd.get_dummies(train_data, columns=object_cols).dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in train_data.columns:\n", + " train_data[col] = train_data[col].astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "feature_cols = [x for x in train_data.columns if '因子' not in x]\n", + "target_cols = [x for x in train_data.columns if '因子' in x]" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { 
+ "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data.to_csv('./train_data_processed.csv', encoding='utf-8-sig', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train, test = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=666)\n", + "train, valid = train_test_split(train, test_size=0.2, shuffle=True, random_state=666)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_X, train_y = train[feature_cols], train[target_cols]\n", + "valid_X, valid_y = valid[feature_cols], valid[target_cols]\n", + "test_X, test_y = test[feature_cols], test[target_cols]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBRegressor\n", + "from bayes_opt import BayesianOptimization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 发电建模" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.01,\n", + " 'max_depth': 60,\n", + " 'subsample': 0.85,\n", + " 'colsample_bytree': 0.85,\n", + " 'min_child_weight': 10,\n", + " 'seed': 10}\n", + "\n", + "num_boost_round = 2000\n", + "\n", + "dtrain = xgb.DMatrix(train_X,
train_y.values[:, 0])\n", + "dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 0])\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=200, verbose_eval=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "y_pred_xgb = gb_model.predict(xgb.DMatrix(test_X))\n", + "y_true_xgb = test_y.values[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 5.5E-04\n", + "RMSE: 0.0235\n", + "MAE: 0.0145\n", + "MAPE: 2.99 %\n", + "R_2: 0.9011\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_xgb, y_pred_xgb))\n", + "MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n", + "MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n", + "R_2 = r2_score(y_true_xgb, y_pred_xgb)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 4))\n", + "print('MAE:', round(MAE, 4))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "power_eva_df = pd.DataFrame.from_records([y_true_xgb, y_pred_xgb]).T\n", + "power_eva_df.to_csv('./发电测试结果.csv', index=False, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden":
false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "gb_model.save_model('./models/power_model.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 供热建模" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def xgb_cv(max_depth, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n", + " val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth),\n", + " learning_rate=learning_rate,\n", + " n_estimators=2000,\n", + " min_child_weight=min_child_weight,\n", + " subsample=max(min(subsample, 1), 0),\n", + " colsample_bytree=max(min(colsample_bytree, 1), 0),\n", + " reg_alpha=max(reg_alpha, 0), gamma=gamma, objective='reg:squarederror',\n", + " booster='gbtree',\n", + " seed=10), X=train[feature_cols], y=train['供热碳排放因子(kg/MJ)'], scoring='r2',\n", + " cv=10).max()\n", + " return val" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.01,\n", + " 'max_depth': 30,\n", + " 'subsample': 0.8,\n", + " 'colsample_bytree': 0.9,\n", + " 'min_child_weight': 10,\n", + " 'seed': 108}\n", + "\n", + "num_boost_round = 2000\n", + "\n", + "dtrain = xgb.DMatrix(train_X, train_y.values[:, 1])\n", + "dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 1])\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=200, verbose_eval=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, +
"metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "y_pred_heat = gb_model_heat.predict(xgb.DMatrix(test_X))\n", + "y_true_heat = test_y.values[:, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 2.9E-05\n", + "RMSE: 0.0054\n", + "MAE: 0.0024\n", + "MAPE: 5.19 %\n", + "R_2: 0.7392\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_heat, y_pred_heat)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_heat, y_pred_heat))\n", + "MAE = mean_absolute_error(y_true_heat, y_pred_heat)\n", + "MAPE = mean_absolute_percentage_error(y_true_heat, y_pred_heat)\n", + "R_2 = r2_score(y_true_heat, y_pred_heat)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 4))\n", + "print('MAE:', round(MAE, 4))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "pd.DataFrame.from_records([y_true_heat, y_pred_heat]).T.to_csv('./供热测试结果.csv', index=False, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "gb_model_heat.save_model('./models/heat_model.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 煤种标准化工程" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed":
false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "new_values = use_data.groupby(['煤种', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
煤种入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
无烟煤19827.0011.182539.000.5614240.087794
烟煤16733.0022.5327.460.4415110.064259
16740.0018.9937.000.4872250.064535
27.9324.430.4184570.064747
16741.0026.6925.920.4336790.061822
..................
贫煤22149.0012.4325.100.6297330.082772
22272.5111.8322.970.6278770.083234
22475.978.9023.980.6203310.086574
23215.0011.0019.310.6822210.080249
23791.0011.0019.310.7017950.082240
\n", + "

3936 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ)\n", + "煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n", + "无烟煤 19827.00 11.18 2539.00 0.561424 0.087794\n", + "烟煤 16733.00 22.53 27.46 0.441511 0.064259\n", + " 16740.00 18.99 37.00 0.487225 0.064535\n", + " 27.93 24.43 0.418457 0.064747\n", + " 16741.00 26.69 25.92 0.433679 0.061822\n", + "... ... ...\n", + "贫煤 22149.00 12.43 25.10 0.629733 0.082772\n", + " 22272.51 11.83 22.97 0.627877 0.083234\n", + " 22475.97 8.90 23.98 0.620331 0.086574\n", + " 23215.00 11.00 19.31 0.682221 0.080249\n", + " 23791.00 11.00 19.31 0.701795 0.082240\n", + "\n", + "[3936 rows x 2 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_values" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
煤种入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
0无烟煤19827.0011.182539.00
1烟煤16733.0022.5327.46
2烟煤16740.0018.9937.00
3烟煤16740.0027.9324.43
4烟煤16741.0026.6925.92
...............
3931贫煤22149.0012.4325.10
3932贫煤22272.5111.8322.97
3933贫煤22475.978.9023.98
3934贫煤23215.0011.0019.31
3935贫煤23791.0011.0019.31
\n", + "

3936 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " 煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%)\n", + "0 无烟煤 19827.00 11.18 2539.00\n", + "1 烟煤 16733.00 22.53 27.46\n", + "2 烟煤 16740.00 18.99 37.00\n", + "3 烟煤 16740.00 27.93 24.43\n", + "4 烟煤 16741.00 26.69 25.92\n", + "... ... ... ... ...\n", + "3931 贫煤 22149.00 12.43 25.10\n", + "3932 贫煤 22272.51 11.83 22.97\n", + "3933 贫煤 22475.97 8.90 23.98\n", + "3934 贫煤 23215.00 11.00 19.31\n", + "3935 贫煤 23791.00 11.00 19.31\n", + "\n", + "[3936 rows x 4 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coal_df = new_values.reset_index().drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])\n", + "coal_df" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "coal_params_dict = dict()\n", + "for coal_type in coal_df['煤种'].unique().tolist():\n", + " options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']].values\n", + " coal_params_dict[coal_type] = options" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
电厂名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0江苏利港电力有限公司1350.0纯凝式亚临界水冷21602.0500026.0916.80烟煤江苏省120.09662031.9423611.00.5869900.076843
1江苏利港电力有限公司1350.0纯凝式亚临界水冷21926.8100026.6815.41烟煤江苏省120.09662031.9423611.00.6328590.077676
2江苏利港电力有限公司1350.0纯凝式亚临界水冷21261.9306226.4615.18烟煤江苏省120.09662031.9423611.00.6091960.074823
3江苏利港电力有限公司1350.0纯凝式亚临界水冷20840.0000026.4314.55烟煤江苏省120.09662031.9423611.00.6021780.081628
4江苏利港电力有限公司1350.0纯凝式亚临界水冷20706.0000026.4314.96烟煤江苏省120.09662031.9423611.00.5902540.081103
...................................................
5736浙江浙能电力股份有限公司台州发电厂8350.0纯凝式亚临界NaN21973.0000037.4317.12烟煤浙江省121.46584028.70462373.00.6283000.078776
5737浙江浙能电力股份有限公司台州发电厂8350.0纯凝式亚临界NaN21372.0000039.8718.01烟煤浙江省121.46584028.70462373.00.5950190.076622
5738浙江浙能电力股份有限公司台州发电厂8350.0纯凝式亚临界NaN20856.0000039.3219.74烟煤浙江省121.46584028.70462373.00.5657180.074772
5739榆能榆神热电有限公司1350.0供热式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.3043831151.00.6644560.091482
5740榆能榆神热电有限公司2350.0供热式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.3043831151.00.6617590.091483
\n", + "

5629 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " 电厂名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n", + "0 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21602.05000 \n", + "1 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21926.81000 \n", + "2 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21261.93062 \n", + "3 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 20840.00000 \n", + "4 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 20706.00000 \n", + "... ... ... ... ... ... ... ... \n", + "5736 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 21973.00000 \n", + "5737 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 21372.00000 \n", + "5738 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 20856.00000 \n", + "5739 榆能榆神热电有限公司 1 350.0 供热式 超临界 间接空冷 25514.00000 \n", + "5740 榆能榆神热电有限公司 2 350.0 供热式 超临界 间接空冷 25514.00000 \n", + "\n", + " 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n", + "0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "... ... ... .. ... ... ... ... \n", + "5736 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73.0 \n", + "5737 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73.0 \n", + "5738 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73.0 \n", + "5739 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n", + "5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n", + "\n", + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", + "0 0.586990 0.076843 \n", + "1 0.632859 0.077676 \n", + "2 0.609196 0.074823 \n", + "3 0.602178 0.081628 \n", + "4 0.590254 0.081103 \n", + "... ... ... 
\n", + "5736 0.628300 0.078776 \n", + "5737 0.595019 0.076622 \n", + "5738 0.565718 0.074772 \n", + "5739 0.664456 0.091482 \n", + "5740 0.661759 0.091483 \n", + "\n", + "[5629 rows x 16 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude煤种发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5375740.070992
1上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5455160.072476
2上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5958490.064745
3上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5844320.068390
4上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.6053690.066996
....................................
3075黑龙江省纯凝式超高压水冷200.0126.57564745.918566118.0褐煤0.5001720.064200
3076黑龙江省纯凝式超高压水冷200.0129.60480344.608202250.0褐煤0.3782980.069663
3077黑龙江省纯凝式超高压水冷210.0131.69586446.58044491.0褐煤0.5183010.063249
3078黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.0褐煤0.2908140.068027
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.0褐煤0.3216350.067798
\n", + "

3080 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude 煤种 \\\n", + "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", + "1 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", + "2 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", + "3 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", + "4 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", + "... ... ... ... ... ... ... ... ... .. \n", + "3075 黑龙江省 纯凝式 超高压 水冷 200.0 126.575647 45.918566 118.0 褐煤 \n", + "3076 黑龙江省 纯凝式 超高压 水冷 200.0 129.604803 44.608202 250.0 褐煤 \n", + "3077 黑龙江省 纯凝式 超高压 水冷 210.0 131.695864 46.580444 91.0 褐煤 \n", + "3078 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 褐煤 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 褐煤 \n", + "\n", + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", + "0 0.537574 0.070992 \n", + "1 0.545516 0.072476 \n", + "2 0.595849 0.064745 \n", + "3 0.584432 0.068390 \n", + "4 0.605369 0.066996 \n", + "... ... ... \n", + "3075 0.500172 0.064200 \n", + "3076 0.378298 0.069663 \n", + "3077 0.518301 0.063249 \n", + "3078 0.290814 0.068027 \n", + "3079 0.321635 0.067798 \n", + "\n", + "[3080 rows x 11 columns]" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_use_data = use_data.groupby(use_cols+['煤种'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index().drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])\n", + "new_use_data" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + 
"new_use_data.drop(columns='煤种', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_data = new_use_data.explode(column='coal_params')" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)coal_params
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16733.0, 22.53, 27.46]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16740.0, 18.99, 37.0]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16740.0, 27.93, 24.43]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16741.0, 26.69, 25.92]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16741.51, 19.51, 35.62]
....................................
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16723.0, 40.63, 39.94]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16725.0, 26.36, 28.51]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16725.19, 34.59, 37.71]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16725.85, 43.2, 12.0]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16729.0, 51.42, 17.33]
\n", + "

7151079 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", + "... ... ... ... ... ... ... ... ... \n", + "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", + "\n", + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) coal_params \n", + "0 0.537574 0.070992 [16733.0, 22.53, 27.46] \n", + "0 0.537574 0.070992 [16740.0, 18.99, 37.0] \n", + "0 0.537574 0.070992 [16740.0, 27.93, 24.43] \n", + "0 0.537574 0.070992 [16741.0, 26.69, 25.92] \n", + "0 0.537574 0.070992 [16741.51, 19.51, 35.62] \n", + "... ... ... ... 
\n", + "3079 0.321635 0.067798 [16723.0, 40.63, 39.94] \n", + "3079 0.321635 0.067798 [16725.0, 26.36, 28.51] \n", + "3079 0.321635 0.067798 [16725.19, 34.59, 37.71] \n", + "3079 0.321635 0.067798 [16725.85, 43.2, 12.0] \n", + "3079 0.321635 0.067798 [16729.0, 51.42, 17.33] \n", + "\n", + "[7151079 rows x 11 columns]" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_data" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_data['入炉煤低位热值(kJ/kg)'] = new_data.coal_params.apply(lambda x: x[0]).values\n", + "new_data['燃煤挥发份Var(%)'] = new_data.coal_params.apply(lambda x: x[1]).values\n", + "new_data['燃煤灰份Aar(%)'] = new_data.coal_params.apply(lambda x: x[2]).values" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "norm_data = new_data.drop(columns='coal_params')" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216733.0022.5327.46
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216740.0018.9937.00
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216740.0027.9324.43
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216741.0026.6925.92
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216741.5119.5135.62
\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", + "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", + "\n", + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n", + "0 0.537574 0.070992 16733.00 22.53 27.46 \n", + "0 0.537574 0.070992 16740.00 18.99 37.00 \n", + "0 0.537574 0.070992 16740.00 27.93 24.43 \n", + "0 0.537574 0.070992 16741.00 26.69 25.92 \n", + "0 0.537574 0.070992 16741.51 19.51 35.62 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " norm_data[col] = np.log1p(norm_data[col])\n", + " # total_data[col] = (total_data[col] - total_data[col].min()) / (total_data[col].max() - total_data[col].min())\n", + "norm_data_dummpy = pd.get_dummies(norm_data, columns=object_cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "norm_data_dummpy.drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_xgb_data = xgb.DMatrix(norm_data_dummpy[feature_cols])" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": {}, + "outputs": [], + "source": [ + "norm_data['power_co2_factor'] = gb_model.predict(new_xgb_data)\n", + 
"norm_data['heat_co2_factor'] = gb_model_heat.predict(new_xgb_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "normaled_data = norm_data.drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4925580.064411
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4740820.062117
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4896230.063859
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4936150.064382
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4708830.062354
.................................
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3889120.067787
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3886060.065639
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3809710.068147
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.4019730.065844
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3863690.065845
\n", + "

7151079 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "... ... ... ... ... ... ... ... ... \n", + "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", + "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", + "\n", + " power_co2_factor heat_co2_factor \n", + "0 0.492558 0.064411 \n", + "0 0.474082 0.062117 \n", + "0 0.489623 0.063859 \n", + "0 0.493615 0.064382 \n", + "0 0.470883 0.062354 \n", + "... ... ... \n", + "3079 0.388912 0.067787 \n", + "3079 0.388606 0.065639 \n", + "3079 0.380971 0.068147 \n", + "3079 0.401973 0.065844 \n", + "3079 0.386369 0.065845 \n", + "\n", + "[7151079 rows x 10 columns]" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normaled_data" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": {}, + "outputs": [], + "source": [ + "target_cols = ['power_co2_factor', 'heat_co2_factor']" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": {}, + "outputs": [], + "source": [ + "save_data = normaled_data.groupby([x for x in normaled_data.columns if x not in target_cols])[target_cols].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [], + "source": [ + "save_data.reset_index().to_csv('./results/去煤种化数据.csv', encoding='utf-8-sig', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], 
+ "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/.ipynb_checkpoints/两张表特征对齐-checkpoint.ipynb b/.ipynb_checkpoints/两张表特征对齐-checkpoint.ipynb new file mode 100644 index 0000000..3384653 --- /dev/null +++ b/.ipynb_checkpoints/两张表特征对齐-checkpoint.ipynb @@ -0,0 +1,759 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8950aafd-80e8-4078-874c-966efdc4b0ac", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "50832980-f7e1-4a19-a5e0-b8a378ebd39b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
电厂名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0江苏利港电力有限公司1350.0纯凝式亚临界水冷21602.0500026.0916.80烟煤江苏省120.0966231.9423611.00.5869900.076843
1江苏利港电力有限公司1350.0纯凝式亚临界水冷21926.8100026.6815.41烟煤江苏省120.0966231.9423611.00.6328590.077676
2江苏利港电力有限公司1350.0纯凝式亚临界水冷21261.9306226.4615.18烟煤江苏省120.0966231.9423611.00.6091960.074823
\n", + "
" + ], + "text/plain": [ + " 电厂名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n", + "0 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21602.05000 26.09 \n", + "1 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21926.81000 26.68 \n", + "2 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21261.93062 26.46 \n", + "\n", + " 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n", + "0 16.80 烟煤 江苏省 120.09662 31.942361 1.0 0.586990 \n", + "1 15.41 烟煤 江苏省 120.09662 31.942361 1.0 0.632859 \n", + "2 15.18 烟煤 江苏省 120.09662 31.942361 1.0 0.609196 \n", + "\n", + " 供热碳排放因子(kg/MJ) \n", + "0 0.076843 \n", + "1 0.077676 \n", + "2 0.074823 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data = pd.read_csv('train_data.csv')\n", + "total_data.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "499cac72-c6a3-4b86-8aed-6fc010b12693", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5741, 16)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7ab5d82e-19bd-4aa4-9cd6-d2004718b00d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
发电类型地区城市企业名称机组编号机组状态机组数量单机容量(MW)总容量(MW)核心设备类型汽轮机类型压力参数冷却方式
0煤电安徽省安庆市国能神皖安庆发电有限责任公司1在役1320.0320.0煤粉锅炉凝气式亚临界水冷-开式循环
1煤电安徽省安庆市国能神皖安庆发电有限责任公司2在役1320.0320.0煤粉锅炉凝气式亚临界水冷-开式循环
2煤电安徽省安庆市国能神皖安庆发电有限责任公司3在役11000.01000.0煤粉锅炉凝气式超超临界水冷-闭式循环
\n", + "
" + ], + "text/plain": [ + " 发电类型 地区 城市 企业名称 机组编号 机组状态 机组数量 单机容量(MW) 总容量(MW) 核心设备类型 \\\n", + "0 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 1 在役 1 320.0 320.0 煤粉锅炉 \n", + "1 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 2 在役 1 320.0 320.0 煤粉锅炉 \n", + "2 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 3 在役 1 1000.0 1000.0 煤粉锅炉 \n", + "\n", + " 汽轮机类型 压力参数 冷却方式 \n", + "0 凝气式 亚临界 水冷-开式循环 \n", + "1 凝气式 亚临界 水冷-开式循环 \n", + "2 凝气式 超超临界 水冷-闭式循环 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx')\n", + "unit_data.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c4d54203-5343-43df-b594-f6a13e6f47a1", + "metadata": {}, + "outputs": [], + "source": [ + "total_data.rename(columns={'电厂名称':'企业名称'}, inplace=True)\n", + "total_data['机组编号'] = total_data['机组编号'].astype('str')\n", + "unit_data['机组编号'] = unit_data['机组编号'].astype('str')" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "757e26c3-cd1b-48a3-9668-78e13f40436f", + "metadata": {}, + "outputs": [], + "source": [ + "def change_type(x:str):\n", + " if pd.isna(x):\n", + " return x\n", + " x = x.strip()\n", + " if '纯凝' in x:\n", + " return '纯凝式'\n", + " if '供热' in x:\n", + " return '供热式'\n", + " if '煤粉' in x:\n", + " return '煤粉锅炉'\n", + " if x.startswith('循环流化床'):\n", + " return '循环流化床锅炉'\n", + " if '三废' in x:\n", + " return '三废炉'\n", + " if '直接空冷' in x:\n", + " return '直接空冷'\n", + " if '间接空冷' in x:\n", + " return '间接空冷'\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "fcc7c556-ae7b-4be1-9163-709ce1ca084c", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data = total_data.merge(unit_data[['企业名称','机组编号','汽轮机类型', '压力参数', '冷却方式']], how='left', on=['企业名称', '机组编号'])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "3af6ad2f-a881-4ee6-9a27-ecbe75c97b31", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data['机组类型'] = merge_data.apply(lambda x: x['机组类型'] 
if pd.isna(x['汽轮机类型']) else x['汽轮机类型'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "dec103bc-c868-4557-ba83-9bbb02f8e9f8", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data['参数分类'] = merge_data.apply(lambda x: x['参数分类'] if pd.isna(x['压力参数']) else x['压力参数'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "454273f0-51ab-4a75-9c44-9ae8b7cc2a79", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data['冷凝器型式'] = merge_data.apply(lambda x: x['冷凝器型式'] if pd.isna(x['冷却方式']) else x['冷却方式'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "d3c9cb26-63b4-4c72-9c5b-d90a2c5867ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "水冷-闭式循环 2143\n", + "水冷 1166\n", + "水冷-开式循环 1101\n", + "空冷-直接空冷 492\n", + "直接空冷 241\n", + "空冷-间接空冷 154\n", + "间接空冷 74\n", + "空冷 19\n", + "其他 2\n", + "Name: 冷凝器型式, dtype: int64" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_data['冷凝器型式'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "30b2d793-7b44-434a-96e3-c6ce15295881", + "metadata": {}, + "outputs": [], + "source": [ + "use_data = merge_data[merge_data.columns[:-3]].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "fbdf13c0-6174-463b-9dd0-9ed736e6d126", + "metadata": {}, + "outputs": [], + "source": [ + "for col in ['机组类型', '参数分类', '冷凝器型式']:\n", + " use_data[col] = use_data[col].apply(change_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "9697f501-1fef-4f24-b9d9-bece28e2c867", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
企业名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21602.0500026.0916.80烟煤江苏省120.09662031.9423611.00.5869900.076843
1江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21926.8100026.6815.41烟煤江苏省120.09662031.9423611.00.6328590.077676
2江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21261.9306226.4615.18烟煤江苏省120.09662031.9423611.00.6091960.074823
3江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环20840.0000026.4314.55烟煤江苏省120.09662031.9423611.00.6021780.081628
4江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环20706.0000026.4314.96烟煤江苏省120.09662031.9423611.00.5902540.081103
...................................................
5736浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环21973.0000037.4317.12烟煤浙江省121.46584028.70462373.00.6283000.078776
5737浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环21372.0000039.8718.01烟煤浙江省121.46584028.70462373.00.5950190.076622
5738浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环20856.0000039.3219.74烟煤浙江省121.46584028.70462373.00.5657180.074772
5739榆能榆神热电有限公司1350.0抽凝式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.3043831151.00.6644560.091482
5740榆能榆神热电有限公司2350.0抽凝式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.3043831151.00.6617590.091483
\n", + "

5741 rows × 16 columns

\n", + "
" + ], + "text/plain": [ + " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n", + "0 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21602.05000 \n", + "1 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21926.81000 \n", + "2 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21261.93062 \n", + "3 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20840.00000 \n", + "4 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20706.00000 \n", + "... ... ... ... ... ... ... ... \n", + "5736 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21973.00000 \n", + "5737 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21372.00000 \n", + "5738 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 20856.00000 \n", + "5739 榆能榆神热电有限公司 1 350.0 抽凝式 超临界 间接空冷 25514.00000 \n", + "5740 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.00000 \n", + "\n", + " 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n", + "0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1.0 \n", + "... ... ... .. ... ... ... ... \n", + "5736 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73.0 \n", + "5737 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73.0 \n", + "5738 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73.0 \n", + "5739 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n", + "5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n", + "\n", + " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", + "0 0.586990 0.076843 \n", + "1 0.632859 0.077676 \n", + "2 0.609196 0.074823 \n", + "3 0.602178 0.081628 \n", + "4 0.590254 0.081103 \n", + "... ... ... 
\n", + "5736 0.628300 0.078776 \n", + "5737 0.595019 0.076622 \n", + "5738 0.565718 0.074772 \n", + "5739 0.664456 0.091482 \n", + "5740 0.661759 0.091483 \n", + "\n", + "[5741 rows x 16 columns]" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ff803c5a-5e56-462b-81fc-639877395d69", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/.ipynb_checkpoints/基于attention+LSTM对天数据建模-checkpoint.ipynb b/.ipynb_checkpoints/基于attention+LSTM对天数据建模-checkpoint.ipynb new file mode 100644 index 0000000..10dda4d --- /dev/null +++ b/.ipynb_checkpoints/基于attention+LSTM对天数据建模-checkpoint.ipynb @@ -0,0 +1,737 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'get_config' from 'tensorflow.python.eager.context' (C:\\Users\\zhaojh\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\context.py)", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mImportError\u001B[0m Traceback (most recent call last)", + "\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_7812\\4169542727.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mnumpy\u001B[0m \u001B[1;32mas\u001B[0m 
\u001B[0mnp\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 2\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mpandas\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mpd\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 3\u001B[1;33m \u001B[1;32mimport\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 4\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mlayers\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mDense\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mConv1D\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mInput\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mBidirectional\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mLSTM\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mMultiply\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mDropout\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mFlatten\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mSoftmax\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mLambda\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmodels\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mModel\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\__init__.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 23\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 24\u001B[0m \u001B[1;31m# See b/110718070#comment18 for more details about this import.\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 25\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mmodels\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 26\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 27\u001B[0m \u001B[1;32mfrom\u001B[0m 
\u001B[0mkeras\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mengine\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0minput_layer\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mInput\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\models.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 17\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 18\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mcompat\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mv2\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mtf\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 19\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mbackend\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 20\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mmetrics\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mmetrics_module\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 21\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0moptimizer_v1\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\backend.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 35\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdistribute\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mdistribute_coordinator\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mdc\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 36\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdistribute\u001B[0m \u001B[1;32mimport\u001B[0m 
\u001B[0mdistribute_coordinator_context\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mdc_context\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 37\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0meager\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mcontext\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mget_config\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 38\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mframework\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mconfig\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 39\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mbackend_config\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;31mImportError\u001B[0m: cannot import name 'get_config' from 'tensorflow.python.eager.context' (C:\\Users\\zhaojh\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\context.py)" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import keras\n", + "from keras.layers import Dense, Conv1D, Input, Bidirectional, LSTM, Multiply, Dropout, Flatten, Softmax, Lambda\n", + "from keras.models import Model" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "data = pd.read_csv('./train_data_processed.csv')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "obj_cols = data.columns[-32:]\n", + "num_cols = [x for x in data.columns if x not in obj_cols]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + 
"name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "maxs = data[num_cols].max()\n", + "mins = data[num_cols].min()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " data[col] = (data[col] - mins[col]) / (maxs[col] - mins[col])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "optim = keras.optimizers.Adam(learning_rate=5e-4)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "def build_model(n_features, n_outs):\n", + " inputs = Input(shape=(1, n_features))\n", + " x = Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)\n", + " x = Dropout(rate=0.1)(x)\n", + " lstm_out = Bidirectional(LSTM(units=128, return_sequences=True))(x)\n", + " attention_pre = Dense(1, name='attention_vec')(lstm_out)\n", + " attention_probs = Softmax()(attention_pre)\n", + " attention_mul = Multiply()([attention_probs, lstm_out])\n", + " attention_mul = Flatten()(attention_mul)\n", + " output = Dense(32, activation='relu')(attention_mul)\n", + " output = Dense(n_outs, activation='sigmoid')(output)\n", + " model = Model(inputs=[inputs], outputs=output)\n", + " model.summary()\n", + " model.compile(loss='mse', optimizer=optim,)\n", + " return model" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"model_1\"\n", + "__________________________________________________________________________________________________\n", + "Layer (type) Output Shape Param # 
Connected to \n", + "==================================================================================================\n", + "input_1 (InputLayer) (None, 1, 251) 0 \n", + "__________________________________________________________________________________________________\n", + "conv1d_1 (Conv1D) (None, 1, 64) 16128 input_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dropout_1 (Dropout) (None, 1, 64) 0 conv1d_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "bidirectional_1 (Bidirectional) (None, 1, 256) 197632 dropout_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "attention_vec (Dense) (None, 1, 1) 257 bidirectional_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "softmax_1 (Softmax) (None, 1, 1) 0 attention_vec[0][0] \n", + "__________________________________________________________________________________________________\n", + "multiply_1 (Multiply) (None, 1, 256) 0 softmax_1[0][0] \n", + " bidirectional_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "flatten_1 (Flatten) (None, 256) 0 multiply_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense_1 (Dense) (None, 32) 8224 flatten_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense_2 (Dense) (None, 1) 33 dense_1[0][0] \n", + "==================================================================================================\n", + "Total params: 222,274\n", + "Trainable params: 222,274\n", + "Non-trainable params: 0\n", + 
"__________________________________________________________________________________________________\n" + ] + } + ], + "source": [ + "model = build_model(len(data.columns) - 1, 1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "from tensorflow.python.keras.utils.vis_utils import plot_model" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": "" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plot_model(model, to_file='model.png')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "data": { + "text/plain": "251" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_cols = [x for x in data.columns if x != '燃料消耗量']\n", + "len(feature_cols)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "train_data, valid = train_test_split(data, test_size=0.2, shuffle=True, random_state=666)\n", + "valid_data, test_data = train_test_split(valid, test_size=0.5, shuffle=True, random_state=666)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "X_train, Y_train = train_data[feature_cols], 
train_data['燃料消耗量']\n", + "X_valid, Y_valid = valid_data[feature_cols], valid_data['燃料消耗量']\n", + "X_test, Y_test = test_data[feature_cols], test_data['燃料消耗量']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [ + { + "data": { + "text/plain": "((922, 1, 251), (922, 1))" + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train = np.expand_dims(X_train.values, axis=1)\n", + "y_train = Y_train.values.reshape(-1, 1)\n", + "x_train.shape, y_train.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [], + "source": [ + "x_valid = np.expand_dims(X_valid.values, axis=1)\n", + "y_valid = Y_valid.values.reshape(-1, 1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [], + "source": [ + "x_test = np.expand_dims(X_test.values, axis=1)\n", + "y_test = Y_test.values.reshape(-1, 1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [], + "source": [ + "callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=int(10)),\n", + " keras.callbacks.ModelCheckpoint('./best_model.h5', monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1)]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 26, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 922 samples, validate on 116 samples\n", + "Epoch 1/100\n", + "922/922 [==============================] - 1s 1ms/step - loss: 0.0396 - val_loss: 0.0128\n", + "Epoch 2/100\n", + "922/922 
[==============================] - 0s 69us/step - loss: 0.0074 - val_loss: 0.0057\n", + "Epoch 3/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 0.0048 - val_loss: 0.0040\n", + "Epoch 4/100\n", + "922/922 [==============================] - 0s 61us/step - loss: 0.0035 - val_loss: 0.0028\n", + "Epoch 5/100\n", + "922/922 [==============================] - 0s 77us/step - loss: 0.0030 - val_loss: 0.0023\n", + "Epoch 6/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0025 - val_loss: 0.0020\n", + "Epoch 7/100\n", + "922/922 [==============================] - 0s 86us/step - loss: 0.0023 - val_loss: 0.0020\n", + "Epoch 8/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 0.0023 - val_loss: 0.0018\n", + "Epoch 9/100\n", + "922/922 [==============================] - 0s 67us/step - loss: 0.0022 - val_loss: 0.0017\n", + "Epoch 10/100\n", + "922/922 [==============================] - 0s 61us/step - loss: 0.0019 - val_loss: 0.0016\n", + "Epoch 11/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0019 - val_loss: 0.0016\n", + "Epoch 12/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0019 - val_loss: 0.0017\n", + "Epoch 13/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0019 - val_loss: 0.0017\n", + "Epoch 14/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0018 - val_loss: 0.0015\n", + "Epoch 15/100\n", + "922/922 [==============================] - 0s 87us/step - loss: 0.0018 - val_loss: 0.0015\n", + "Epoch 16/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0016 - val_loss: 0.0013\n", + "Epoch 17/100\n", + "922/922 [==============================] - 0s 68us/step - loss: 0.0015 - val_loss: 0.0014\n", + "Epoch 18/100\n", + "922/922 [==============================] - 0s 73us/step - loss: 0.0014 - val_loss: 0.0013\n", + "Epoch 19/100\n", + "922/922 
[==============================] - 0s 52us/step - loss: 0.0017 - val_loss: 0.0014\n", + "Epoch 20/100\n", + "922/922 [==============================] - 0s 70us/step - loss: 0.0017 - val_loss: 0.0013\n", + "Epoch 21/100\n", + "922/922 [==============================] - 0s 73us/step - loss: 0.0015 - val_loss: 0.0013\n", + "Epoch 22/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0014 - val_loss: 0.0013\n", + "Epoch 23/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0014 - val_loss: 0.0012\n", + "Epoch 24/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0014\n", + "Epoch 25/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 26/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 27/100\n", + "922/922 [==============================] - 0s 65us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 28/100\n", + "922/922 [==============================] - 0s 89us/step - loss: 0.0013 - val_loss: 0.0013\n", + "Epoch 29/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 30/100\n", + "922/922 [==============================] - 0s 70us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 31/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 0.0014 - val_loss: 0.0013\n", + "Epoch 32/100\n", + "922/922 [==============================] - 0s 70us/step - loss: 0.0012 - val_loss: 0.0012\n", + "Epoch 33/100\n", + "922/922 [==============================] - 0s 51us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 34/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0012\n", + "Epoch 35/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 0.0013 - val_loss: 0.0014\n", + "Epoch 36/100\n", + "922/922 
[==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0011\n", + "Epoch 37/100\n", + "922/922 [==============================] - 0s 62us/step - loss: 0.0011 - val_loss: 0.0011\n", + "Epoch 38/100\n", + "922/922 [==============================] - 0s 77us/step - loss: 0.0012 - val_loss: 0.0012\n", + "Epoch 39/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 40/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0011\n", + "Epoch 41/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0014\n", + "Epoch 42/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0014\n", + "Epoch 43/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 44/100\n", + "922/922 [==============================] - 0s 51us/step - loss: 0.0012 - val_loss: 0.0011\n", + "Epoch 45/100\n", + "922/922 [==============================] - 0s 95us/step - loss: 0.0011 - val_loss: 0.0011\n", + "Epoch 46/100\n", + "922/922 [==============================] - 0s 68us/step - loss: 0.0011 - val_loss: 0.0011\n", + "Epoch 47/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0011 - val_loss: 0.0012\n", + "Epoch 48/100\n", + "922/922 [==============================] - 0s 66us/step - loss: 0.0011 - val_loss: 0.0010\n", + "Epoch 49/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.9569e-04 - val_loss: 9.4151e-04\n", + "Epoch 50/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0011\n", + "Epoch 51/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0011\n", + "Epoch 52/100\n", + "922/922 [==============================] - 0s 77us/step - loss: 0.0010 - val_loss: 9.9602e-04\n", + "Epoch 53/100\n", + 
"922/922 [==============================] - 0s 52us/step - loss: 9.3358e-04 - val_loss: 0.0012\n", + "Epoch 54/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 9.6337e-04\n", + "Epoch 55/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.9897e-04 - val_loss: 9.9428e-04\n", + "Epoch 56/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 9.1976e-04\n", + "Epoch 57/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 9.8333e-04 - val_loss: 0.0011\n", + "Epoch 58/100\n", + "922/922 [==============================] - 0s 86us/step - loss: 0.0011 - val_loss: 0.0010\n", + "Epoch 59/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0010\n", + "Epoch 60/100\n", + "922/922 [==============================] - 0s 51us/step - loss: 9.6106e-04 - val_loss: 9.5494e-04\n", + "Epoch 61/100\n", + "922/922 [==============================] - 0s 87us/step - loss: 9.1071e-04 - val_loss: 8.9771e-04\n", + "Epoch 62/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.1379e-04 - val_loss: 9.4967e-04\n", + "Epoch 63/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.3075e-04 - val_loss: 9.1627e-04\n", + "Epoch 64/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 8.8605e-04 - val_loss: 9.3663e-04\n", + "Epoch 65/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.5708e-04 - val_loss: 0.0011\n", + "Epoch 66/100\n", + "922/922 [==============================] - 0s 68us/step - loss: 9.5701e-04 - val_loss: 8.9826e-04\n", + "Epoch 67/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 9.4454e-04 - val_loss: 0.0011\n", + "Epoch 68/100\n", + "922/922 [==============================] - 0s 74us/step - loss: 9.5393e-04 - val_loss: 9.7981e-04\n", + "Epoch 69/100\n", + "922/922 
[==============================] - 0s 104us/step - loss: 9.5125e-04 - val_loss: 0.0010\n", + "Epoch 70/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 9.5720e-04 - val_loss: 9.7615e-04\n", + "Epoch 71/100\n", + "922/922 [==============================] - 0s 64us/step - loss: 9.2241e-04 - val_loss: 0.0010\n" + ] + }, + { + "data": { + "text/plain": "" + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_valid, y_valid), shuffle=True,\n", + " callbacks=callbacks)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [], + "source": [ + "y_pred = model.predict(x_test)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "data": { + "text/plain": "array([0.30161506, 0.12067786, 0.43900865, 0.4143401 , 0.11434203,\n 0.87028706, 0.15387392, 0.86223227, 0.8570186 , 0.4433931 ,\n 0.7649788 , 0.36369222, 0.33063045, 0.7437426 , 0.3493362 ,\n 0.7671248 , 0.7743846 , 0.12363896, 0.4528606 , 0.75037146,\n 0.45131576, 0.34990048, 0.7552419 , 0.7508755 , 0.7558205 ,\n 0.3391131 , 0.76618046, 0.38217723, 0.30887872, 0.36930698,\n 0.3591324 , 0.83817935, 0.9321221 , 0.35430533, 0.11836711,\n 0.764429 , 0.7478696 , 0.74976325, 0.3656214 , 0.3482211 ,\n 0.3658831 , 0.35415024, 0.29030812, 0.7965492 , 0.9372817 ,\n 0.11179626, 0.33758143, 0.305908 , 0.12149343, 0.41378874,\n 0.09611899, 0.36266702, 0.76215094, 0.41939664, 0.7642038 ,\n 0.36630815, 0.36369124, 0.775969 , 0.7431689 , 0.49149197,\n 0.35072863, 0.7608663 , 0.88904417, 0.11546668, 0.32508087,\n 0.78478754, 0.2949888 , 0.9328996 , 0.26087016, 0.15387377,\n 0.7867287 , 0.35840425, 0.8485855 , 0.36474293, 0.86086893,\n 0.85082245, 0.37929475, 0.88898706, 0.44798538, 
0.74498856,\n 0.7642088 , 0.9374167 , 0.24628928, 0.1150094 , 0.35409844,\n 0.34573317, 0.1182591 , 0.35334843, 0.8806509 , 0.3744196 ,\n 0.12240422, 0.7410463 , 0.3571657 , 0.44970232, 0.8927134 ,\n 0.76465344, 0.7640152 , 0.33744502, 0.7715051 , 0.44094718,\n 0.33831298, 0.93699497, 0.30656263, 0.10126469, 0.8242742 ,\n 0.85100025, 0.42451733, 0.362445 , 0.77677643, 0.40487826,\n 0.78558755, 0.339495 , 0.8240729 , 0.7534524 , 0.93057597,\n 0.3128613 ], dtype=float32)" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = np.squeeze(y_pred)\n", + "y_pred" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 30, + "outputs": [], + "source": [ + "y_true = np.squeeze(y_test)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 31, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.02E-03\n", + "RMSE: 0.032\n", + "MAE: 0.0236\n", + "MAPE: 7.07%\n", + "R_2: 0.9858\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true, y_pred)\n", + "RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + "MAE = mean_absolute_error(y_true, y_pred)\n", + "MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + "R_2 = r2_score(y_true, y_pred)\n", + "print(f\"MSE: {format(MSE, '.2E')}\")\n", + "print(f'RMSE: {round(RMSE, 4)}')\n", + "print(f'MAE: {round(MAE, 4)}')\n", + "print(f'MAPE: {round(MAPE * 100, 2)}%')\n", + "print(f'R_2: {round(R_2, 4)}')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + 
} + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [], + "source": [ + "def recover(x, col='燃料消耗量'):\n", + " return np.expm1(x * (maxs[col] - mins[col]) + mins[col])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "outputs": [], + "source": [ + "y_true_recover = recover(y_true)\n", + "y_pred_recover = recover(y_pred)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 7.76E+04\n", + "RMSE: 278.5442\n", + "MAE: 166.5543\n", + "MAPE: 9.54%\n", + "R_2: 0.9717\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_recover, y_pred_recover)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_recover, y_pred_recover))\n", + "MAE = mean_absolute_error(y_true_recover, y_pred_recover)\n", + "MAPE = mean_absolute_percentage_error(y_true_recover, y_pred_recover)\n", + "R_2 = r2_score(y_true_recover, y_pred_recover)\n", + "print(f\"MSE: {format(MSE, '.2E')}\")\n", + "print(f'RMSE: {round(RMSE, 4)}')\n", + "print(f'MAE: {round(MAE, 4)}')\n", + "print(f'MAPE: {round(MAPE * 100, 2)}%')\n", + "print(f'R_2: {round(R_2, 4)}')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 
+} \ No newline at end of file diff --git a/.ipynb_checkpoints/基于煤种标准化的数据建模及预测-checkpoint.ipynb b/.ipynb_checkpoints/基于煤种标准化的数据建模及预测-checkpoint.ipynb new file mode 100644 index 0000000..8a018b7 --- /dev/null +++ b/.ipynb_checkpoints/基于煤种标准化的数据建模及预测-checkpoint.ipynb @@ -0,0 +1,2299 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import xgboost as xgb\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.5743320.072680
1上海市凝气式亚临界水冷5.7071104.8078753.4677691.3862940.5821640.072391
2上海市凝气式亚临界水冷5.7714414.8089393.4768861.0986120.5692810.071041
3上海市凝气式超超临界水冷6.9087554.8073563.4583731.6094380.5062500.070460
4上海市纯凝式亚临界水冷5.8607864.8078393.4786272.8332130.5652260.073717
\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "1 上海市 凝气式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "2 上海市 凝气式 亚临界 水冷 5.771441 4.808939 3.476886 1.098612 \n", + "3 上海市 凝气式 超超临界 水冷 6.908755 4.807356 3.458373 1.609438 \n", + "4 上海市 纯凝式 亚临界 水冷 5.860786 4.807839 3.478627 2.833213 \n", + "\n", + " power_co2_factor heat_co2_factor \n", + "0 0.574332 0.072680 \n", + "1 0.582164 0.072391 \n", + "2 0.569281 0.071041 \n", + "3 0.506250 0.070460 \n", + "4 0.565226 0.073717 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('./results/去煤种化数据.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(['所处地区', '机组类型', '参数分类', '冷凝器型式'],\n", + " Index(['铭牌容量 (MW)', 'longitude', 'latitude', 'altitude'], dtype='object'))" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "object_cols = data.columns[:4].tolist()\n", + "num_cols = data.columns[4:8]\n", + "object_cols, num_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx',)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "test_geo_info = pd.read_excel('./data/电厂地理信息.xlsx')\n", + "test_geo_info.rename(columns={'name':'企业名称'}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "test_data = 
test_data.merge(test_geo_info, how='left', on='企业名称').drop(columns='address')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "test_data_cp = test_data.copy()\n", + "test_data = test_data[['地区', '汽轮机类型', '压力参数', '冷却方式', '单机容量(MW)', 'lng', 'lat', 'altitude']].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data.columns = data.columns[:8].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data['na_cols'] = test_data.isna().sum(axis=1).values" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "110838.446" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data[test_data.na_cols <= 1]['铭牌容量 (MW)'].sum() /10" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data = test_data[test_data.na_cols <= 1].drop(columns='na_cols').reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "水冷 413\n", + "空冷 110\n", + "其他 1\n", + "Name: 冷凝器型式, dtype: int64" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['冷凝器型式'].value_counts()" + ] + }, + { + "cell_type": "code", + 
"execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "水冷-闭式循环 2125\n", + "其他 1076\n", + "水冷-开式循环 972\n", + "空冷-直接空冷 586\n", + "空冷-间接空冷 264\n", + "水冷 52\n", + "空冷 14\n", + "间接空冷 4\n", + "直接空冷 2\n", + "Name: 冷凝器型式, dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data['冷凝器型式'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def change_type(x:str):\n", + " if '水冷' in x:\n", + " return '水冷'\n", + " elif '空冷' in x:\n", + " return \"空冷\"\n", + " else:\n", + " return '其他'" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data.fillna('其他', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data['冷凝器型式'] = new_test_data['冷凝器型式'].apply(change_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "亚临界 265\n", + "超临界 156\n", + "超超临界 69\n", + "超高压 32\n", + "高压 2\n", + "Name: 参数分类, dtype: int64" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['参数分类'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "高压 1538\n", + "亚临界 1075\n", + "中压 1069\n", + "超临界 608\n", + "超高压 447\n", + "超超临界 358\n", + "Name: 参数分类, dtype: int64" + ] + }, + "execution_count": 18, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data['参数分类'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data['机组类型'] = new_test_data['机组类型'].apply(lambda x: x if x.endswith('式') else x + '式')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " new_test_data[col] = new_test_data[col].apply(lambda x: 0 if x<0 else x)\n", + " new_test_data[col] = np.log1p(new_test_data[col])" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude
0安徽省凝气式亚临界水冷5.7714413.4515834.7720942.397895
1安徽省凝气式亚临界水冷5.7714413.4515834.7720942.397895
2安徽省凝气式超超临界水冷6.9087553.4515834.7720942.397895
3安徽省凝气式超超临界水冷6.9087553.4515834.7720942.397895
4安徽省抽凝式高压水冷3.7135723.4515834.7720942.397895
...........................
5090重庆市抽凝式高压水冷3.9120233.4274894.6823535.645447
5091重庆市抽凝式高压水冷3.2580973.4276664.6823065.627621
5092重庆市抽背式高压水冷3.2580973.4276664.6823065.627621
5093重庆市背压式高压其他3.4339873.4287154.6822085.690359
5094重庆市抽凝式高压水冷4.8362823.4287154.6822085.690359
\n", + "

5095 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude\n", + "0 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895\n", + "1 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895\n", + "2 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895\n", + "3 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895\n", + "4 安徽省 抽凝式 高压 水冷 3.713572 3.451583 4.772094 2.397895\n", + "... ... ... ... ... ... ... ... ...\n", + "5090 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447\n", + "5091 重庆市 抽凝式 高压 水冷 3.258097 3.427666 4.682306 5.627621\n", + "5092 重庆市 抽背式 高压 水冷 3.258097 3.427666 4.682306 5.627621\n", + "5093 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359\n", + "5094 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359\n", + "\n", + "[5095 rows x 8 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.5743320.072680
1上海市凝气式亚临界水冷5.7071104.8078753.4677691.3862940.5821640.072391
2上海市凝气式亚临界水冷5.7714414.8089393.4768861.0986120.5692810.071041
3上海市凝气式超超临界水冷6.9087554.8073563.4583731.6094380.5062500.070460
4上海市纯凝式亚临界水冷5.8607864.8078393.4786272.8332130.5652260.073717
.................................
5090重庆市抽凝式高压水冷3.9120233.4274894.6823535.645447NaNNaN
5091重庆市抽凝式高压水冷3.2580973.4276664.6823065.627621NaNNaN
5092重庆市抽背式高压水冷3.2580973.4276664.6823065.627621NaNNaN
5093重庆市背压式高压其他3.4339873.4287154.6822085.690359NaNNaN
5094重庆市抽凝式高压水冷4.8362823.4287154.6822085.690359NaNNaN
\n", + "

5619 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "1 上海市 凝气式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", + "2 上海市 凝气式 亚临界 水冷 5.771441 4.808939 3.476886 1.098612 \n", + "3 上海市 凝气式 超超临界 水冷 6.908755 4.807356 3.458373 1.609438 \n", + "4 上海市 纯凝式 亚临界 水冷 5.860786 4.807839 3.478627 2.833213 \n", + "... ... ... ... ... ... ... ... ... \n", + "5090 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447 \n", + "5091 重庆市 抽凝式 高压 水冷 3.258097 3.427666 4.682306 5.627621 \n", + "5092 重庆市 抽背式 高压 水冷 3.258097 3.427666 4.682306 5.627621 \n", + "5093 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359 \n", + "5094 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359 \n", + "\n", + " power_co2_factor heat_co2_factor \n", + "0 0.574332 0.072680 \n", + "1 0.582164 0.072391 \n", + "2 0.569281 0.071041 \n", + "3 0.506250 0.070460 \n", + "4 0.565226 0.073717 \n", + "... ... ... \n", + "5090 NaN NaN \n", + "5091 NaN NaN \n", + "5092 NaN NaN \n", + "5093 NaN NaN \n", + "5094 NaN NaN \n", + "\n", + "[5619 rows x 10 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_data = pd.concat([data, new_test_data], axis=0)\n", + "merge_data" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor所处地区_上海市所处地区_云南省所处地区_内蒙古所处地区_内蒙古自治区...机组类型_背压式参数分类_中压参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压参数分类_高压冷凝器型式_其他冷凝器型式_水冷冷凝器型式_空冷
05.7071104.8078753.4677691.3862940.5743320.0726801000...0010000010
15.7071104.8078753.4677691.3862940.5821640.0723911000...0010000010
25.7714414.8089393.4768861.0986120.5692810.0710411000...0010000010
36.9087554.8073563.4583731.6094380.5062500.0704601000...0000100010
45.8607864.8078393.4786272.8332130.5652260.0737171000...0010000010
..................................................................
50903.9120233.4274894.6823535.645447NaNNaN0000...0000001010
50913.2580973.4276664.6823065.627621NaNNaN0000...0000001010
50923.2580973.4276664.6823065.627621NaNNaN0000...0000001010
50933.4339873.4287154.6822085.690359NaNNaN0000...1000001100
50944.8362823.4287154.6822085.690359NaNNaN0000...0000001010
\n", + "

5619 rows × 63 columns

\n", + "
" + ], + "text/plain": [ + " 铭牌容量 (MW) longitude latitude altitude power_co2_factor \\\n", + "0 5.707110 4.807875 3.467769 1.386294 0.574332 \n", + "1 5.707110 4.807875 3.467769 1.386294 0.582164 \n", + "2 5.771441 4.808939 3.476886 1.098612 0.569281 \n", + "3 6.908755 4.807356 3.458373 1.609438 0.506250 \n", + "4 5.860786 4.807839 3.478627 2.833213 0.565226 \n", + "... ... ... ... ... ... \n", + "5090 3.912023 3.427489 4.682353 5.645447 NaN \n", + "5091 3.258097 3.427666 4.682306 5.627621 NaN \n", + "5092 3.258097 3.427666 4.682306 5.627621 NaN \n", + "5093 3.433987 3.428715 4.682208 5.690359 NaN \n", + "5094 4.836282 3.428715 4.682208 5.690359 NaN \n", + "\n", + " heat_co2_factor 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 所处地区_内蒙古自治区 ... \\\n", + "0 0.072680 1 0 0 0 ... \n", + "1 0.072391 1 0 0 0 ... \n", + "2 0.071041 1 0 0 0 ... \n", + "3 0.070460 1 0 0 0 ... \n", + "4 0.073717 1 0 0 0 ... \n", + "... ... ... ... ... ... ... \n", + "5090 NaN 0 0 0 0 ... \n", + "5091 NaN 0 0 0 0 ... \n", + "5092 NaN 0 0 0 0 ... \n", + "5093 NaN 0 0 0 0 ... \n", + "5094 NaN 0 0 0 0 ... \n", + "\n", + " 机组类型_背压式 参数分类_中压 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 参数分类_超高压 参数分类_高压 \\\n", + "0 0 0 1 0 0 0 0 \n", + "1 0 0 1 0 0 0 0 \n", + "2 0 0 1 0 0 0 0 \n", + "3 0 0 0 0 1 0 0 \n", + "4 0 0 1 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "5090 0 0 0 0 0 0 1 \n", + "5091 0 0 0 0 0 0 1 \n", + "5092 0 0 0 0 0 0 1 \n", + "5093 1 0 0 0 0 0 1 \n", + "5094 0 0 0 0 0 0 1 \n", + "\n", + " 冷凝器型式_其他 冷凝器型式_水冷 冷凝器型式_空冷 \n", + "0 0 1 0 \n", + "1 0 1 0 \n", + "2 0 1 0 \n", + "3 0 1 0 \n", + "4 0 1 0 \n", + "... ... ... ... 
\n", + "5090 0 1 0 \n", + "5091 0 1 0 \n", + "5092 0 1 0 \n", + "5093 1 0 0 \n", + "5094 0 1 0 \n", + "\n", + "[5619 rows x 63 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data = pd.get_dummies(merge_data, columns=object_cols)\n", + "use_data" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "use_data.to_csv('./去煤种化后的训练数据.csv', encoding='utf-8-sig', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_set = use_data[~use_data.power_co2_factor.isna()].copy()\n", + "test_set = use_data[use_data.power_co2_factor.isna()].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "feature_cols = [x for x in train_set.columns if 'factor' not in x]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data = train_set.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "metadata": {}, + "outputs": [], + "source": [ + "train, valid = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + 
"source": [ + "dtest = xgb.DMatrix(test_set[feature_cols])" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.005,\n", + " 'max_depth': 15,\n", + " 'subsample': 0.9,\n", + " 'colsample_bytree': 0.9,\n", + " 'min_child_weight': 1,\n", + " 'seed': 42}\n", + "\n", + "num_boost_round = 1200\n", + "\n", + "dtrain = xgb.DMatrix(train[feature_cols], train['power_co2_factor'].values)\n", + "dvalid = xgb.DMatrix(valid[feature_cols], valid['power_co2_factor'].values)\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model_power = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data['power_co2_factor'] = gb_model_power.predict(dtest)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepredictionpower_co2_factor
0安徽省凝气式亚临界水冷5.7714413.4515834.7720942.3978950.5632670.513529
1安徽省凝气式亚临界水冷5.7714413.4515834.7720942.3978950.5632670.513529
2安徽省凝气式超超临界水冷6.9087553.4515834.7720942.3978950.5588720.478943
3安徽省凝气式超超临界水冷6.9087553.4515834.7720942.3978950.5588720.478943
4安徽省抽凝式高压水冷3.7135723.4515834.7720942.3978950.5635010.510681
.................................
5090重庆市抽凝式高压水冷3.9120233.4274894.6823535.6454470.5624920.512501
5091重庆市抽凝式高压水冷3.2580973.4276664.6823065.6276210.5624920.512513
5092重庆市抽背式高压水冷3.2580973.4276664.6823065.6276210.5625970.514091
5093重庆市背压式高压其他3.4339873.4287154.6822085.6903590.5605150.509951
5094重庆市抽凝式高压水冷4.8362823.4287154.6822085.6903590.5619200.511886
\n", + "

5095 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895 \n", + "1 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895 \n", + "2 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895 \n", + "3 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895 \n", + "4 安徽省 抽凝式 高压 水冷 3.713572 3.451583 4.772094 2.397895 \n", + "... ... ... ... ... ... ... ... ... \n", + "5090 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447 \n", + "5091 重庆市 抽凝式 高压 水冷 3.258097 3.427666 4.682306 5.627621 \n", + "5092 重庆市 抽背式 高压 水冷 3.258097 3.427666 4.682306 5.627621 \n", + "5093 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359 \n", + "5094 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359 \n", + "\n", + " prediction power_co2_factor \n", + "0 0.563267 0.513529 \n", + "1 0.563267 0.513529 \n", + "2 0.558872 0.478943 \n", + "3 0.558872 0.478943 \n", + "4 0.563501 0.510681 \n", + "... ... ... \n", + "5090 0.562492 0.512501 \n", + "5091 0.562492 0.512513 \n", + "5092 0.562597 0.514091 \n", + "5093 0.560515 0.509951 \n", + "5094 0.561920 0.511886 \n", + "\n", + "[5095 rows x 10 columns]" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.01,\n", + " 'max_depth': 30,\n", + " 'subsample': 0.8,\n", + " 'colsample_bytree': 0.9,\n", + " 'min_child_weight': 10,\n", + " 'seed': 108}\n", + "\n", + "num_boost_round = 1200\n", + "\n", + "dtrain = xgb.DMatrix(train[feature_cols], train['heat_co2_factor'].values)\n", + "dvalid = xgb.DMatrix(valid[feature_cols], valid['heat_co2_factor'].values)\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", 
+ " early_stopping_rounds=100, verbose_eval=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "new_test_data['heat_co2_factor'] = gb_model_heat.predict(dtest)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import seaborn as sns\n", + "import matplotlib.pyplot as plt\n", + "import datetime as dt\n", + "\n", + "plt.rcParams['font.sans-serif'] = ['SimHei']\n", + "plt.rcParams['axes.unicode_minus'] = False" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "plot_data = new_test_data[['地区', 'prediction']].copy()\n", + "plot_data['地区'] = plot_data['地区'].apply(lambda x: jieba.lcut(x.strip(), cut_all=True)[0] if not pd.isna(x) else pd.NA)\n", + "plot_data.columns = ['省份', 'CO2排放强度(kg/MJ)']\n", + "total_plot_data = pd.concat([plot_data, add_data])" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_plot_data['CO2排放强度(kg/MJ)'] = total_plot_data['CO2排放强度(kg/MJ)'].astype(float)\n", + "total_plot_data['省份'] = total_plot_data['省份'].apply(lambda x: x if x != '内蒙' else '内蒙古')" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
省份CO2排放强度(kg/MJ)
0安徽0.224686
1安徽0.198733
2安徽0.198733
3安徽0.224686
4安徽0.224686
.........
848新疆0.196452
849辽宁0.185688
850内蒙古0.181214
851山东0.347570
852浙江0.251777
\n", + "

3156 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " 省份 CO2排放强度(kg/MJ)\n", + "0 安徽 0.224686\n", + "1 安徽 0.198733\n", + "2 安徽 0.198733\n", + "3 安徽 0.224686\n", + "4 安徽 0.224686\n", + ".. ... ...\n", + "848 新疆 0.196452\n", + "849 辽宁 0.185688\n", + "850 内蒙古 0.181214\n", + "851 山东 0.347570\n", + "852 浙江 0.251777\n", + "\n", + "[3156 rows x 2 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_plot_data" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(20, 10))\n", + "sns.violinplot(x='省份',\n", + " y='CO2排放强度(kg/MJ)',\n", + " data=total_plot_data,\n", + " scale='width',\n", + " palette='Set2',\n", + " inner='quartile')\n", + "\n", + "# Decoration\n", + "plt.title('各省(市、自治区)燃煤发电CO2排放强度预测', fontsize=18)\n", + "plt.savefig('./figure/各省预测值.png')" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from pyecharts.charts import *\n", + "from pyecharts import options as opts\n", + "from pyecharts.commons.utils import JsCode\n", + "from pyecharts.globals import ThemeType, ChartType" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data = pd.read_excel('././././data/机组预测结果.xlsx', sheet_name=2)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.28918716" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['prediction'].max()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "地区\n", + "海南省 0.232931\n", + "贵州省 0.234717\n", + "广东省 0.236436\n", + "上海市 0.237024\n", + "甘肃省 0.238403\n", + "湖北省 0.241863\n", + "福建省 0.242241\n", + "湖南省 0.243759\n", + "宁夏回族自治区 0.245063\n", + "云南省 0.245078\n", + "山西省 0.245322\n", + "新疆维吾尔自治区 0.246324\n", + "安徽省 0.246587\n", + 
"河南省 0.246775\n", + "陕西省 0.248594\n", + "天津市 0.248690\n", + "内蒙古自治区 0.250336\n", + "江西省 0.250423\n", + "广西壮族自治区 0.251057\n", + "河北省 0.251093\n", + "重庆市 0.254725\n", + "辽宁省 0.258904\n", + "四川省 0.259836\n", + "江苏省 0.261171\n", + "吉林省 0.263193\n", + "青海省 0.265025\n", + "山东省 0.265427\n", + "浙江省 0.269908\n", + "黑龙江省 0.272978\n", + "Name: prediction, dtype: float64" + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.groupby('地区')['prediction'].mean().sort_values()" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "map_dict = data.groupby('地区')['机组容量'].mean().to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "{'上海市': 379.20512820512783,\n", + " '云南省': 278.30434782608654,\n", + " '内蒙古自治区': 230.6120649651969,\n", + " '吉林省': 135.31451612903209,\n", + " '四川省': 185.37333333333314,\n", + " '天津市': 253.24468085106344,\n", + " '宁夏回族自治区': 279.0265486725658,\n", + " '安徽省': 316.693939393939,\n", + " '山东省': 125.49526066350698,\n", + " '山西省': 263.3278985507243,\n", + " '广东省': 387.3771428571424,\n", + " '广西壮族自治区': 246.94999999999965,\n", + " '新疆维吾尔自治区': 240.52545454545407,\n", + " '江苏省': 198.3687817258881,\n", + " '江西省': 270.88586956521675,\n", + " '河北省': 225.52138248847896,\n", + " '河南省': 286.94827586206867,\n", + " '浙江省': 143.89827272727254,\n", + " '海南省': 256.1538461538457,\n", + " '湖北省': 315.7307692307689,\n", + " '湖南省': 292.77333333333297,\n", + " '甘肃省': 287.76923076923043,\n", + " '福建省': 335.7640449438199,\n", + " '贵州省': 359.5219780219774,\n", + " '辽宁省': 176.5326203208554,\n", + " '重庆市': 215.15079365079336,\n", + " '陕西省': 260.96775862068927,\n", + " 
'青海省': 122.01612903225785,\n", + " '黑龙江省': 84.8353658536584}" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "map_dict" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from pyecharts.faker import Faker" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "map_co2 = map_dict = data.groupby('地区')['prediction'].mean().to_dict()" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# 需要引用的库\n", + "from pyecharts import options as opts\n", + "from pyecharts.charts import Map\n", + "\n", + "# 设置不同的系列,和系列中区域对应的数量值\n", + "pair_data1 = [[x, float(y)] for x, y in zip(map_co2.keys(), map_co2.values())]\n", + "\n", + "def create_map():\n", + " '''\n", + " 作用:生成地图\n", + " '''\n", + " ( # 大小设置\n", + " Map()\n", + " .add(\n", + " series_name=\"各地区机组平均碳排放强度\",\n", + " data_pair=[[x[0], x[1]*1000] for x in pair_data1],\n", + " maptype=\"china\"\n", + " )\n", + "\n", + " # 全局配置项\n", + " .set_global_opts(\n", + " # 设置标题\n", + " title_opts=opts.TitleOpts(title=\"各地区机组平均碳排放强度(单位:g/MJ)\", subtitle='港、澳、台、西藏数据暂缺'),\n", + " # 设置标准显示\n", + " visualmap_opts=opts.VisualMapOpts(max_=220, min_=280, is_piecewise=False)\n", + " )\n", + " # 系列配置项\n", + " .set_series_opts(\n", + " # 标签名称显示,默认为True\n", + " label_opts=opts.LabelOpts(is_show=True, color=\"blue\")\n", + " )\n", + " # 生成本地html文件\n", + " .render(\"co2.html\")\n", + " )\n", + "\n", + "\n", + "create_map()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + 
"metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/.ipynb_checkpoints/特征分组建模_lightgbm-checkpoint.ipynb b/.ipynb_checkpoints/特征分组建模_lightgbm-checkpoint.ipynb new file mode 100644 index 0000000..0d03e1b --- /dev/null +++ b/.ipynb_checkpoints/特征分组建模_lightgbm-checkpoint.ipynb @@ -0,0 +1,3156 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import lightgbm as lgb\n", + "import numpy as np\n", + "import xgboost as xgb\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 
燃煤挥发份Var(%) 燃煤灰份Aar(%) longitude latitude \\\n0 5.70711 9.818311 3.297687 2.815409 4.807875 3.467769 \n1 5.70711 9.821572 3.297687 2.815409 4.807875 3.467769 \n2 5.70711 9.878580 3.310543 2.769459 4.807875 3.467769 \n3 5.70711 9.883285 3.324316 2.532108 4.807875 3.467769 \n4 5.70711 9.909768 3.255015 2.766319 4.807875 3.467769 \n\n altitude 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 所处地区_上海市 ... 机组类型_供热式 \\\n0 1.386294 0.537574 0.070992 1.0 ... 1.0 \n1 1.386294 0.545516 0.072476 1.0 ... 1.0 \n2 1.386294 0.595849 0.064745 1.0 ... 1.0 \n3 1.386294 0.584432 0.068390 1.0 ... 1.0 \n4 1.386294 0.605369 0.066996 1.0 ... 1.0 \n\n 机组类型_纯凝式 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 参数分类_超高压 参数分类_高压 冷凝器型式_水冷 \\\n0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n1 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n2 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n3 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n4 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n\n 冷凝器型式_直接空冷 冷凝器型式_间接空冷 \n0 0.0 0.0 \n1 0.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n4 0.0 0.0 \n\n[5 rows x 60 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)所处地区_上海市...机组类型_供热式机组类型_纯凝式参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压参数分类_高压冷凝器型式_水冷冷凝器型式_直接空冷冷凝器型式_间接空冷
05.707119.8183113.2976872.8154094.8078753.4677691.3862940.5375740.0709921.0...1.00.01.00.00.00.00.01.00.00.0
15.707119.8215723.2976872.8154094.8078753.4677691.3862940.5455160.0724761.0...1.00.01.00.00.00.00.01.00.00.0
25.707119.8785803.3105432.7694594.8078753.4677691.3862940.5958490.0647451.0...1.00.01.00.00.00.00.01.00.00.0
35.707119.8832853.3243162.5321084.8078753.4677691.3862940.5844320.0683901.0...1.00.01.00.00.00.00.01.00.00.0
45.707119.9097683.2550152.7663194.8078753.4677691.3862940.6053690.0669961.0...1.00.01.00.00.00.00.01.00.00.0
\n

5 rows × 60 columns

\n
" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data = pd.read_csv('./train_data_processed.csv')\n", + "total_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "(3080, 60)" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": "Index(['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude',\n 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)', '所处地区_上海市',\n '所处地区_云南省', '所处地区_内蒙古', '所处地区_内蒙古自治区', '所处地区_北京市', '所处地区_吉林省',\n '所处地区_四川省', '所处地区_天津市', '所处地区_宁夏', '所处地区_宁夏回族自治区', '所处地区_安徽省',\n '所处地区_山东省', '所处地区_山西', '所处地区_山西省', '所处地区_广东省', '所处地区_广西', '所处地区_广西省',\n '所处地区_新疆', '所处地区_新疆维吾尔自治区', '所处地区_江苏省', '所处地区_江西省', '所处地区_河北',\n '所处地区_河北省', '所处地区_河南', '所处地区_河南省', '所处地区_浙江省', '所处地区_海南省', '所处地区_湖北',\n '所处地区_湖北省', '所处地区_湖南', '所处地区_湖南省', '所处地区_甘肃省', '所处地区_福建省', '所处地区_贵州省',\n '所处地区_辽宁省', '所处地区_重庆市', '所处地区_陕西省', '所处地区_青海省', '所处地区_黑龙江', '所处地区_黑龙江省',\n '机组类型_供热', '机组类型_供热式', '机组类型_纯凝式', '参数分类_亚临界', '参数分类_超临界', '参数分类_超超临界',\n '参数分类_超高压', '参数分类_高压', '冷凝器型式_水冷', '冷凝器型式_直接空冷', '冷凝器型式_间接空冷'],\n dtype='object')" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.columns" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "feature_cols = [x for x in total_data.columns if '因子' not in x]\n", + "target_cols = [x for x in total_data.columns if x not in feature_cols]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": 
[ + { + "data": { + "text/plain": " 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) longitude latitude \\\n0 4.615121 9.527411 3.823629 3.007661 4.834910 3.862442 \n1 4.836282 9.920745 3.625673 3.201526 4.700990 3.563714 \n2 4.836282 9.923023 3.623807 3.231200 4.700990 3.563714 \n3 4.836282 9.932727 3.272227 3.236716 4.700990 3.563714 \n4 4.836282 9.936819 3.278653 3.173460 4.700990 3.563714 \n... ... ... ... ... ... ... \n3075 6.966967 9.754581 3.100543 3.378270 4.676091 3.667429 \n3076 6.966967 9.755162 3.082827 3.361070 4.676091 3.667429 \n3077 6.966967 9.762903 3.095125 3.288775 4.676091 3.667429 \n3078 6.966967 9.776506 3.096934 3.328268 4.676091 3.667429 \n3079 6.966967 9.792277 3.073156 3.384051 4.676091 3.667429 \n\n altitude 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 ... 参数分类_亚临界 参数分类_超临界 \\\n0 4.983607 0.0 0.0 0.0 ... 0.0 0.0 \n1 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n2 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n3 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n4 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n... ... ... ... ... ... ... ... \n3075 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3076 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3077 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3078 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3079 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n\n 参数分类_超超临界 参数分类_超高压 参数分类_高压 冷凝器型式_水冷 冷凝器型式_直接空冷 冷凝器型式_间接空冷 \\\n0 0.0 0.0 1.0 1.0 0.0 0.0 \n1 0.0 1.0 0.0 1.0 0.0 0.0 \n2 0.0 1.0 0.0 1.0 0.0 0.0 \n3 0.0 1.0 0.0 1.0 0.0 0.0 \n4 0.0 1.0 0.0 1.0 0.0 0.0 \n... ... ... ... ... ... ... \n3075 1.0 0.0 0.0 0.0 1.0 0.0 \n3076 1.0 0.0 0.0 0.0 1.0 0.0 \n3077 1.0 0.0 0.0 0.0 1.0 0.0 \n3078 1.0 0.0 0.0 0.0 1.0 0.0 \n3079 1.0 0.0 0.0 0.0 1.0 0.0 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n0 0.483547 0.058613 \n1 0.575553 0.085880 \n2 0.607741 0.084890 \n3 0.595382 0.082342 \n4 0.578838 0.082685 \n... ... ... \n3075 0.426880 0.061722 \n3076 0.456768 0.060739 \n3077 0.455534 0.061277 \n3078 0.450064 0.062032 \n3079 0.468720 0.063016 \n\n[3080 rows x 60 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude所处地区_上海市所处地区_云南省所处地区_内蒙古...参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压参数分类_高压冷凝器型式_水冷冷凝器型式_直接空冷冷凝器型式_间接空冷发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
04.6151219.5274113.8236293.0076614.8349103.8624424.9836070.00.00.0...0.00.00.00.01.01.00.00.00.4835470.058613
14.8362829.9207453.6256733.2015264.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.5755530.085880
24.8362829.9230233.6238073.2312004.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.6077410.084890
34.8362829.9327273.2722273.2367164.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.5953820.082342
44.8362829.9368193.2786533.1734604.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.5788380.082685
..................................................................
30756.9669679.7545813.1005433.3782704.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4268800.061722
30766.9669679.7551623.0828273.3610704.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4567680.060739
30776.9669679.7629033.0951253.2887754.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4555340.061277
30786.9669679.7765063.0969343.3282684.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4500640.062032
30796.9669679.7922773.0731563.3840514.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4687200.063016
\n

3080 rows × 60 columns

\n
" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data = total_data.groupby(feature_cols)[target_cols].mean().reset_index()\n", + "use_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "for col in use_data.columns:\n", + " use_data[col] = use_data[col].astype(float)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data, test_data = train_test_split(use_data.dropna(), test_size=0.1, shuffle=True, random_state=666)\n", + "train_data, valid_data = train_test_split(train_data.dropna(), test_size=0.2, shuffle=True, random_state=666)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X_train, Y_train = train_data[feature_cols], train_data[target_cols[0]]\n", + "X_valid, Y_valid = valid_data[feature_cols], valid_data[target_cols[0]]\n", + "X_test, Y_test = test_data[feature_cols], test_data[target_cols[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "lgb_train = lgb.Dataset(X_train, Y_train)\n", + "lgb_eval = lgb.Dataset(X_valid, Y_valid)\n", + "lgb_test = lgb.Dataset(X_test, Y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_gbm = {\n", + " 'task': 'train',\n", + " 'boosting_type': 'gbdt', # 设置提升类型\n", + " 'objective': 'l1', # 目标函数\n", + " 'metric': {'rmse'}, # 评估函数\n", + " 'max_depth': 12,\n", + " 'num_leaves': 20, # 叶子节点数\n", + " 'learning_rate': 0.05, # 学习速率\n", + " 
'feature_fraction': 0.9, # 建树的特征选择比例\n", + " 'bagging_fraction': 0.9, # 建树的样本采样比例\n", + " 'bagging_freq': 10, # k 意味着每 k 次迭代执行bagging\n", + " 'verbose': -1 # <0 显示致命的, =0 显示错误 (警告), >0 显示信息\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1]\tvalid_0's rmse: 0.0692875\n", + "Training until validation scores don't improve for 100 rounds\n", + "[2]\tvalid_0's rmse: 0.06714\n", + "[3]\tvalid_0's rmse: 0.0646839\n", + "[4]\tvalid_0's rmse: 0.0623338\n", + "[5]\tvalid_0's rmse: 0.0600964\n", + "[6]\tvalid_0's rmse: 0.0580108\n", + "[7]\tvalid_0's rmse: 0.056067\n", + "[8]\tvalid_0's rmse: 0.0544344\n", + "[9]\tvalid_0's rmse: 0.0529408\n", + "[10]\tvalid_0's rmse: 0.051276\n", + "[11]\tvalid_0's rmse: 0.0497692\n", + "[12]\tvalid_0's rmse: 0.0483588\n", + "[13]\tvalid_0's rmse: 0.0470211\n", + "[14]\tvalid_0's rmse: 0.0460061\n", + "[15]\tvalid_0's rmse: 0.0448745\n", + "[16]\tvalid_0's rmse: 0.043796\n", + "[17]\tvalid_0's rmse: 0.0428645\n", + "[18]\tvalid_0's rmse: 0.0419008\n", + "[19]\tvalid_0's rmse: 0.0409544\n", + "[20]\tvalid_0's rmse: 0.0400698\n", + "[21]\tvalid_0's rmse: 0.0392848\n", + "[22]\tvalid_0's rmse: 0.038578\n", + "[23]\tvalid_0's rmse: 0.0378727\n", + "[24]\tvalid_0's rmse: 0.0371929\n", + "[25]\tvalid_0's rmse: 0.0366533\n", + "[26]\tvalid_0's rmse: 0.0360842\n", + "[27]\tvalid_0's rmse: 0.0355757\n", + "[28]\tvalid_0's rmse: 0.0350562\n", + "[29]\tvalid_0's rmse: 0.0345382\n", + "[30]\tvalid_0's rmse: 0.0340975\n", + "[31]\tvalid_0's rmse: 0.0337632\n", + "[32]\tvalid_0's rmse: 0.0334232\n", + "[33]\tvalid_0's rmse: 0.0330998\n", + "[34]\tvalid_0's rmse: 0.0328678\n", + "[35]\tvalid_0's rmse: 0.0325827\n", + "[36]\tvalid_0's rmse: 0.0323483\n", + "[37]\tvalid_0's rmse: 0.0321363\n", + "[38]\tvalid_0's rmse: 0.0318823\n", + "[39]\tvalid_0's rmse: 0.0316983\n", + "[40]\tvalid_0's rmse: 
0.0315094\n", + "[41]\tvalid_0's rmse: 0.0313339\n", + "[42]\tvalid_0's rmse: 0.0311663\n", + "[43]\tvalid_0's rmse: 0.031002\n", + "[44]\tvalid_0's rmse: 0.0308446\n", + "[45]\tvalid_0's rmse: 0.0307193\n", + "[46]\tvalid_0's rmse: 0.03058\n", + "[47]\tvalid_0's rmse: 0.0304975\n", + "[48]\tvalid_0's rmse: 0.0303807\n", + "[49]\tvalid_0's rmse: 0.0302476\n", + "[50]\tvalid_0's rmse: 0.0301379\n", + "[51]\tvalid_0's rmse: 0.03\n", + "[52]\tvalid_0's rmse: 0.0299129\n", + "[53]\tvalid_0's rmse: 0.0298092\n", + "[54]\tvalid_0's rmse: 0.0297318\n", + "[55]\tvalid_0's rmse: 0.0296587\n", + "[56]\tvalid_0's rmse: 0.0295906\n", + "[57]\tvalid_0's rmse: 0.0295262\n", + "[58]\tvalid_0's rmse: 0.0294317\n", + "[59]\tvalid_0's rmse: 0.0293666\n", + "[60]\tvalid_0's rmse: 0.029295\n", + "[61]\tvalid_0's rmse: 0.0292621\n", + "[62]\tvalid_0's rmse: 0.0291822\n", + "[63]\tvalid_0's rmse: 0.0291453\n", + "[64]\tvalid_0's rmse: 0.029071\n", + "[65]\tvalid_0's rmse: 0.0289955\n", + "[66]\tvalid_0's rmse: 0.0289425\n", + "[67]\tvalid_0's rmse: 0.0288803\n", + "[68]\tvalid_0's rmse: 0.0288438\n", + "[69]\tvalid_0's rmse: 0.0288004\n", + "[70]\tvalid_0's rmse: 0.0287685\n", + "[71]\tvalid_0's rmse: 0.0287379\n", + "[72]\tvalid_0's rmse: 0.0286942\n", + "[73]\tvalid_0's rmse: 0.028654\n", + "[74]\tvalid_0's rmse: 0.0286255\n", + "[75]\tvalid_0's rmse: 0.0285826\n", + "[76]\tvalid_0's rmse: 0.0285438\n", + "[77]\tvalid_0's rmse: 0.0284903\n", + "[78]\tvalid_0's rmse: 0.0284767\n", + "[79]\tvalid_0's rmse: 0.0284401\n", + "[80]\tvalid_0's rmse: 0.0284152\n", + "[81]\tvalid_0's rmse: 0.0283845\n", + "[82]\tvalid_0's rmse: 0.028375\n", + "[83]\tvalid_0's rmse: 0.0283271\n", + "[84]\tvalid_0's rmse: 0.0283098\n", + "[85]\tvalid_0's rmse: 0.0282848\n", + "[86]\tvalid_0's rmse: 0.0282564\n", + "[87]\tvalid_0's rmse: 0.0282311\n", + "[88]\tvalid_0's rmse: 0.0281999\n", + "[89]\tvalid_0's rmse: 0.0281744\n", + "[90]\tvalid_0's rmse: 0.0281694\n", + "[91]\tvalid_0's rmse: 0.0281849\n", + 
"[92]\tvalid_0's rmse: 0.0281936\n", + "[93]\tvalid_0's rmse: 0.0281859\n", + "[94]\tvalid_0's rmse: 0.028193\n", + "[95]\tvalid_0's rmse: 0.0281768\n", + "[96]\tvalid_0's rmse: 0.0281729\n", + "[97]\tvalid_0's rmse: 0.0281829\n", + "[98]\tvalid_0's rmse: 0.0281698\n", + "[99]\tvalid_0's rmse: 0.0281678\n", + "[100]\tvalid_0's rmse: 0.0281451\n", + "[101]\tvalid_0's rmse: 0.0281243\n", + "[102]\tvalid_0's rmse: 0.028098\n", + "[103]\tvalid_0's rmse: 0.028089\n", + "[104]\tvalid_0's rmse: 0.0280947\n", + "[105]\tvalid_0's rmse: 0.0280915\n", + "[106]\tvalid_0's rmse: 0.0280942\n", + "[107]\tvalid_0's rmse: 0.0280905\n", + "[108]\tvalid_0's rmse: 0.0280888\n", + "[109]\tvalid_0's rmse: 0.0280827\n", + "[110]\tvalid_0's rmse: 0.028075\n", + "[111]\tvalid_0's rmse: 0.0280506\n", + "[112]\tvalid_0's rmse: 0.0280414\n", + "[113]\tvalid_0's rmse: 0.0280254\n", + "[114]\tvalid_0's rmse: 0.0280016\n", + "[115]\tvalid_0's rmse: 0.0279858\n", + "[116]\tvalid_0's rmse: 0.027973\n", + "[117]\tvalid_0's rmse: 0.027962\n", + "[118]\tvalid_0's rmse: 0.0279404\n", + "[119]\tvalid_0's rmse: 0.0279082\n", + "[120]\tvalid_0's rmse: 0.0279064\n", + "[121]\tvalid_0's rmse: 0.0279041\n", + "[122]\tvalid_0's rmse: 0.0278874\n", + "[123]\tvalid_0's rmse: 0.0278608\n", + "[124]\tvalid_0's rmse: 0.0278517\n", + "[125]\tvalid_0's rmse: 0.0278507\n", + "[126]\tvalid_0's rmse: 0.0278408\n", + "[127]\tvalid_0's rmse: 0.0278322\n", + "[128]\tvalid_0's rmse: 0.0278089\n", + "[129]\tvalid_0's rmse: 0.0278084\n", + "[130]\tvalid_0's rmse: 0.0277843\n", + "[131]\tvalid_0's rmse: 0.0277892\n", + "[132]\tvalid_0's rmse: 0.0277827\n", + "[133]\tvalid_0's rmse: 0.0277758\n", + "[134]\tvalid_0's rmse: 0.0277766\n", + "[135]\tvalid_0's rmse: 0.0277853\n", + "[136]\tvalid_0's rmse: 0.0277744\n", + "[137]\tvalid_0's rmse: 0.0277624\n", + "[138]\tvalid_0's rmse: 0.0277481\n", + "[139]\tvalid_0's rmse: 0.027733\n", + "[140]\tvalid_0's rmse: 0.0277201\n", + "[141]\tvalid_0's rmse: 0.0277112\n", + 
"[142]\tvalid_0's rmse: 0.0277081\n", + "[143]\tvalid_0's rmse: 0.0276965\n", + "[144]\tvalid_0's rmse: 0.0276911\n", + "[145]\tvalid_0's rmse: 0.0276786\n", + "[146]\tvalid_0's rmse: 0.0276798\n", + "[147]\tvalid_0's rmse: 0.0276724\n", + "[148]\tvalid_0's rmse: 0.0276479\n", + "[149]\tvalid_0's rmse: 0.0276436\n", + "[150]\tvalid_0's rmse: 0.0276115\n", + "[151]\tvalid_0's rmse: 0.0275966\n", + "[152]\tvalid_0's rmse: 0.0275874\n", + "[153]\tvalid_0's rmse: 0.0275693\n", + "[154]\tvalid_0's rmse: 0.0275769\n", + "[155]\tvalid_0's rmse: 0.0275677\n", + "[156]\tvalid_0's rmse: 0.0275517\n", + "[157]\tvalid_0's rmse: 0.0275422\n", + "[158]\tvalid_0's rmse: 0.0275326\n", + "[159]\tvalid_0's rmse: 0.0275205\n", + "[160]\tvalid_0's rmse: 0.0275234\n", + "[161]\tvalid_0's rmse: 0.0275164\n", + "[162]\tvalid_0's rmse: 0.0275097\n", + "[163]\tvalid_0's rmse: 0.0275092\n", + "[164]\tvalid_0's rmse: 0.0274879\n", + "[165]\tvalid_0's rmse: 0.0274696\n", + "[166]\tvalid_0's rmse: 0.0274685\n", + "[167]\tvalid_0's rmse: 0.0274698\n", + "[168]\tvalid_0's rmse: 0.0274655\n", + "[169]\tvalid_0's rmse: 0.0274796\n", + "[170]\tvalid_0's rmse: 0.0274609\n", + "[171]\tvalid_0's rmse: 0.0274455\n", + "[172]\tvalid_0's rmse: 0.0274493\n", + "[173]\tvalid_0's rmse: 0.0274369\n", + "[174]\tvalid_0's rmse: 0.0274299\n", + "[175]\tvalid_0's rmse: 0.0274234\n", + "[176]\tvalid_0's rmse: 0.0274104\n", + "[177]\tvalid_0's rmse: 0.0273984\n", + "[178]\tvalid_0's rmse: 0.0273957\n", + "[179]\tvalid_0's rmse: 0.0273894\n", + "[180]\tvalid_0's rmse: 0.0273696\n", + "[181]\tvalid_0's rmse: 0.0273432\n", + "[182]\tvalid_0's rmse: 0.027342\n", + "[183]\tvalid_0's rmse: 0.0273113\n", + "[184]\tvalid_0's rmse: 0.0273034\n", + "[185]\tvalid_0's rmse: 0.0272787\n", + "[186]\tvalid_0's rmse: 0.027264\n", + "[187]\tvalid_0's rmse: 0.0272687\n", + "[188]\tvalid_0's rmse: 0.0272646\n", + "[189]\tvalid_0's rmse: 0.027269\n", + "[190]\tvalid_0's rmse: 0.0272657\n", + "[191]\tvalid_0's rmse: 0.0272644\n", + 
"[192]\tvalid_0's rmse: 0.027266\n", + "[193]\tvalid_0's rmse: 0.0272565\n", + "[194]\tvalid_0's rmse: 0.0272468\n", + "[195]\tvalid_0's rmse: 0.0272463\n", + "[196]\tvalid_0's rmse: 0.027222\n", + "[197]\tvalid_0's rmse: 0.0271824\n", + "[198]\tvalid_0's rmse: 0.02718\n", + "[199]\tvalid_0's rmse: 0.0271605\n", + "[200]\tvalid_0's rmse: 0.0271487\n", + "[201]\tvalid_0's rmse: 0.0271442\n", + "[202]\tvalid_0's rmse: 0.0271446\n", + "[203]\tvalid_0's rmse: 0.0271367\n", + "[204]\tvalid_0's rmse: 0.0271474\n", + "[205]\tvalid_0's rmse: 0.0271404\n", + "[206]\tvalid_0's rmse: 0.0271376\n", + "[207]\tvalid_0's rmse: 0.0271251\n", + "[208]\tvalid_0's rmse: 0.0271296\n", + "[209]\tvalid_0's rmse: 0.0271322\n", + "[210]\tvalid_0's rmse: 0.0271364\n", + "[211]\tvalid_0's rmse: 0.027128\n", + "[212]\tvalid_0's rmse: 0.0271156\n", + "[213]\tvalid_0's rmse: 0.0271112\n", + "[214]\tvalid_0's rmse: 0.0271093\n", + "[215]\tvalid_0's rmse: 0.0271047\n", + "[216]\tvalid_0's rmse: 0.0270906\n", + "[217]\tvalid_0's rmse: 0.0270941\n", + "[218]\tvalid_0's rmse: 0.0270903\n", + "[219]\tvalid_0's rmse: 0.0270865\n", + "[220]\tvalid_0's rmse: 0.0270923\n", + "[221]\tvalid_0's rmse: 0.0270943\n", + "[222]\tvalid_0's rmse: 0.0270857\n", + "[223]\tvalid_0's rmse: 0.0270803\n", + "[224]\tvalid_0's rmse: 0.0270701\n", + "[225]\tvalid_0's rmse: 0.0270644\n", + "[226]\tvalid_0's rmse: 0.0270723\n", + "[227]\tvalid_0's rmse: 0.0270654\n", + "[228]\tvalid_0's rmse: 0.027069\n", + "[229]\tvalid_0's rmse: 0.0270634\n", + "[230]\tvalid_0's rmse: 0.027059\n", + "[231]\tvalid_0's rmse: 0.0270559\n", + "[232]\tvalid_0's rmse: 0.0270541\n", + "[233]\tvalid_0's rmse: 0.0270546\n", + "[234]\tvalid_0's rmse: 0.0270555\n", + "[235]\tvalid_0's rmse: 0.0270554\n", + "[236]\tvalid_0's rmse: 0.0270527\n", + "[237]\tvalid_0's rmse: 0.027045\n", + "[238]\tvalid_0's rmse: 0.0270457\n", + "[239]\tvalid_0's rmse: 0.0270406\n", + "[240]\tvalid_0's rmse: 0.0270462\n", + "[241]\tvalid_0's rmse: 0.0270405\n", + 
"[242]\tvalid_0's rmse: 0.0270448\n", + "[243]\tvalid_0's rmse: 0.0270406\n", + "[244]\tvalid_0's rmse: 0.0270415\n", + "[245]\tvalid_0's rmse: 0.0270421\n", + "[246]\tvalid_0's rmse: 0.0270327\n", + "[247]\tvalid_0's rmse: 0.0270246\n", + "[248]\tvalid_0's rmse: 0.0270194\n", + "[249]\tvalid_0's rmse: 0.0270177\n", + "[250]\tvalid_0's rmse: 0.0270092\n", + "[251]\tvalid_0's rmse: 0.0270089\n", + "[252]\tvalid_0's rmse: 0.0270085\n", + "[253]\tvalid_0's rmse: 0.0269901\n", + "[254]\tvalid_0's rmse: 0.0269891\n", + "[255]\tvalid_0's rmse: 0.0269845\n", + "[256]\tvalid_0's rmse: 0.0269845\n", + "[257]\tvalid_0's rmse: 0.0269555\n", + "[258]\tvalid_0's rmse: 0.026949\n", + "[259]\tvalid_0's rmse: 0.0269442\n", + "[260]\tvalid_0's rmse: 0.0269473\n", + "[261]\tvalid_0's rmse: 0.026946\n", + "[262]\tvalid_0's rmse: 0.0269368\n", + "[263]\tvalid_0's rmse: 0.0269311\n", + "[264]\tvalid_0's rmse: 0.0269294\n", + "[265]\tvalid_0's rmse: 0.0269236\n", + "[266]\tvalid_0's rmse: 0.0269203\n", + "[267]\tvalid_0's rmse: 0.0269202\n", + "[268]\tvalid_0's rmse: 0.0269171\n", + "[269]\tvalid_0's rmse: 0.0269116\n", + "[270]\tvalid_0's rmse: 0.026909\n", + "[271]\tvalid_0's rmse: 0.0269102\n", + "[272]\tvalid_0's rmse: 0.0269057\n", + "[273]\tvalid_0's rmse: 0.0269039\n", + "[274]\tvalid_0's rmse: 0.0269003\n", + "[275]\tvalid_0's rmse: 0.0268963\n", + "[276]\tvalid_0's rmse: 0.0268905\n", + "[277]\tvalid_0's rmse: 0.0268955\n", + "[278]\tvalid_0's rmse: 0.0268977\n", + "[279]\tvalid_0's rmse: 0.0269015\n", + "[280]\tvalid_0's rmse: 0.0269013\n", + "[281]\tvalid_0's rmse: 0.0268988\n", + "[282]\tvalid_0's rmse: 0.0268985\n", + "[283]\tvalid_0's rmse: 0.0268988\n", + "[284]\tvalid_0's rmse: 0.0268935\n", + "[285]\tvalid_0's rmse: 0.0268928\n", + "[286]\tvalid_0's rmse: 0.0268898\n", + "[287]\tvalid_0's rmse: 0.0268862\n", + "[288]\tvalid_0's rmse: 0.0268827\n", + "[289]\tvalid_0's rmse: 0.0268775\n", + "[290]\tvalid_0's rmse: 0.0268797\n", + "[291]\tvalid_0's rmse: 0.0268748\n", + 
"[292]\tvalid_0's rmse: 0.0268375\n", + "[293]\tvalid_0's rmse: 0.026812\n", + "[294]\tvalid_0's rmse: 0.0268085\n", + "[295]\tvalid_0's rmse: 0.0268076\n", + "[296]\tvalid_0's rmse: 0.026803\n", + "[297]\tvalid_0's rmse: 0.0267955\n", + "[298]\tvalid_0's rmse: 0.0267948\n", + "[299]\tvalid_0's rmse: 0.0267962\n", + "[300]\tvalid_0's rmse: 0.0267929\n", + "[301]\tvalid_0's rmse: 0.026792\n", + "[302]\tvalid_0's rmse: 0.026785\n", + "[303]\tvalid_0's rmse: 0.0267811\n", + "[304]\tvalid_0's rmse: 0.0267687\n", + "[305]\tvalid_0's rmse: 0.0267677\n", + "[306]\tvalid_0's rmse: 0.0267618\n", + "[307]\tvalid_0's rmse: 0.0267611\n", + "[308]\tvalid_0's rmse: 0.0267278\n", + "[309]\tvalid_0's rmse: 0.026727\n", + "[310]\tvalid_0's rmse: 0.0267222\n", + "[311]\tvalid_0's rmse: 0.0267172\n", + "[312]\tvalid_0's rmse: 0.0267138\n", + "[313]\tvalid_0's rmse: 0.0267119\n", + "[314]\tvalid_0's rmse: 0.0267091\n", + "[315]\tvalid_0's rmse: 0.0267093\n", + "[316]\tvalid_0's rmse: 0.0267089\n", + "[317]\tvalid_0's rmse: 0.0267078\n", + "[318]\tvalid_0's rmse: 0.0267068\n", + "[319]\tvalid_0's rmse: 0.0267062\n", + "[320]\tvalid_0's rmse: 0.0267035\n", + "[321]\tvalid_0's rmse: 0.0267021\n", + "[322]\tvalid_0's rmse: 0.0266997\n", + "[323]\tvalid_0's rmse: 0.026701\n", + "[324]\tvalid_0's rmse: 0.0266997\n", + "[325]\tvalid_0's rmse: 0.0266999\n", + "[326]\tvalid_0's rmse: 0.0267043\n", + "[327]\tvalid_0's rmse: 0.0267048\n", + "[328]\tvalid_0's rmse: 0.0266922\n", + "[329]\tvalid_0's rmse: 0.0266828\n", + "[330]\tvalid_0's rmse: 0.0266837\n", + "[331]\tvalid_0's rmse: 0.0266863\n", + "[332]\tvalid_0's rmse: 0.0266764\n", + "[333]\tvalid_0's rmse: 0.0266769\n", + "[334]\tvalid_0's rmse: 0.0266686\n", + "[335]\tvalid_0's rmse: 0.0266701\n", + "[336]\tvalid_0's rmse: 0.0266739\n", + "[337]\tvalid_0's rmse: 0.0266749\n", + "[338]\tvalid_0's rmse: 0.0266749\n", + "[339]\tvalid_0's rmse: 0.0266745\n", + "[340]\tvalid_0's rmse: 0.0266731\n", + "[341]\tvalid_0's rmse: 0.0266707\n", + 
"[342]\tvalid_0's rmse: 0.0266627\n", + "[343]\tvalid_0's rmse: 0.0266618\n", + "[344]\tvalid_0's rmse: 0.0266607\n", + "[345]\tvalid_0's rmse: 0.0266595\n", + "[346]\tvalid_0's rmse: 0.0266483\n", + "[347]\tvalid_0's rmse: 0.0266501\n", + "[348]\tvalid_0's rmse: 0.0266484\n", + "[349]\tvalid_0's rmse: 0.0266469\n", + "[350]\tvalid_0's rmse: 0.0266446\n", + "[351]\tvalid_0's rmse: 0.0266422\n", + "[352]\tvalid_0's rmse: 0.0266445\n", + "[353]\tvalid_0's rmse: 0.026642\n", + "[354]\tvalid_0's rmse: 0.0266332\n", + "[355]\tvalid_0's rmse: 0.0266333\n", + "[356]\tvalid_0's rmse: 0.0266291\n", + "[357]\tvalid_0's rmse: 0.0266298\n", + "[358]\tvalid_0's rmse: 0.0266302\n", + "[359]\tvalid_0's rmse: 0.026626\n", + "[360]\tvalid_0's rmse: 0.0266191\n", + "[361]\tvalid_0's rmse: 0.0266188\n", + "[362]\tvalid_0's rmse: 0.0266132\n", + "[363]\tvalid_0's rmse: 0.0266094\n", + "[364]\tvalid_0's rmse: 0.0266022\n", + "[365]\tvalid_0's rmse: 0.0266027\n", + "[366]\tvalid_0's rmse: 0.0266001\n", + "[367]\tvalid_0's rmse: 0.0266011\n", + "[368]\tvalid_0's rmse: 0.0265957\n", + "[369]\tvalid_0's rmse: 0.026593\n", + "[370]\tvalid_0's rmse: 0.0265889\n", + "[371]\tvalid_0's rmse: 0.0265887\n", + "[372]\tvalid_0's rmse: 0.0265821\n", + "[373]\tvalid_0's rmse: 0.026579\n", + "[374]\tvalid_0's rmse: 0.0265765\n", + "[375]\tvalid_0's rmse: 0.0265742\n", + "[376]\tvalid_0's rmse: 0.0265724\n", + "[377]\tvalid_0's rmse: 0.0265683\n", + "[378]\tvalid_0's rmse: 0.0265671\n", + "[379]\tvalid_0's rmse: 0.0265605\n", + "[380]\tvalid_0's rmse: 0.026561\n", + "[381]\tvalid_0's rmse: 0.0265544\n", + "[382]\tvalid_0's rmse: 0.026555\n", + "[383]\tvalid_0's rmse: 0.0265526\n", + "[384]\tvalid_0's rmse: 0.0265483\n", + "[385]\tvalid_0's rmse: 0.0265519\n", + "[386]\tvalid_0's rmse: 0.0265494\n", + "[387]\tvalid_0's rmse: 0.0265502\n", + "[388]\tvalid_0's rmse: 0.0265525\n", + "[389]\tvalid_0's rmse: 0.0265567\n", + "[390]\tvalid_0's rmse: 0.0265403\n", + "[391]\tvalid_0's rmse: 0.0265361\n", + 
"[392]\tvalid_0's rmse: 0.0265342\n", + "[393]\tvalid_0's rmse: 0.026529\n", + "[394]\tvalid_0's rmse: 0.0265267\n", + "[395]\tvalid_0's rmse: 0.0265303\n", + "[396]\tvalid_0's rmse: 0.0265306\n", + "[397]\tvalid_0's rmse: 0.0265338\n", + "[398]\tvalid_0's rmse: 0.0265294\n", + "[399]\tvalid_0's rmse: 0.0265253\n", + "[400]\tvalid_0's rmse: 0.0265248\n", + "[401]\tvalid_0's rmse: 0.0265266\n", + "[402]\tvalid_0's rmse: 0.0265279\n", + "[403]\tvalid_0's rmse: 0.0265289\n", + "[404]\tvalid_0's rmse: 0.0265279\n", + "[405]\tvalid_0's rmse: 0.0265228\n", + "[406]\tvalid_0's rmse: 0.0265323\n", + "[407]\tvalid_0's rmse: 0.0265335\n", + "[408]\tvalid_0's rmse: 0.0265318\n", + "[409]\tvalid_0's rmse: 0.0265298\n", + "[410]\tvalid_0's rmse: 0.0265275\n", + "[411]\tvalid_0's rmse: 0.0265259\n", + "[412]\tvalid_0's rmse: 0.0265261\n", + "[413]\tvalid_0's rmse: 0.0265267\n", + "[414]\tvalid_0's rmse: 0.0265261\n", + "[415]\tvalid_0's rmse: 0.0265255\n", + "[416]\tvalid_0's rmse: 0.0265275\n", + "[417]\tvalid_0's rmse: 0.0265225\n", + "[418]\tvalid_0's rmse: 0.0265226\n", + "[419]\tvalid_0's rmse: 0.0265222\n", + "[420]\tvalid_0's rmse: 0.026521\n", + "[421]\tvalid_0's rmse: 0.0265169\n", + "[422]\tvalid_0's rmse: 0.0265139\n", + "[423]\tvalid_0's rmse: 0.0265126\n", + "[424]\tvalid_0's rmse: 0.0265136\n", + "[425]\tvalid_0's rmse: 0.0265079\n", + "[426]\tvalid_0's rmse: 0.0265017\n", + "[427]\tvalid_0's rmse: 0.0264914\n", + "[428]\tvalid_0's rmse: 0.026489\n", + "[429]\tvalid_0's rmse: 0.0264918\n", + "[430]\tvalid_0's rmse: 0.0264906\n", + "[431]\tvalid_0's rmse: 0.0264809\n", + "[432]\tvalid_0's rmse: 0.0264809\n", + "[433]\tvalid_0's rmse: 0.0264819\n", + "[434]\tvalid_0's rmse: 0.0264775\n", + "[435]\tvalid_0's rmse: 0.0264744\n", + "[436]\tvalid_0's rmse: 0.026474\n", + "[437]\tvalid_0's rmse: 0.0264713\n", + "[438]\tvalid_0's rmse: 0.0264702\n", + "[439]\tvalid_0's rmse: 0.0264686\n", + "[440]\tvalid_0's rmse: 0.0264654\n", + "[441]\tvalid_0's rmse: 0.0264663\n", + 
"[442]\tvalid_0's rmse: 0.0264543\n", + "[443]\tvalid_0's rmse: 0.0264538\n", + "[444]\tvalid_0's rmse: 0.0264507\n", + "[445]\tvalid_0's rmse: 0.0264509\n", + "[446]\tvalid_0's rmse: 0.0264456\n", + "[447]\tvalid_0's rmse: 0.0264483\n", + "[448]\tvalid_0's rmse: 0.0264169\n", + "[449]\tvalid_0's rmse: 0.0264151\n", + "[450]\tvalid_0's rmse: 0.0264172\n", + "[451]\tvalid_0's rmse: 0.0264171\n", + "[452]\tvalid_0's rmse: 0.0264175\n", + "[453]\tvalid_0's rmse: 0.0264149\n", + "[454]\tvalid_0's rmse: 0.0264144\n", + "[455]\tvalid_0's rmse: 0.0264154\n", + "[456]\tvalid_0's rmse: 0.0264147\n", + "[457]\tvalid_0's rmse: 0.0264118\n", + "[458]\tvalid_0's rmse: 0.0264138\n", + "[459]\tvalid_0's rmse: 0.0264151\n", + "[460]\tvalid_0's rmse: 0.026415\n", + "[461]\tvalid_0's rmse: 0.0264159\n", + "[462]\tvalid_0's rmse: 0.0264121\n", + "[463]\tvalid_0's rmse: 0.026414\n", + "[464]\tvalid_0's rmse: 0.0264093\n", + "[465]\tvalid_0's rmse: 0.0264118\n", + "[466]\tvalid_0's rmse: 0.0264118\n", + "[467]\tvalid_0's rmse: 0.0264099\n", + "[468]\tvalid_0's rmse: 0.0264113\n", + "[469]\tvalid_0's rmse: 0.0264101\n", + "[470]\tvalid_0's rmse: 0.0264118\n", + "[471]\tvalid_0's rmse: 0.0264092\n", + "[472]\tvalid_0's rmse: 0.0264044\n", + "[473]\tvalid_0's rmse: 0.0263975\n", + "[474]\tvalid_0's rmse: 0.0263909\n", + "[475]\tvalid_0's rmse: 0.0263866\n", + "[476]\tvalid_0's rmse: 0.0263848\n", + "[477]\tvalid_0's rmse: 0.0263839\n", + "[478]\tvalid_0's rmse: 0.0263787\n", + "[479]\tvalid_0's rmse: 0.0263797\n", + "[480]\tvalid_0's rmse: 0.0263769\n", + "[481]\tvalid_0's rmse: 0.0263744\n", + "[482]\tvalid_0's rmse: 0.0263693\n", + "[483]\tvalid_0's rmse: 0.0263673\n", + "[484]\tvalid_0's rmse: 0.0263626\n", + "[485]\tvalid_0's rmse: 0.0263591\n", + "[486]\tvalid_0's rmse: 0.0263569\n", + "[487]\tvalid_0's rmse: 0.0263557\n", + "[488]\tvalid_0's rmse: 0.0263559\n", + "[489]\tvalid_0's rmse: 0.026358\n", + "[490]\tvalid_0's rmse: 0.0263566\n", + "[491]\tvalid_0's rmse: 0.0263564\n", + 
"[492]\tvalid_0's rmse: 0.0263568\n", + "[493]\tvalid_0's rmse: 0.0263562\n", + "[494]\tvalid_0's rmse: 0.0263561\n", + "[495]\tvalid_0's rmse: 0.0263508\n", + "[496]\tvalid_0's rmse: 0.0263498\n", + "[497]\tvalid_0's rmse: 0.026346\n", + "[498]\tvalid_0's rmse: 0.0263474\n", + "[499]\tvalid_0's rmse: 0.026346\n", + "[500]\tvalid_0's rmse: 0.026342\n", + "[501]\tvalid_0's rmse: 0.0263415\n", + "[502]\tvalid_0's rmse: 0.0263404\n", + "[503]\tvalid_0's rmse: 0.0263355\n", + "[504]\tvalid_0's rmse: 0.0263363\n", + "[505]\tvalid_0's rmse: 0.0263362\n", + "[506]\tvalid_0's rmse: 0.0263356\n", + "[507]\tvalid_0's rmse: 0.0263345\n", + "[508]\tvalid_0's rmse: 0.0263343\n", + "[509]\tvalid_0's rmse: 0.0263294\n", + "[510]\tvalid_0's rmse: 0.0263279\n", + "[511]\tvalid_0's rmse: 0.0263274\n", + "[512]\tvalid_0's rmse: 0.0263227\n", + "[513]\tvalid_0's rmse: 0.0263228\n", + "[514]\tvalid_0's rmse: 0.0263178\n", + "[515]\tvalid_0's rmse: 0.0263175\n", + "[516]\tvalid_0's rmse: 0.0263152\n", + "[517]\tvalid_0's rmse: 0.0263062\n", + "[518]\tvalid_0's rmse: 0.0263098\n", + "[519]\tvalid_0's rmse: 0.0263065\n", + "[520]\tvalid_0's rmse: 0.0263043\n", + "[521]\tvalid_0's rmse: 0.0263029\n", + "[522]\tvalid_0's rmse: 0.0263005\n", + "[523]\tvalid_0's rmse: 0.0263013\n", + "[524]\tvalid_0's rmse: 0.0263\n", + "[525]\tvalid_0's rmse: 0.0262944\n", + "[526]\tvalid_0's rmse: 0.0262956\n", + "[527]\tvalid_0's rmse: 0.0262945\n", + "[528]\tvalid_0's rmse: 0.0262948\n", + "[529]\tvalid_0's rmse: 0.0262927\n", + "[530]\tvalid_0's rmse: 0.0262942\n", + "[531]\tvalid_0's rmse: 0.0262821\n", + "[532]\tvalid_0's rmse: 0.0262828\n", + "[533]\tvalid_0's rmse: 0.0262794\n", + "[534]\tvalid_0's rmse: 0.0262778\n", + "[535]\tvalid_0's rmse: 0.0262769\n", + "[536]\tvalid_0's rmse: 0.0262763\n", + "[537]\tvalid_0's rmse: 0.0262754\n", + "[538]\tvalid_0's rmse: 0.026275\n", + "[539]\tvalid_0's rmse: 0.0262742\n", + "[540]\tvalid_0's rmse: 0.02625\n", + "[541]\tvalid_0's rmse: 0.0262449\n", + 
"[542]\tvalid_0's rmse: 0.0262456\n", + "[543]\tvalid_0's rmse: 0.0262468\n", + "[544]\tvalid_0's rmse: 0.0262448\n", + "[545]\tvalid_0's rmse: 0.0262438\n", + "[546]\tvalid_0's rmse: 0.0262417\n", + "[547]\tvalid_0's rmse: 0.026231\n", + "[548]\tvalid_0's rmse: 0.0262339\n", + "[549]\tvalid_0's rmse: 0.0262327\n", + "[550]\tvalid_0's rmse: 0.0262289\n", + "[551]\tvalid_0's rmse: 0.0262244\n", + "[552]\tvalid_0's rmse: 0.0262075\n", + "[553]\tvalid_0's rmse: 0.0262031\n", + "[554]\tvalid_0's rmse: 0.0262028\n", + "[555]\tvalid_0's rmse: 0.0261984\n", + "[556]\tvalid_0's rmse: 0.0261981\n", + "[557]\tvalid_0's rmse: 0.0261977\n", + "[558]\tvalid_0's rmse: 0.0262004\n", + "[559]\tvalid_0's rmse: 0.0261955\n", + "[560]\tvalid_0's rmse: 0.0261955\n", + "[561]\tvalid_0's rmse: 0.0261947\n", + "[562]\tvalid_0's rmse: 0.0261983\n", + "[563]\tvalid_0's rmse: 0.0261981\n", + "[564]\tvalid_0's rmse: 0.0261992\n", + "[565]\tvalid_0's rmse: 0.0261974\n", + "[566]\tvalid_0's rmse: 0.0261936\n", + "[567]\tvalid_0's rmse: 0.0261954\n", + "[568]\tvalid_0's rmse: 0.0261987\n", + "[569]\tvalid_0's rmse: 0.0261837\n", + "[570]\tvalid_0's rmse: 0.0261839\n", + "[571]\tvalid_0's rmse: 0.026185\n", + "[572]\tvalid_0's rmse: 0.0261849\n", + "[573]\tvalid_0's rmse: 0.0261842\n", + "[574]\tvalid_0's rmse: 0.0261826\n", + "[575]\tvalid_0's rmse: 0.0261834\n", + "[576]\tvalid_0's rmse: 0.0261825\n", + "[577]\tvalid_0's rmse: 0.0261717\n", + "[578]\tvalid_0's rmse: 0.026171\n", + "[579]\tvalid_0's rmse: 0.0261609\n", + "[580]\tvalid_0's rmse: 0.02616\n", + "[581]\tvalid_0's rmse: 0.0261573\n", + "[582]\tvalid_0's rmse: 0.026159\n", + "[583]\tvalid_0's rmse: 0.0261576\n", + "[584]\tvalid_0's rmse: 0.0261557\n", + "[585]\tvalid_0's rmse: 0.0261582\n", + "[586]\tvalid_0's rmse: 0.026158\n", + "[587]\tvalid_0's rmse: 0.0261573\n", + "[588]\tvalid_0's rmse: 0.0261571\n", + "[589]\tvalid_0's rmse: 0.0261535\n", + "[590]\tvalid_0's rmse: 0.0261534\n", + "[591]\tvalid_0's rmse: 0.0261534\n", + 
"[592]\tvalid_0's rmse: 0.0261436\n", + "[593]\tvalid_0's rmse: 0.0261423\n", + "[594]\tvalid_0's rmse: 0.0261409\n", + "[595]\tvalid_0's rmse: 0.0261377\n", + "[596]\tvalid_0's rmse: 0.0261358\n", + "[597]\tvalid_0's rmse: 0.0261367\n", + "[598]\tvalid_0's rmse: 0.026137\n", + "[599]\tvalid_0's rmse: 0.0261357\n", + "[600]\tvalid_0's rmse: 0.0261344\n", + "[601]\tvalid_0's rmse: 0.0261345\n", + "[602]\tvalid_0's rmse: 0.026133\n", + "[603]\tvalid_0's rmse: 0.0261313\n", + "[604]\tvalid_0's rmse: 0.0261344\n", + "[605]\tvalid_0's rmse: 0.0261339\n", + "[606]\tvalid_0's rmse: 0.0261321\n", + "[607]\tvalid_0's rmse: 0.0261288\n", + "[608]\tvalid_0's rmse: 0.0261285\n", + "[609]\tvalid_0's rmse: 0.0261298\n", + "[610]\tvalid_0's rmse: 0.026131\n", + "[611]\tvalid_0's rmse: 0.0261265\n", + "[612]\tvalid_0's rmse: 0.0261043\n", + "[613]\tvalid_0's rmse: 0.0261023\n", + "[614]\tvalid_0's rmse: 0.0261013\n", + "[615]\tvalid_0's rmse: 0.0260971\n", + "[616]\tvalid_0's rmse: 0.0260979\n", + "[617]\tvalid_0's rmse: 0.0260987\n", + "[618]\tvalid_0's rmse: 0.0260728\n", + "[619]\tvalid_0's rmse: 0.026069\n", + "[620]\tvalid_0's rmse: 0.0260678\n", + "[621]\tvalid_0's rmse: 0.0260587\n", + "[622]\tvalid_0's rmse: 0.0260571\n", + "[623]\tvalid_0's rmse: 0.0260564\n", + "[624]\tvalid_0's rmse: 0.026054\n", + "[625]\tvalid_0's rmse: 0.0260544\n", + "[626]\tvalid_0's rmse: 0.0260502\n", + "[627]\tvalid_0's rmse: 0.0260444\n", + "[628]\tvalid_0's rmse: 0.026044\n", + "[629]\tvalid_0's rmse: 0.02604\n", + "[630]\tvalid_0's rmse: 0.0260386\n", + "[631]\tvalid_0's rmse: 0.0260394\n", + "[632]\tvalid_0's rmse: 0.0260378\n", + "[633]\tvalid_0's rmse: 0.0260397\n", + "[634]\tvalid_0's rmse: 0.0260395\n", + "[635]\tvalid_0's rmse: 0.0260398\n", + "[636]\tvalid_0's rmse: 0.0260376\n", + "[637]\tvalid_0's rmse: 0.026039\n", + "[638]\tvalid_0's rmse: 0.0260362\n", + "[639]\tvalid_0's rmse: 0.0260345\n", + "[640]\tvalid_0's rmse: 0.0260342\n", + "[641]\tvalid_0's rmse: 0.0260336\n", + 
"[642]\tvalid_0's rmse: 0.0260337\n", + "[643]\tvalid_0's rmse: 0.0260325\n", + "[644]\tvalid_0's rmse: 0.0260305\n", + "[645]\tvalid_0's rmse: 0.0260308\n", + "[646]\tvalid_0's rmse: 0.0260319\n", + "[647]\tvalid_0's rmse: 0.0260334\n", + "[648]\tvalid_0's rmse: 0.0260338\n", + "[649]\tvalid_0's rmse: 0.0260325\n", + "[650]\tvalid_0's rmse: 0.0260265\n", + "[651]\tvalid_0's rmse: 0.0260269\n", + "[652]\tvalid_0's rmse: 0.0260251\n", + "[653]\tvalid_0's rmse: 0.0260252\n", + "[654]\tvalid_0's rmse: 0.0260251\n", + "[655]\tvalid_0's rmse: 0.0260257\n", + "[656]\tvalid_0's rmse: 0.0260234\n", + "[657]\tvalid_0's rmse: 0.0260219\n", + "[658]\tvalid_0's rmse: 0.0260211\n", + "[659]\tvalid_0's rmse: 0.0260209\n", + "[660]\tvalid_0's rmse: 0.0260217\n", + "[661]\tvalid_0's rmse: 0.0260234\n", + "[662]\tvalid_0's rmse: 0.0260244\n", + "[663]\tvalid_0's rmse: 0.0260219\n", + "[664]\tvalid_0's rmse: 0.0260216\n", + "[665]\tvalid_0's rmse: 0.026023\n", + "[666]\tvalid_0's rmse: 0.026025\n", + "[667]\tvalid_0's rmse: 0.0260245\n", + "[668]\tvalid_0's rmse: 0.026022\n", + "[669]\tvalid_0's rmse: 0.0260216\n", + "[670]\tvalid_0's rmse: 0.0260231\n", + "[671]\tvalid_0's rmse: 0.0260226\n", + "[672]\tvalid_0's rmse: 0.0260197\n", + "[673]\tvalid_0's rmse: 0.0260191\n", + "[674]\tvalid_0's rmse: 0.0260193\n", + "[675]\tvalid_0's rmse: 0.0260178\n", + "[676]\tvalid_0's rmse: 0.0260171\n", + "[677]\tvalid_0's rmse: 0.0260153\n", + "[678]\tvalid_0's rmse: 0.0260153\n", + "[679]\tvalid_0's rmse: 0.026013\n", + "[680]\tvalid_0's rmse: 0.0260116\n", + "[681]\tvalid_0's rmse: 0.0260089\n", + "[682]\tvalid_0's rmse: 0.0260046\n", + "[683]\tvalid_0's rmse: 0.0260029\n", + "[684]\tvalid_0's rmse: 0.0260038\n", + "[685]\tvalid_0's rmse: 0.0260018\n", + "[686]\tvalid_0's rmse: 0.0260058\n", + "[687]\tvalid_0's rmse: 0.0260083\n", + "[688]\tvalid_0's rmse: 0.0260081\n", + "[689]\tvalid_0's rmse: 0.0260076\n", + "[690]\tvalid_0's rmse: 0.0260032\n", + "[691]\tvalid_0's rmse: 0.0260018\n", + 
"[692]\tvalid_0's rmse: 0.0260013\n", + "[693]\tvalid_0's rmse: 0.0260024\n", + "[694]\tvalid_0's rmse: 0.026003\n", + "[695]\tvalid_0's rmse: 0.0260023\n", + "[696]\tvalid_0's rmse: 0.0260022\n", + "[697]\tvalid_0's rmse: 0.0260018\n", + "[698]\tvalid_0's rmse: 0.0260004\n", + "[699]\tvalid_0's rmse: 0.0259998\n", + "[700]\tvalid_0's rmse: 0.0259961\n", + "[701]\tvalid_0's rmse: 0.0259964\n", + "[702]\tvalid_0's rmse: 0.0259942\n", + "[703]\tvalid_0's rmse: 0.0259951\n", + "[704]\tvalid_0's rmse: 0.0259918\n", + "[705]\tvalid_0's rmse: 0.0259913\n", + "[706]\tvalid_0's rmse: 0.0259895\n", + "[707]\tvalid_0's rmse: 0.0259881\n", + "[708]\tvalid_0's rmse: 0.0259869\n", + "[709]\tvalid_0's rmse: 0.0259796\n", + "[710]\tvalid_0's rmse: 0.0259789\n", + "[711]\tvalid_0's rmse: 0.0259766\n", + "[712]\tvalid_0's rmse: 0.0259758\n", + "[713]\tvalid_0's rmse: 0.0259746\n", + "[714]\tvalid_0's rmse: 0.0259744\n", + "[715]\tvalid_0's rmse: 0.0259761\n", + "[716]\tvalid_0's rmse: 0.0259832\n", + "[717]\tvalid_0's rmse: 0.0259813\n", + "[718]\tvalid_0's rmse: 0.0259823\n", + "[719]\tvalid_0's rmse: 0.0259815\n", + "[720]\tvalid_0's rmse: 0.0259701\n", + "[721]\tvalid_0's rmse: 0.0259693\n", + "[722]\tvalid_0's rmse: 0.0259679\n", + "[723]\tvalid_0's rmse: 0.0259668\n", + "[724]\tvalid_0's rmse: 0.0259646\n", + "[725]\tvalid_0's rmse: 0.0259639\n", + "[726]\tvalid_0's rmse: 0.0259672\n", + "[727]\tvalid_0's rmse: 0.025969\n", + "[728]\tvalid_0's rmse: 0.0259709\n", + "[729]\tvalid_0's rmse: 0.0259705\n", + "[730]\tvalid_0's rmse: 0.0259611\n", + "[731]\tvalid_0's rmse: 0.0259601\n", + "[732]\tvalid_0's rmse: 0.0259605\n", + "[733]\tvalid_0's rmse: 0.02596\n", + "[734]\tvalid_0's rmse: 0.0259589\n", + "[735]\tvalid_0's rmse: 0.0259593\n", + "[736]\tvalid_0's rmse: 0.0259612\n", + "[737]\tvalid_0's rmse: 0.0259617\n", + "[738]\tvalid_0's rmse: 0.0259604\n", + "[739]\tvalid_0's rmse: 0.0259609\n", + "[740]\tvalid_0's rmse: 0.0259575\n", + "[741]\tvalid_0's rmse: 0.0259552\n", + 
"[742]\tvalid_0's rmse: 0.025958\n", + "[743]\tvalid_0's rmse: 0.0259575\n", + "[744]\tvalid_0's rmse: 0.0259551\n", + "[745]\tvalid_0's rmse: 0.0259555\n", + "[746]\tvalid_0's rmse: 0.0259564\n", + "[747]\tvalid_0's rmse: 0.0259554\n", + "[748]\tvalid_0's rmse: 0.0259536\n", + "[749]\tvalid_0's rmse: 0.0259524\n", + "[750]\tvalid_0's rmse: 0.0259526\n", + "[751]\tvalid_0's rmse: 0.0259521\n", + "[752]\tvalid_0's rmse: 0.0259515\n", + "[753]\tvalid_0's rmse: 0.0259512\n", + "[754]\tvalid_0's rmse: 0.0259504\n", + "[755]\tvalid_0's rmse: 0.0259508\n", + "[756]\tvalid_0's rmse: 0.0259495\n", + "[757]\tvalid_0's rmse: 0.0259432\n", + "[758]\tvalid_0's rmse: 0.0259428\n", + "[759]\tvalid_0's rmse: 0.0259422\n", + "[760]\tvalid_0's rmse: 0.0259443\n", + "[761]\tvalid_0's rmse: 0.0259459\n", + "[762]\tvalid_0's rmse: 0.0259443\n", + "[763]\tvalid_0's rmse: 0.0259442\n", + "[764]\tvalid_0's rmse: 0.0259432\n", + "[765]\tvalid_0's rmse: 0.025944\n", + "[766]\tvalid_0's rmse: 0.0259433\n", + "[767]\tvalid_0's rmse: 0.0259438\n", + "[768]\tvalid_0's rmse: 0.0259408\n", + "[769]\tvalid_0's rmse: 0.0259404\n", + "[770]\tvalid_0's rmse: 0.0259398\n", + "[771]\tvalid_0's rmse: 0.0259375\n", + "[772]\tvalid_0's rmse: 0.025935\n", + "[773]\tvalid_0's rmse: 0.0259347\n", + "[774]\tvalid_0's rmse: 0.0259332\n", + "[775]\tvalid_0's rmse: 0.0259335\n", + "[776]\tvalid_0's rmse: 0.0259349\n", + "[777]\tvalid_0's rmse: 0.0259345\n", + "[778]\tvalid_0's rmse: 0.0259353\n", + "[779]\tvalid_0's rmse: 0.0259353\n", + "[780]\tvalid_0's rmse: 0.0259354\n", + "[781]\tvalid_0's rmse: 0.025935\n", + "[782]\tvalid_0's rmse: 0.0259362\n", + "[783]\tvalid_0's rmse: 0.0259348\n", + "[784]\tvalid_0's rmse: 0.0259347\n", + "[785]\tvalid_0's rmse: 0.0259361\n", + "[786]\tvalid_0's rmse: 0.0259417\n", + "[787]\tvalid_0's rmse: 0.0259418\n", + "[788]\tvalid_0's rmse: 0.0259422\n", + "[789]\tvalid_0's rmse: 0.0259422\n", + "[790]\tvalid_0's rmse: 0.0259419\n", + "[791]\tvalid_0's rmse: 0.0259409\n", + 
"[792]\tvalid_0's rmse: 0.0259409\n", + "[793]\tvalid_0's rmse: 0.0259433\n", + "[794]\tvalid_0's rmse: 0.0259438\n", + "[795]\tvalid_0's rmse: 0.0259415\n", + "[796]\tvalid_0's rmse: 0.0259423\n", + "[797]\tvalid_0's rmse: 0.0259435\n", + "[798]\tvalid_0's rmse: 0.0259416\n", + "[799]\tvalid_0's rmse: 0.0259469\n", + "[800]\tvalid_0's rmse: 0.0259488\n", + "[801]\tvalid_0's rmse: 0.0259505\n", + "[802]\tvalid_0's rmse: 0.025947\n", + "[803]\tvalid_0's rmse: 0.0259453\n", + "[804]\tvalid_0's rmse: 0.0259434\n", + "[805]\tvalid_0's rmse: 0.0259429\n", + "[806]\tvalid_0's rmse: 0.0259445\n", + "[807]\tvalid_0's rmse: 0.0259469\n", + "[808]\tvalid_0's rmse: 0.0259436\n", + "[809]\tvalid_0's rmse: 0.0259414\n", + "[810]\tvalid_0's rmse: 0.0259419\n", + "[811]\tvalid_0's rmse: 0.0259498\n", + "[812]\tvalid_0's rmse: 0.0259524\n", + "[813]\tvalid_0's rmse: 0.025951\n", + "[814]\tvalid_0's rmse: 0.0259468\n", + "[815]\tvalid_0's rmse: 0.0259462\n", + "[816]\tvalid_0's rmse: 0.0259387\n", + "[817]\tvalid_0's rmse: 0.0259382\n", + "[818]\tvalid_0's rmse: 0.0259381\n", + "[819]\tvalid_0's rmse: 0.0259391\n", + "[820]\tvalid_0's rmse: 0.0259437\n", + "[821]\tvalid_0's rmse: 0.0259455\n", + "[822]\tvalid_0's rmse: 0.0259458\n", + "[823]\tvalid_0's rmse: 0.0259459\n", + "[824]\tvalid_0's rmse: 0.0259441\n", + "[825]\tvalid_0's rmse: 0.0259408\n", + "[826]\tvalid_0's rmse: 0.0259412\n", + "[827]\tvalid_0's rmse: 0.0259419\n", + "[828]\tvalid_0's rmse: 0.0259434\n", + "[829]\tvalid_0's rmse: 0.0259429\n", + "[830]\tvalid_0's rmse: 0.0259448\n", + "[831]\tvalid_0's rmse: 0.0259442\n", + "[832]\tvalid_0's rmse: 0.0259424\n", + "[833]\tvalid_0's rmse: 0.0259416\n", + "[834]\tvalid_0's rmse: 0.0259425\n", + "[835]\tvalid_0's rmse: 0.025941\n", + "[836]\tvalid_0's rmse: 0.02594\n", + "[837]\tvalid_0's rmse: 0.0259396\n", + "[838]\tvalid_0's rmse: 0.0259382\n", + "[839]\tvalid_0's rmse: 0.0259367\n", + "[840]\tvalid_0's rmse: 0.0259381\n", + "[841]\tvalid_0's rmse: 0.0259379\n", + 
"[842]\tvalid_0's rmse: 0.0259268\n", + "[843]\tvalid_0's rmse: 0.0259259\n", + "[844]\tvalid_0's rmse: 0.0259228\n", + "[845]\tvalid_0's rmse: 0.0259228\n", + "[846]\tvalid_0's rmse: 0.0259187\n", + "[847]\tvalid_0's rmse: 0.0259171\n", + "[848]\tvalid_0's rmse: 0.0259177\n", + "[849]\tvalid_0's rmse: 0.0259164\n", + "[850]\tvalid_0's rmse: 0.0259161\n", + "[851]\tvalid_0's rmse: 0.0259161\n", + "[852]\tvalid_0's rmse: 0.0259147\n", + "[853]\tvalid_0's rmse: 0.0259145\n", + "[854]\tvalid_0's rmse: 0.0259144\n", + "[855]\tvalid_0's rmse: 0.0259125\n", + "[856]\tvalid_0's rmse: 0.0259127\n", + "[857]\tvalid_0's rmse: 0.0259115\n", + "[858]\tvalid_0's rmse: 0.0259104\n", + "[859]\tvalid_0's rmse: 0.0259119\n", + "[860]\tvalid_0's rmse: 0.0259109\n", + "[861]\tvalid_0's rmse: 0.02591\n", + "[862]\tvalid_0's rmse: 0.0259099\n", + "[863]\tvalid_0's rmse: 0.0259097\n", + "[864]\tvalid_0's rmse: 0.0259133\n", + "[865]\tvalid_0's rmse: 0.0259116\n", + "[866]\tvalid_0's rmse: 0.0259111\n", + "[867]\tvalid_0's rmse: 0.0259095\n", + "[868]\tvalid_0's rmse: 0.0258982\n", + "[869]\tvalid_0's rmse: 0.0258979\n", + "[870]\tvalid_0's rmse: 0.0258956\n", + "[871]\tvalid_0's rmse: 0.0258967\n", + "[872]\tvalid_0's rmse: 0.0258972\n", + "[873]\tvalid_0's rmse: 0.0258971\n", + "[874]\tvalid_0's rmse: 0.0259015\n", + "[875]\tvalid_0's rmse: 0.0258999\n", + "[876]\tvalid_0's rmse: 0.0258987\n", + "[877]\tvalid_0's rmse: 0.0258987\n", + "[878]\tvalid_0's rmse: 0.0258985\n", + "[879]\tvalid_0's rmse: 0.0259\n", + "[880]\tvalid_0's rmse: 0.0259008\n", + "[881]\tvalid_0's rmse: 0.0259018\n", + "[882]\tvalid_0's rmse: 0.0259037\n", + "[883]\tvalid_0's rmse: 0.0259048\n", + "[884]\tvalid_0's rmse: 0.0259063\n", + "[885]\tvalid_0's rmse: 0.0259055\n", + "[886]\tvalid_0's rmse: 0.0259052\n", + "[887]\tvalid_0's rmse: 0.0259047\n", + "[888]\tvalid_0's rmse: 0.0259042\n", + "[889]\tvalid_0's rmse: 0.0259046\n", + "[890]\tvalid_0's rmse: 0.0259049\n", + "[891]\tvalid_0's rmse: 0.0259044\n", + 
"[892]\tvalid_0's rmse: 0.0259046\n", + "[893]\tvalid_0's rmse: 0.0259035\n", + "[894]\tvalid_0's rmse: 0.0259016\n", + "[895]\tvalid_0's rmse: 0.0259031\n", + "[896]\tvalid_0's rmse: 0.0259025\n", + "[897]\tvalid_0's rmse: 0.0259047\n", + "[898]\tvalid_0's rmse: 0.0259051\n", + "[899]\tvalid_0's rmse: 0.0259101\n", + "[900]\tvalid_0's rmse: 0.0259099\n", + "[901]\tvalid_0's rmse: 0.0259106\n", + "[902]\tvalid_0's rmse: 0.0259101\n", + "[903]\tvalid_0's rmse: 0.0259044\n", + "[904]\tvalid_0's rmse: 0.0259034\n", + "[905]\tvalid_0's rmse: 0.0259038\n", + "[906]\tvalid_0's rmse: 0.0259047\n", + "[907]\tvalid_0's rmse: 0.0259061\n", + "[908]\tvalid_0's rmse: 0.025906\n", + "[909]\tvalid_0's rmse: 0.025901\n", + "[910]\tvalid_0's rmse: 0.0258971\n", + "[911]\tvalid_0's rmse: 0.0258968\n", + "[912]\tvalid_0's rmse: 0.0258973\n", + "[913]\tvalid_0's rmse: 0.0258965\n", + "[914]\tvalid_0's rmse: 0.025898\n", + "[915]\tvalid_0's rmse: 0.0258982\n", + "[916]\tvalid_0's rmse: 0.0258981\n", + "[917]\tvalid_0's rmse: 0.0258952\n", + "[918]\tvalid_0's rmse: 0.0258949\n", + "[919]\tvalid_0's rmse: 0.0258947\n", + "[920]\tvalid_0's rmse: 0.0258959\n", + "[921]\tvalid_0's rmse: 0.0258954\n", + "[922]\tvalid_0's rmse: 0.0258947\n", + "[923]\tvalid_0's rmse: 0.0258946\n", + "[924]\tvalid_0's rmse: 0.0258931\n", + "[925]\tvalid_0's rmse: 0.0258945\n", + "[926]\tvalid_0's rmse: 0.0258925\n", + "[927]\tvalid_0's rmse: 0.0258899\n", + "[928]\tvalid_0's rmse: 0.0258898\n", + "[929]\tvalid_0's rmse: 0.0258914\n", + "[930]\tvalid_0's rmse: 0.0258912\n", + "[931]\tvalid_0's rmse: 0.025892\n", + "[932]\tvalid_0's rmse: 0.025893\n", + "[933]\tvalid_0's rmse: 0.0258918\n", + "[934]\tvalid_0's rmse: 0.0258882\n", + "[935]\tvalid_0's rmse: 0.0258882\n", + "[936]\tvalid_0's rmse: 0.0258871\n", + "[937]\tvalid_0's rmse: 0.0258879\n", + "[938]\tvalid_0's rmse: 0.0258857\n", + "[939]\tvalid_0's rmse: 0.0258855\n", + "[940]\tvalid_0's rmse: 0.0258856\n", + "[941]\tvalid_0's rmse: 0.0258855\n", + 
"[942]\tvalid_0's rmse: 0.0258857\n", + "[943]\tvalid_0's rmse: 0.0258857\n", + "[944]\tvalid_0's rmse: 0.0258861\n", + "[945]\tvalid_0's rmse: 0.0258858\n", + "[946]\tvalid_0's rmse: 0.0258865\n", + "[947]\tvalid_0's rmse: 0.0258875\n", + "[948]\tvalid_0's rmse: 0.0258872\n", + "[949]\tvalid_0's rmse: 0.0258872\n", + "[950]\tvalid_0's rmse: 0.0258866\n", + "[951]\tvalid_0's rmse: 0.0258888\n", + "[952]\tvalid_0's rmse: 0.0258892\n", + "[953]\tvalid_0's rmse: 0.0258835\n", + "[954]\tvalid_0's rmse: 0.0258817\n", + "[955]\tvalid_0's rmse: 0.0258817\n", + "[956]\tvalid_0's rmse: 0.0258786\n", + "[957]\tvalid_0's rmse: 0.0258788\n", + "[958]\tvalid_0's rmse: 0.0258788\n", + "[959]\tvalid_0's rmse: 0.0258798\n", + "[960]\tvalid_0's rmse: 0.0258797\n", + "[961]\tvalid_0's rmse: 0.0258797\n", + "[962]\tvalid_0's rmse: 0.0258776\n", + "[963]\tvalid_0's rmse: 0.0258773\n", + "[964]\tvalid_0's rmse: 0.025877\n", + "[965]\tvalid_0's rmse: 0.0258773\n", + "[966]\tvalid_0's rmse: 0.025879\n", + "[967]\tvalid_0's rmse: 0.0258802\n", + "[968]\tvalid_0's rmse: 0.0258794\n", + "[969]\tvalid_0's rmse: 0.02588\n", + "[970]\tvalid_0's rmse: 0.0258797\n", + "[971]\tvalid_0's rmse: 0.0258782\n", + "[972]\tvalid_0's rmse: 0.0258827\n", + "[973]\tvalid_0's rmse: 0.0258842\n", + "[974]\tvalid_0's rmse: 0.0258837\n", + "[975]\tvalid_0's rmse: 0.0258827\n", + "[976]\tvalid_0's rmse: 0.0258818\n", + "[977]\tvalid_0's rmse: 0.0258811\n", + "[978]\tvalid_0's rmse: 0.0258813\n", + "[979]\tvalid_0's rmse: 0.0258813\n", + "[980]\tvalid_0's rmse: 0.0258805\n", + "[981]\tvalid_0's rmse: 0.0258805\n", + "[982]\tvalid_0's rmse: 0.0258791\n", + "[983]\tvalid_0's rmse: 0.0258764\n", + "[984]\tvalid_0's rmse: 0.0258765\n", + "[985]\tvalid_0's rmse: 0.0258748\n", + "[986]\tvalid_0's rmse: 0.025877\n", + "[987]\tvalid_0's rmse: 0.025878\n", + "[988]\tvalid_0's rmse: 0.0258776\n", + "[989]\tvalid_0's rmse: 0.0258761\n", + "[990]\tvalid_0's rmse: 0.0258762\n", + "[991]\tvalid_0's rmse: 0.0258591\n", + 
"[992]\tvalid_0's rmse: 0.0258595\n", + "[993]\tvalid_0's rmse: 0.0258594\n", + "[994]\tvalid_0's rmse: 0.0258605\n", + "[995]\tvalid_0's rmse: 0.02586\n", + "[996]\tvalid_0's rmse: 0.0258582\n", + "[997]\tvalid_0's rmse: 0.0258576\n", + "[998]\tvalid_0's rmse: 0.0258556\n", + "[999]\tvalid_0's rmse: 0.0258562\n", + "[1000]\tvalid_0's rmse: 0.0258543\n", + "[1001]\tvalid_0's rmse: 0.0258523\n", + "[1002]\tvalid_0's rmse: 0.0258534\n", + "[1003]\tvalid_0's rmse: 0.0258537\n", + "[1004]\tvalid_0's rmse: 0.0258546\n", + "[1005]\tvalid_0's rmse: 0.0258533\n", + "[1006]\tvalid_0's rmse: 0.0258519\n", + "[1007]\tvalid_0's rmse: 0.0258508\n", + "[1008]\tvalid_0's rmse: 0.0258508\n", + "[1009]\tvalid_0's rmse: 0.0258509\n", + "[1010]\tvalid_0's rmse: 0.0258469\n", + "[1011]\tvalid_0's rmse: 0.025851\n", + "[1012]\tvalid_0's rmse: 0.0258512\n", + "[1013]\tvalid_0's rmse: 0.0258474\n", + "[1014]\tvalid_0's rmse: 0.0258468\n", + "[1015]\tvalid_0's rmse: 0.0258432\n", + "[1016]\tvalid_0's rmse: 0.0258409\n", + "[1017]\tvalid_0's rmse: 0.0258283\n", + "[1018]\tvalid_0's rmse: 0.0258284\n", + "[1019]\tvalid_0's rmse: 0.0258254\n", + "[1020]\tvalid_0's rmse: 0.0258244\n", + "[1021]\tvalid_0's rmse: 0.0258246\n", + "[1022]\tvalid_0's rmse: 0.0258249\n", + "[1023]\tvalid_0's rmse: 0.0258246\n", + "[1024]\tvalid_0's rmse: 0.0258215\n", + "[1025]\tvalid_0's rmse: 0.0258211\n", + "[1026]\tvalid_0's rmse: 0.0258215\n", + "[1027]\tvalid_0's rmse: 0.0258213\n", + "[1028]\tvalid_0's rmse: 0.0258215\n", + "[1029]\tvalid_0's rmse: 0.0258233\n", + "[1030]\tvalid_0's rmse: 0.0258232\n", + "[1031]\tvalid_0's rmse: 0.0258233\n", + "[1032]\tvalid_0's rmse: 0.0258191\n", + "[1033]\tvalid_0's rmse: 0.0258196\n", + "[1034]\tvalid_0's rmse: 0.0258169\n", + "[1035]\tvalid_0's rmse: 0.025816\n", + "[1036]\tvalid_0's rmse: 0.0258137\n", + "[1037]\tvalid_0's rmse: 0.0258143\n", + "[1038]\tvalid_0's rmse: 0.0258121\n", + "[1039]\tvalid_0's rmse: 0.0258055\n", + "[1040]\tvalid_0's rmse: 0.0258055\n", + 
"[1041]\tvalid_0's rmse: 0.0258079\n", + "[1042]\tvalid_0's rmse: 0.0258097\n", + "[1043]\tvalid_0's rmse: 0.0258097\n", + "[1044]\tvalid_0's rmse: 0.0258109\n", + "[1045]\tvalid_0's rmse: 0.0258118\n", + "[1046]\tvalid_0's rmse: 0.0258121\n", + "[1047]\tvalid_0's rmse: 0.0258112\n", + "[1048]\tvalid_0's rmse: 0.0258103\n", + "[1049]\tvalid_0's rmse: 0.0258102\n", + "[1050]\tvalid_0's rmse: 0.0258113\n", + "[1051]\tvalid_0's rmse: 0.0258119\n", + "[1052]\tvalid_0's rmse: 0.0258115\n", + "[1053]\tvalid_0's rmse: 0.0258116\n", + "[1054]\tvalid_0's rmse: 0.0258114\n", + "[1055]\tvalid_0's rmse: 0.0258098\n", + "[1056]\tvalid_0's rmse: 0.0258097\n", + "[1057]\tvalid_0's rmse: 0.0258085\n", + "[1058]\tvalid_0's rmse: 0.0258088\n", + "[1059]\tvalid_0's rmse: 0.0258058\n", + "[1060]\tvalid_0's rmse: 0.0258033\n", + "[1061]\tvalid_0's rmse: 0.0257999\n", + "[1062]\tvalid_0's rmse: 0.025795\n", + "[1063]\tvalid_0's rmse: 0.0257936\n", + "[1064]\tvalid_0's rmse: 0.0257928\n", + "[1065]\tvalid_0's rmse: 0.025793\n", + "[1066]\tvalid_0's rmse: 0.0257934\n", + "[1067]\tvalid_0's rmse: 0.0257928\n", + "[1068]\tvalid_0's rmse: 0.0257786\n", + "[1069]\tvalid_0's rmse: 0.0257783\n", + "[1070]\tvalid_0's rmse: 0.0257778\n", + "[1071]\tvalid_0's rmse: 0.025777\n", + "[1072]\tvalid_0's rmse: 0.0257782\n", + "[1073]\tvalid_0's rmse: 0.0257767\n", + "[1074]\tvalid_0's rmse: 0.0257763\n", + "[1075]\tvalid_0's rmse: 0.0257764\n", + "[1076]\tvalid_0's rmse: 0.025776\n", + "[1077]\tvalid_0's rmse: 0.0257776\n", + "[1078]\tvalid_0's rmse: 0.0257782\n", + "[1079]\tvalid_0's rmse: 0.0257782\n", + "[1080]\tvalid_0's rmse: 0.0257781\n", + "[1081]\tvalid_0's rmse: 0.025776\n", + "[1082]\tvalid_0's rmse: 0.0257761\n", + "[1083]\tvalid_0's rmse: 0.0257762\n", + "[1084]\tvalid_0's rmse: 0.0257773\n", + "[1085]\tvalid_0's rmse: 0.0257783\n", + "[1086]\tvalid_0's rmse: 0.0257785\n", + "[1087]\tvalid_0's rmse: 0.0257788\n", + "[1088]\tvalid_0's rmse: 0.0257792\n", + "[1089]\tvalid_0's rmse: 0.02578\n", 
+ "[1090]\tvalid_0's rmse: 0.0257788\n", + "[1091]\tvalid_0's rmse: 0.0257776\n", + "[1092]\tvalid_0's rmse: 0.0257795\n", + "[1093]\tvalid_0's rmse: 0.0257788\n", + "[1094]\tvalid_0's rmse: 0.0257782\n", + "[1095]\tvalid_0's rmse: 0.025778\n", + "[1096]\tvalid_0's rmse: 0.0257811\n", + "[1097]\tvalid_0's rmse: 0.0257814\n", + "[1098]\tvalid_0's rmse: 0.0257792\n", + "[1099]\tvalid_0's rmse: 0.0257788\n", + "[1100]\tvalid_0's rmse: 0.0257798\n", + "[1101]\tvalid_0's rmse: 0.0257804\n", + "[1102]\tvalid_0's rmse: 0.0257804\n", + "[1103]\tvalid_0's rmse: 0.0257781\n", + "[1104]\tvalid_0's rmse: 0.0257786\n", + "[1105]\tvalid_0's rmse: 0.0257794\n", + "[1106]\tvalid_0's rmse: 0.0257793\n", + "[1107]\tvalid_0's rmse: 0.0257795\n", + "[1108]\tvalid_0's rmse: 0.0257792\n", + "[1109]\tvalid_0's rmse: 0.0257754\n", + "[1110]\tvalid_0's rmse: 0.0257772\n", + "[1111]\tvalid_0's rmse: 0.0257766\n", + "[1112]\tvalid_0's rmse: 0.0257761\n", + "[1113]\tvalid_0's rmse: 0.0257759\n", + "[1114]\tvalid_0's rmse: 0.0257754\n", + "[1115]\tvalid_0's rmse: 0.0257751\n", + "[1116]\tvalid_0's rmse: 0.0257731\n", + "[1117]\tvalid_0's rmse: 0.0257728\n", + "[1118]\tvalid_0's rmse: 0.0257725\n", + "[1119]\tvalid_0's rmse: 0.025771\n", + "[1120]\tvalid_0's rmse: 0.0257698\n", + "[1121]\tvalid_0's rmse: 0.0257699\n", + "[1122]\tvalid_0's rmse: 0.0257698\n", + "[1123]\tvalid_0's rmse: 0.0257685\n", + "[1124]\tvalid_0's rmse: 0.0257678\n", + "[1125]\tvalid_0's rmse: 0.0257679\n", + "[1126]\tvalid_0's rmse: 0.0257667\n", + "[1127]\tvalid_0's rmse: 0.0257669\n", + "[1128]\tvalid_0's rmse: 0.0257648\n", + "[1129]\tvalid_0's rmse: 0.0257647\n", + "[1130]\tvalid_0's rmse: 0.0257651\n", + "[1131]\tvalid_0's rmse: 0.0257653\n", + "[1132]\tvalid_0's rmse: 0.0257657\n", + "[1133]\tvalid_0's rmse: 0.0257652\n", + "[1134]\tvalid_0's rmse: 0.0257653\n", + "[1135]\tvalid_0's rmse: 0.0257593\n", + "[1136]\tvalid_0's rmse: 0.0257585\n", + "[1137]\tvalid_0's rmse: 0.0257583\n", + "[1138]\tvalid_0's rmse: 
0.0257575\n", + "[1139]\tvalid_0's rmse: 0.0257571\n", + "[1140]\tvalid_0's rmse: 0.0257562\n", + "[1141]\tvalid_0's rmse: 0.0257562\n", + "[1142]\tvalid_0's rmse: 0.0257561\n", + "[1143]\tvalid_0's rmse: 0.025755\n", + "[1144]\tvalid_0's rmse: 0.025754\n", + "[1145]\tvalid_0's rmse: 0.0257534\n", + "[1146]\tvalid_0's rmse: 0.0257535\n", + "[1147]\tvalid_0's rmse: 0.0257503\n", + "[1148]\tvalid_0's rmse: 0.0257519\n", + "[1149]\tvalid_0's rmse: 0.0257486\n", + "[1150]\tvalid_0's rmse: 0.0257485\n", + "[1151]\tvalid_0's rmse: 0.0257492\n", + "[1152]\tvalid_0's rmse: 0.0257531\n", + "[1153]\tvalid_0's rmse: 0.0257529\n", + "[1154]\tvalid_0's rmse: 0.0257521\n", + "[1155]\tvalid_0's rmse: 0.0257517\n", + "[1156]\tvalid_0's rmse: 0.0257545\n", + "[1157]\tvalid_0's rmse: 0.0257556\n", + "[1158]\tvalid_0's rmse: 0.0257559\n", + "[1159]\tvalid_0's rmse: 0.0257578\n", + "[1160]\tvalid_0's rmse: 0.0257567\n", + "[1161]\tvalid_0's rmse: 0.0257569\n", + "[1162]\tvalid_0's rmse: 0.0257559\n", + "[1163]\tvalid_0's rmse: 0.0257577\n", + "[1164]\tvalid_0's rmse: 0.0257551\n", + "[1165]\tvalid_0's rmse: 0.025756\n", + "[1166]\tvalid_0's rmse: 0.0257558\n", + "[1167]\tvalid_0's rmse: 0.0257561\n", + "[1168]\tvalid_0's rmse: 0.0257562\n", + "[1169]\tvalid_0's rmse: 0.0257558\n", + "[1170]\tvalid_0's rmse: 0.0257527\n", + "[1171]\tvalid_0's rmse: 0.0257479\n", + "[1172]\tvalid_0's rmse: 0.0257481\n", + "[1173]\tvalid_0's rmse: 0.0257445\n", + "[1174]\tvalid_0's rmse: 0.0257442\n", + "[1175]\tvalid_0's rmse: 0.0257454\n", + "[1176]\tvalid_0's rmse: 0.0257446\n", + "[1177]\tvalid_0's rmse: 0.0257455\n", + "[1178]\tvalid_0's rmse: 0.0257465\n", + "[1179]\tvalid_0's rmse: 0.0257483\n", + "[1180]\tvalid_0's rmse: 0.0257494\n", + "[1181]\tvalid_0's rmse: 0.025749\n", + "[1182]\tvalid_0's rmse: 0.0257492\n", + "[1183]\tvalid_0's rmse: 0.0257497\n", + "[1184]\tvalid_0's rmse: 0.02575\n", + "[1185]\tvalid_0's rmse: 0.0257441\n", + "[1186]\tvalid_0's rmse: 0.0257412\n", + "[1187]\tvalid_0's 
rmse: 0.0257376\n", + "[1188]\tvalid_0's rmse: 0.025734\n", + "[1189]\tvalid_0's rmse: 0.0257333\n", + "[1190]\tvalid_0's rmse: 0.0257326\n", + "[1191]\tvalid_0's rmse: 0.0257325\n", + "[1192]\tvalid_0's rmse: 0.0257347\n", + "[1193]\tvalid_0's rmse: 0.0257189\n", + "[1194]\tvalid_0's rmse: 0.0257085\n", + "[1195]\tvalid_0's rmse: 0.0257073\n", + "[1196]\tvalid_0's rmse: 0.025707\n", + "[1197]\tvalid_0's rmse: 0.0257055\n", + "[1198]\tvalid_0's rmse: 0.0257056\n", + "[1199]\tvalid_0's rmse: 0.0257043\n", + "[1200]\tvalid_0's rmse: 0.0257063\n", + "[1201]\tvalid_0's rmse: 0.0257056\n", + "[1202]\tvalid_0's rmse: 0.0257059\n", + "[1203]\tvalid_0's rmse: 0.0257041\n", + "[1204]\tvalid_0's rmse: 0.0257018\n", + "[1205]\tvalid_0's rmse: 0.025702\n", + "[1206]\tvalid_0's rmse: 0.0257017\n", + "[1207]\tvalid_0's rmse: 0.0256966\n", + "[1208]\tvalid_0's rmse: 0.0256931\n", + "[1209]\tvalid_0's rmse: 0.0256931\n", + "[1210]\tvalid_0's rmse: 0.025693\n", + "[1211]\tvalid_0's rmse: 0.0256934\n", + "[1212]\tvalid_0's rmse: 0.0256969\n", + "[1213]\tvalid_0's rmse: 0.0256973\n", + "[1214]\tvalid_0's rmse: 0.0256982\n", + "[1215]\tvalid_0's rmse: 0.0256965\n", + "[1216]\tvalid_0's rmse: 0.0256955\n", + "[1217]\tvalid_0's rmse: 0.0256956\n", + "[1218]\tvalid_0's rmse: 0.0256956\n", + "[1219]\tvalid_0's rmse: 0.0256943\n", + "[1220]\tvalid_0's rmse: 0.0256932\n", + "[1221]\tvalid_0's rmse: 0.0256944\n", + "[1222]\tvalid_0's rmse: 0.0256935\n", + "[1223]\tvalid_0's rmse: 0.0256947\n", + "[1224]\tvalid_0's rmse: 0.0256951\n", + "[1225]\tvalid_0's rmse: 0.0256953\n", + "[1226]\tvalid_0's rmse: 0.0256967\n", + "[1227]\tvalid_0's rmse: 0.0256974\n", + "[1228]\tvalid_0's rmse: 0.0256971\n", + "[1229]\tvalid_0's rmse: 0.025697\n", + "[1230]\tvalid_0's rmse: 0.0256973\n", + "[1231]\tvalid_0's rmse: 0.0256971\n", + "[1232]\tvalid_0's rmse: 0.0256976\n", + "[1233]\tvalid_0's rmse: 0.0256976\n", + "[1234]\tvalid_0's rmse: 0.025696\n", + "[1235]\tvalid_0's rmse: 0.0256965\n", + 
"[1236]\tvalid_0's rmse: 0.0256961\n", + "[1237]\tvalid_0's rmse: 0.0256962\n", + "[1238]\tvalid_0's rmse: 0.0256996\n", + "[1239]\tvalid_0's rmse: 0.0257003\n", + "[1240]\tvalid_0's rmse: 0.0257023\n", + "[1241]\tvalid_0's rmse: 0.0257018\n", + "[1242]\tvalid_0's rmse: 0.0257016\n", + "[1243]\tvalid_0's rmse: 0.0257023\n", + "[1244]\tvalid_0's rmse: 0.0257013\n", + "[1245]\tvalid_0's rmse: 0.0256968\n", + "[1246]\tvalid_0's rmse: 0.0256967\n", + "[1247]\tvalid_0's rmse: 0.0256935\n", + "[1248]\tvalid_0's rmse: 0.0256932\n", + "[1249]\tvalid_0's rmse: 0.0256959\n", + "[1250]\tvalid_0's rmse: 0.025695\n", + "[1251]\tvalid_0's rmse: 0.025695\n", + "[1252]\tvalid_0's rmse: 0.0256954\n", + "[1253]\tvalid_0's rmse: 0.0256932\n", + "[1254]\tvalid_0's rmse: 0.0256933\n", + "[1255]\tvalid_0's rmse: 0.0256942\n", + "[1256]\tvalid_0's rmse: 0.0256929\n", + "[1257]\tvalid_0's rmse: 0.0256918\n", + "[1258]\tvalid_0's rmse: 0.0256916\n", + "[1259]\tvalid_0's rmse: 0.0256913\n", + "[1260]\tvalid_0's rmse: 0.0256924\n", + "[1261]\tvalid_0's rmse: 0.0256909\n", + "[1262]\tvalid_0's rmse: 0.0256907\n", + "[1263]\tvalid_0's rmse: 0.0256914\n", + "[1264]\tvalid_0's rmse: 0.0256819\n", + "[1265]\tvalid_0's rmse: 0.0256823\n", + "[1266]\tvalid_0's rmse: 0.0256822\n", + "[1267]\tvalid_0's rmse: 0.0256828\n", + "[1268]\tvalid_0's rmse: 0.025683\n", + "[1269]\tvalid_0's rmse: 0.0256841\n", + "[1270]\tvalid_0's rmse: 0.0256839\n", + "[1271]\tvalid_0's rmse: 0.0256837\n", + "[1272]\tvalid_0's rmse: 0.0256835\n", + "[1273]\tvalid_0's rmse: 0.0256819\n", + "[1274]\tvalid_0's rmse: 0.0256814\n", + "[1275]\tvalid_0's rmse: 0.0256859\n", + "[1276]\tvalid_0's rmse: 0.0256845\n", + "[1277]\tvalid_0's rmse: 0.0256854\n", + "[1278]\tvalid_0's rmse: 0.0256899\n", + "[1279]\tvalid_0's rmse: 0.0256912\n", + "[1280]\tvalid_0's rmse: 0.0256951\n", + "[1281]\tvalid_0's rmse: 0.0256952\n", + "[1282]\tvalid_0's rmse: 0.0256956\n", + "[1283]\tvalid_0's rmse: 0.0256958\n", + "[1284]\tvalid_0's rmse: 
0.0256956\n", + "[1285]\tvalid_0's rmse: 0.025695\n", + "[1286]\tvalid_0's rmse: 0.0256955\n", + "[1287]\tvalid_0's rmse: 0.0256955\n", + "[1288]\tvalid_0's rmse: 0.0256966\n", + "[1289]\tvalid_0's rmse: 0.0256969\n", + "[1290]\tvalid_0's rmse: 0.0256961\n", + "[1291]\tvalid_0's rmse: 0.0256955\n", + "[1292]\tvalid_0's rmse: 0.025695\n", + "[1293]\tvalid_0's rmse: 0.0256959\n", + "[1294]\tvalid_0's rmse: 0.0256953\n", + "[1295]\tvalid_0's rmse: 0.0256943\n", + "[1296]\tvalid_0's rmse: 0.0256935\n", + "[1297]\tvalid_0's rmse: 0.0256928\n", + "[1298]\tvalid_0's rmse: 0.0256922\n", + "[1299]\tvalid_0's rmse: 0.0256921\n", + "[1300]\tvalid_0's rmse: 0.0256929\n", + "[1301]\tvalid_0's rmse: 0.0256929\n", + "[1302]\tvalid_0's rmse: 0.0256922\n", + "[1303]\tvalid_0's rmse: 0.0256922\n", + "[1304]\tvalid_0's rmse: 0.0256903\n", + "[1305]\tvalid_0's rmse: 0.0256902\n", + "[1306]\tvalid_0's rmse: 0.025689\n", + "[1307]\tvalid_0's rmse: 0.0256867\n", + "[1308]\tvalid_0's rmse: 0.025687\n", + "[1309]\tvalid_0's rmse: 0.0256871\n", + "[1310]\tvalid_0's rmse: 0.0256871\n", + "[1311]\tvalid_0's rmse: 0.0256937\n", + "[1312]\tvalid_0's rmse: 0.0256927\n", + "[1313]\tvalid_0's rmse: 0.0256883\n", + "[1314]\tvalid_0's rmse: 0.0256881\n", + "[1315]\tvalid_0's rmse: 0.0256876\n", + "[1316]\tvalid_0's rmse: 0.0256871\n", + "[1317]\tvalid_0's rmse: 0.025685\n", + "[1318]\tvalid_0's rmse: 0.0256843\n", + "[1319]\tvalid_0's rmse: 0.0256852\n", + "[1320]\tvalid_0's rmse: 0.0256852\n", + "[1321]\tvalid_0's rmse: 0.0256852\n", + "[1322]\tvalid_0's rmse: 0.0256842\n", + "[1323]\tvalid_0's rmse: 0.0256825\n", + "[1324]\tvalid_0's rmse: 0.0256824\n", + "[1325]\tvalid_0's rmse: 0.0256792\n", + "[1326]\tvalid_0's rmse: 0.0256781\n", + "[1327]\tvalid_0's rmse: 0.0256776\n", + "[1328]\tvalid_0's rmse: 0.0256776\n", + "[1329]\tvalid_0's rmse: 0.0256782\n", + "[1330]\tvalid_0's rmse: 0.0256781\n", + "[1331]\tvalid_0's rmse: 0.0256777\n", + "[1332]\tvalid_0's rmse: 0.0256777\n", + "[1333]\tvalid_0's 
rmse: 0.0256772\n", + "[1334]\tvalid_0's rmse: 0.025677\n", + "[1335]\tvalid_0's rmse: 0.0256771\n", + "[1336]\tvalid_0's rmse: 0.0256768\n", + "[1337]\tvalid_0's rmse: 0.0256775\n", + "[1338]\tvalid_0's rmse: 0.0256776\n", + "[1339]\tvalid_0's rmse: 0.0256774\n", + "[1340]\tvalid_0's rmse: 0.0256753\n", + "[1341]\tvalid_0's rmse: 0.0256751\n", + "[1342]\tvalid_0's rmse: 0.0256747\n", + "[1343]\tvalid_0's rmse: 0.0256749\n", + "[1344]\tvalid_0's rmse: 0.0256746\n", + "[1345]\tvalid_0's rmse: 0.0256722\n", + "[1346]\tvalid_0's rmse: 0.0256697\n", + "[1347]\tvalid_0's rmse: 0.0256704\n", + "[1348]\tvalid_0's rmse: 0.0256681\n", + "[1349]\tvalid_0's rmse: 0.025668\n", + "[1350]\tvalid_0's rmse: 0.0256667\n", + "[1351]\tvalid_0's rmse: 0.0256684\n", + "[1352]\tvalid_0's rmse: 0.0256685\n", + "[1353]\tvalid_0's rmse: 0.0256673\n", + "[1354]\tvalid_0's rmse: 0.0256673\n", + "[1355]\tvalid_0's rmse: 0.025667\n", + "[1356]\tvalid_0's rmse: 0.0256675\n", + "[1357]\tvalid_0's rmse: 0.0256686\n", + "[1358]\tvalid_0's rmse: 0.0256681\n", + "[1359]\tvalid_0's rmse: 0.0256681\n", + "[1360]\tvalid_0's rmse: 0.0256682\n", + "[1361]\tvalid_0's rmse: 0.025668\n", + "[1362]\tvalid_0's rmse: 0.0256671\n", + "[1363]\tvalid_0's rmse: 0.0256675\n", + "[1364]\tvalid_0's rmse: 0.0256638\n", + "[1365]\tvalid_0's rmse: 0.0256638\n", + "[1366]\tvalid_0's rmse: 0.0256526\n", + "[1367]\tvalid_0's rmse: 0.0256534\n", + "[1368]\tvalid_0's rmse: 0.0256534\n", + "[1369]\tvalid_0's rmse: 0.025653\n", + "[1370]\tvalid_0's rmse: 0.0256528\n", + "[1371]\tvalid_0's rmse: 0.0256532\n", + "[1372]\tvalid_0's rmse: 0.025647\n", + "[1373]\tvalid_0's rmse: 0.0256454\n", + "[1374]\tvalid_0's rmse: 0.0256457\n", + "[1375]\tvalid_0's rmse: 0.0256426\n", + "[1376]\tvalid_0's rmse: 0.0256425\n", + "[1377]\tvalid_0's rmse: 0.0256441\n", + "[1378]\tvalid_0's rmse: 0.0256431\n", + "[1379]\tvalid_0's rmse: 0.0256452\n", + "[1380]\tvalid_0's rmse: 0.0256455\n", + "[1381]\tvalid_0's rmse: 0.0256454\n", + 
"[1382]\tvalid_0's rmse: 0.0256441\n", + "[1383]\tvalid_0's rmse: 0.0256446\n", + "[1384]\tvalid_0's rmse: 0.0256443\n", + "[1385]\tvalid_0's rmse: 0.0256444\n", + "[1386]\tvalid_0's rmse: 0.0256445\n", + "[1387]\tvalid_0's rmse: 0.0256436\n", + "[1388]\tvalid_0's rmse: 0.0256418\n", + "[1389]\tvalid_0's rmse: 0.0256422\n", + "[1390]\tvalid_0's rmse: 0.0256363\n", + "[1391]\tvalid_0's rmse: 0.0256359\n", + "[1392]\tvalid_0's rmse: 0.0256348\n", + "[1393]\tvalid_0's rmse: 0.0256345\n", + "[1394]\tvalid_0's rmse: 0.0256347\n", + "[1395]\tvalid_0's rmse: 0.025635\n", + "[1396]\tvalid_0's rmse: 0.0256333\n", + "[1397]\tvalid_0's rmse: 0.025633\n", + "[1398]\tvalid_0's rmse: 0.025633\n", + "[1399]\tvalid_0's rmse: 0.0256312\n", + "[1400]\tvalid_0's rmse: 0.025631\n", + "[1401]\tvalid_0's rmse: 0.025631\n", + "[1402]\tvalid_0's rmse: 0.0256313\n", + "[1403]\tvalid_0's rmse: 0.025627\n", + "[1404]\tvalid_0's rmse: 0.0256275\n", + "[1405]\tvalid_0's rmse: 0.0256277\n", + "[1406]\tvalid_0's rmse: 0.0256274\n", + "[1407]\tvalid_0's rmse: 0.0256277\n", + "[1408]\tvalid_0's rmse: 0.0256266\n", + "[1409]\tvalid_0's rmse: 0.025626\n", + "[1410]\tvalid_0's rmse: 0.0256258\n", + "[1411]\tvalid_0's rmse: 0.0256246\n", + "[1412]\tvalid_0's rmse: 0.0256245\n", + "[1413]\tvalid_0's rmse: 0.0256243\n", + "[1414]\tvalid_0's rmse: 0.0256237\n", + "[1415]\tvalid_0's rmse: 0.0256244\n", + "[1416]\tvalid_0's rmse: 0.0256238\n", + "[1417]\tvalid_0's rmse: 0.0256171\n", + "[1418]\tvalid_0's rmse: 0.0256115\n", + "[1419]\tvalid_0's rmse: 0.0256106\n", + "[1420]\tvalid_0's rmse: 0.0256105\n", + "[1421]\tvalid_0's rmse: 0.02561\n", + "[1422]\tvalid_0's rmse: 0.0256113\n", + "[1423]\tvalid_0's rmse: 0.0256111\n", + "[1424]\tvalid_0's rmse: 0.025611\n", + "[1425]\tvalid_0's rmse: 0.0256113\n", + "[1426]\tvalid_0's rmse: 0.0256108\n", + "[1427]\tvalid_0's rmse: 0.0256105\n", + "[1428]\tvalid_0's rmse: 0.0256095\n", + "[1429]\tvalid_0's rmse: 0.0256065\n", + "[1430]\tvalid_0's rmse: 0.0256062\n", + 
"[1431]\tvalid_0's rmse: 0.025607\n", + "[1432]\tvalid_0's rmse: 0.0256074\n", + "[1433]\tvalid_0's rmse: 0.025607\n", + "[1434]\tvalid_0's rmse: 0.0256081\n", + "[1435]\tvalid_0's rmse: 0.0256045\n", + "[1436]\tvalid_0's rmse: 0.0256057\n", + "[1437]\tvalid_0's rmse: 0.0256067\n", + "[1438]\tvalid_0's rmse: 0.0256063\n", + "[1439]\tvalid_0's rmse: 0.0256066\n", + "[1440]\tvalid_0's rmse: 0.0256061\n", + "[1441]\tvalid_0's rmse: 0.025605\n", + "[1442]\tvalid_0's rmse: 0.0256045\n", + "[1443]\tvalid_0's rmse: 0.0256032\n", + "[1444]\tvalid_0's rmse: 0.0256063\n", + "[1445]\tvalid_0's rmse: 0.0256076\n", + "[1446]\tvalid_0's rmse: 0.025608\n", + "[1447]\tvalid_0's rmse: 0.0256077\n", + "[1448]\tvalid_0's rmse: 0.0256093\n", + "[1449]\tvalid_0's rmse: 0.0256077\n", + "[1450]\tvalid_0's rmse: 0.0256074\n", + "[1451]\tvalid_0's rmse: 0.0256078\n", + "[1452]\tvalid_0's rmse: 0.025608\n", + "[1453]\tvalid_0's rmse: 0.0256081\n", + "[1454]\tvalid_0's rmse: 0.0256081\n", + "[1455]\tvalid_0's rmse: 0.0256079\n", + "[1456]\tvalid_0's rmse: 0.0256087\n", + "[1457]\tvalid_0's rmse: 0.0256062\n", + "[1458]\tvalid_0's rmse: 0.025602\n", + "[1459]\tvalid_0's rmse: 0.0256021\n", + "[1460]\tvalid_0's rmse: 0.0256041\n", + "[1461]\tvalid_0's rmse: 0.0256042\n", + "[1462]\tvalid_0's rmse: 0.025605\n", + "[1463]\tvalid_0's rmse: 0.0256056\n", + "[1464]\tvalid_0's rmse: 0.0256053\n", + "[1465]\tvalid_0's rmse: 0.0256077\n", + "[1466]\tvalid_0's rmse: 0.0256076\n", + "[1467]\tvalid_0's rmse: 0.0256083\n", + "[1468]\tvalid_0's rmse: 0.0256082\n", + "[1469]\tvalid_0's rmse: 0.0256074\n", + "[1470]\tvalid_0's rmse: 0.0256074\n", + "[1471]\tvalid_0's rmse: 0.025608\n", + "[1472]\tvalid_0's rmse: 0.0256081\n", + "[1473]\tvalid_0's rmse: 0.0256084\n", + "[1474]\tvalid_0's rmse: 0.0256081\n", + "[1475]\tvalid_0's rmse: 0.0256084\n", + "[1476]\tvalid_0's rmse: 0.0256083\n", + "[1477]\tvalid_0's rmse: 0.0256086\n", + "[1478]\tvalid_0's rmse: 0.0256084\n", + "[1479]\tvalid_0's rmse: 0.025608\n", + 
"[1480]\tvalid_0's rmse: 0.02561\n", + "[1481]\tvalid_0's rmse: 0.0256062\n", + "[1482]\tvalid_0's rmse: 0.0256062\n", + "[1483]\tvalid_0's rmse: 0.0256062\n", + "[1484]\tvalid_0's rmse: 0.0256056\n", + "[1485]\tvalid_0's rmse: 0.0256048\n", + "[1486]\tvalid_0's rmse: 0.0256054\n", + "[1487]\tvalid_0's rmse: 0.025605\n", + "[1488]\tvalid_0's rmse: 0.0256026\n", + "[1489]\tvalid_0's rmse: 0.0255999\n", + "[1490]\tvalid_0's rmse: 0.0255993\n", + "[1491]\tvalid_0's rmse: 0.0255995\n", + "[1492]\tvalid_0's rmse: 0.0256009\n", + "[1493]\tvalid_0's rmse: 0.0256006\n", + "[1494]\tvalid_0's rmse: 0.0256027\n", + "[1495]\tvalid_0's rmse: 0.0256021\n", + "[1496]\tvalid_0's rmse: 0.0256017\n", + "[1497]\tvalid_0's rmse: 0.0256016\n", + "[1498]\tvalid_0's rmse: 0.0256018\n", + "[1499]\tvalid_0's rmse: 0.0256011\n", + "[1500]\tvalid_0's rmse: 0.025602\n", + "[1501]\tvalid_0's rmse: 0.0256019\n", + "[1502]\tvalid_0's rmse: 0.025602\n", + "[1503]\tvalid_0's rmse: 0.0256027\n", + "[1504]\tvalid_0's rmse: 0.0255921\n", + "[1505]\tvalid_0's rmse: 0.0255919\n", + "[1506]\tvalid_0's rmse: 0.025592\n", + "[1507]\tvalid_0's rmse: 0.0255918\n", + "[1508]\tvalid_0's rmse: 0.0255914\n", + "[1509]\tvalid_0's rmse: 0.0255913\n", + "[1510]\tvalid_0's rmse: 0.0255907\n", + "[1511]\tvalid_0's rmse: 0.0255905\n", + "[1512]\tvalid_0's rmse: 0.0255883\n", + "[1513]\tvalid_0's rmse: 0.0255877\n", + "[1514]\tvalid_0's rmse: 0.025587\n", + "[1515]\tvalid_0's rmse: 0.0255873\n", + "[1516]\tvalid_0's rmse: 0.025587\n", + "[1517]\tvalid_0's rmse: 0.0255872\n", + "[1518]\tvalid_0's rmse: 0.0255876\n", + "[1519]\tvalid_0's rmse: 0.0255883\n", + "[1520]\tvalid_0's rmse: 0.0255884\n", + "[1521]\tvalid_0's rmse: 0.0255852\n", + "[1522]\tvalid_0's rmse: 0.0255853\n", + "[1523]\tvalid_0's rmse: 0.0255852\n", + "[1524]\tvalid_0's rmse: 0.0255875\n", + "[1525]\tvalid_0's rmse: 0.025588\n", + "[1526]\tvalid_0's rmse: 0.0255894\n", + "[1527]\tvalid_0's rmse: 0.0255891\n", + "[1528]\tvalid_0's rmse: 0.0255891\n", + 
"[1529]\tvalid_0's rmse: 0.0255892\n", + "[1530]\tvalid_0's rmse: 0.0255908\n", + "[1531]\tvalid_0's rmse: 0.0255902\n", + "[1532]\tvalid_0's rmse: 0.0255903\n", + "[1533]\tvalid_0's rmse: 0.0255905\n", + "[1534]\tvalid_0's rmse: 0.0255906\n", + "[1535]\tvalid_0's rmse: 0.0255913\n", + "[1536]\tvalid_0's rmse: 0.0255906\n", + "[1537]\tvalid_0's rmse: 0.0255919\n", + "[1538]\tvalid_0's rmse: 0.0255919\n", + "[1539]\tvalid_0's rmse: 0.0255936\n", + "[1540]\tvalid_0's rmse: 0.025594\n", + "[1541]\tvalid_0's rmse: 0.0255927\n", + "[1542]\tvalid_0's rmse: 0.0255924\n", + "[1543]\tvalid_0's rmse: 0.0255929\n", + "[1544]\tvalid_0's rmse: 0.0255937\n", + "[1545]\tvalid_0's rmse: 0.0255927\n", + "[1546]\tvalid_0's rmse: 0.025592\n", + "[1547]\tvalid_0's rmse: 0.0255914\n", + "[1548]\tvalid_0's rmse: 0.0255914\n", + "[1549]\tvalid_0's rmse: 0.0255913\n", + "[1550]\tvalid_0's rmse: 0.0255909\n", + "[1551]\tvalid_0's rmse: 0.0255915\n", + "[1552]\tvalid_0's rmse: 0.0255916\n", + "[1553]\tvalid_0's rmse: 0.0255916\n", + "[1554]\tvalid_0's rmse: 0.0255915\n", + "[1555]\tvalid_0's rmse: 0.0255921\n", + "[1556]\tvalid_0's rmse: 0.0255909\n", + "[1557]\tvalid_0's rmse: 0.0255908\n", + "[1558]\tvalid_0's rmse: 0.0255916\n", + "[1559]\tvalid_0's rmse: 0.0255904\n", + "[1560]\tvalid_0's rmse: 0.0255898\n", + "[1561]\tvalid_0's rmse: 0.0255908\n", + "[1562]\tvalid_0's rmse: 0.0255909\n", + "[1563]\tvalid_0's rmse: 0.0255911\n", + "[1564]\tvalid_0's rmse: 0.0255908\n", + "[1565]\tvalid_0's rmse: 0.0255928\n", + "[1566]\tvalid_0's rmse: 0.0255909\n", + "[1567]\tvalid_0's rmse: 0.0255908\n", + "[1568]\tvalid_0's rmse: 0.0255925\n", + "[1569]\tvalid_0's rmse: 0.0255903\n", + "[1570]\tvalid_0's rmse: 0.0255904\n", + "[1571]\tvalid_0's rmse: 0.0255902\n", + "[1572]\tvalid_0's rmse: 0.0255895\n", + "[1573]\tvalid_0's rmse: 0.0255941\n", + "[1574]\tvalid_0's rmse: 0.025596\n", + "[1575]\tvalid_0's rmse: 0.0255966\n", + "[1576]\tvalid_0's rmse: 0.0255966\n", + "[1577]\tvalid_0's rmse: 
0.0255965\n", + "[1578]\tvalid_0's rmse: 0.0255957\n", + "[1579]\tvalid_0's rmse: 0.0255949\n", + "[1580]\tvalid_0's rmse: 0.0255931\n", + "[1581]\tvalid_0's rmse: 0.0255936\n", + "[1582]\tvalid_0's rmse: 0.0255936\n", + "[1583]\tvalid_0's rmse: 0.0255941\n", + "[1584]\tvalid_0's rmse: 0.0255942\n", + "[1585]\tvalid_0's rmse: 0.0255976\n", + "[1586]\tvalid_0's rmse: 0.0255974\n", + "[1587]\tvalid_0's rmse: 0.0255956\n", + "[1588]\tvalid_0's rmse: 0.025595\n", + "[1589]\tvalid_0's rmse: 0.0255943\n", + "[1590]\tvalid_0's rmse: 0.0255946\n", + "[1591]\tvalid_0's rmse: 0.0255945\n", + "[1592]\tvalid_0's rmse: 0.0255938\n", + "[1593]\tvalid_0's rmse: 0.0255907\n", + "[1594]\tvalid_0's rmse: 0.0255832\n", + "[1595]\tvalid_0's rmse: 0.0255833\n", + "[1596]\tvalid_0's rmse: 0.0255824\n", + "[1597]\tvalid_0's rmse: 0.025583\n", + "[1598]\tvalid_0's rmse: 0.0255812\n", + "[1599]\tvalid_0's rmse: 0.0255811\n", + "[1600]\tvalid_0's rmse: 0.0255808\n", + "[1601]\tvalid_0's rmse: 0.0255761\n", + "[1602]\tvalid_0's rmse: 0.0255687\n", + "[1603]\tvalid_0's rmse: 0.0255698\n", + "[1604]\tvalid_0's rmse: 0.0255697\n", + "[1605]\tvalid_0's rmse: 0.0255691\n", + "[1606]\tvalid_0's rmse: 0.0255697\n", + "[1607]\tvalid_0's rmse: 0.0255554\n", + "[1608]\tvalid_0's rmse: 0.0255555\n", + "[1609]\tvalid_0's rmse: 0.0255572\n", + "[1610]\tvalid_0's rmse: 0.0255572\n", + "[1611]\tvalid_0's rmse: 0.0255571\n", + "[1612]\tvalid_0's rmse: 0.0255571\n", + "[1613]\tvalid_0's rmse: 0.0255573\n", + "[1614]\tvalid_0's rmse: 0.0255553\n", + "[1615]\tvalid_0's rmse: 0.0255563\n", + "[1616]\tvalid_0's rmse: 0.0255559\n", + "[1617]\tvalid_0's rmse: 0.0255553\n", + "[1618]\tvalid_0's rmse: 0.0255544\n", + "[1619]\tvalid_0's rmse: 0.0255544\n", + "[1620]\tvalid_0's rmse: 0.0255537\n", + "[1621]\tvalid_0's rmse: 0.0255486\n", + "[1622]\tvalid_0's rmse: 0.0255496\n", + "[1623]\tvalid_0's rmse: 0.0255495\n", + "[1624]\tvalid_0's rmse: 0.0255509\n", + "[1625]\tvalid_0's rmse: 0.0255513\n", + 
"[1626]\tvalid_0's rmse: 0.0255499\n", + "[1627]\tvalid_0's rmse: 0.0255497\n", + "[1628]\tvalid_0's rmse: 0.0255489\n", + "[1629]\tvalid_0's rmse: 0.0255457\n", + "[1630]\tvalid_0's rmse: 0.0255384\n", + "[1631]\tvalid_0's rmse: 0.0255383\n", + "[1632]\tvalid_0's rmse: 0.0255377\n", + "[1633]\tvalid_0's rmse: 0.025538\n", + "[1634]\tvalid_0's rmse: 0.0255383\n", + "[1635]\tvalid_0's rmse: 0.0255381\n", + "[1636]\tvalid_0's rmse: 0.0255379\n", + "[1637]\tvalid_0's rmse: 0.0255386\n", + "[1638]\tvalid_0's rmse: 0.0255391\n", + "[1639]\tvalid_0's rmse: 0.0255386\n", + "[1640]\tvalid_0's rmse: 0.0255322\n", + "[1641]\tvalid_0's rmse: 0.0255328\n", + "[1642]\tvalid_0's rmse: 0.0255273\n", + "[1643]\tvalid_0's rmse: 0.0255264\n", + "[1644]\tvalid_0's rmse: 0.0255262\n", + "[1645]\tvalid_0's rmse: 0.0255239\n", + "[1646]\tvalid_0's rmse: 0.0255234\n", + "[1647]\tvalid_0's rmse: 0.0255245\n", + "[1648]\tvalid_0's rmse: 0.0255188\n", + "[1649]\tvalid_0's rmse: 0.0255174\n", + "[1650]\tvalid_0's rmse: 0.0255231\n", + "[1651]\tvalid_0's rmse: 0.0255231\n", + "[1652]\tvalid_0's rmse: 0.0255237\n", + "[1653]\tvalid_0's rmse: 0.0255217\n", + "[1654]\tvalid_0's rmse: 0.025521\n", + "[1655]\tvalid_0's rmse: 0.0255201\n", + "[1656]\tvalid_0's rmse: 0.02552\n", + "[1657]\tvalid_0's rmse: 0.0255204\n", + "[1658]\tvalid_0's rmse: 0.0255194\n", + "[1659]\tvalid_0's rmse: 0.0255194\n", + "[1660]\tvalid_0's rmse: 0.0255194\n", + "[1661]\tvalid_0's rmse: 0.0255189\n", + "[1662]\tvalid_0's rmse: 0.0255192\n", + "[1663]\tvalid_0's rmse: 0.0255183\n", + "[1664]\tvalid_0's rmse: 0.0255186\n", + "[1665]\tvalid_0's rmse: 0.0255179\n", + "[1666]\tvalid_0's rmse: 0.0255182\n", + "[1667]\tvalid_0's rmse: 0.0255178\n", + "[1668]\tvalid_0's rmse: 0.0255175\n", + "[1669]\tvalid_0's rmse: 0.0255181\n", + "[1670]\tvalid_0's rmse: 0.0255179\n", + "[1671]\tvalid_0's rmse: 0.025517\n", + "[1672]\tvalid_0's rmse: 0.0255169\n", + "[1673]\tvalid_0's rmse: 0.0255012\n", + "[1674]\tvalid_0's rmse: 
0.0255018\n", + "[1675]\tvalid_0's rmse: 0.0255017\n", + "[1676]\tvalid_0's rmse: 0.0255032\n", + "[1677]\tvalid_0's rmse: 0.0255028\n", + "[1678]\tvalid_0's rmse: 0.0255035\n", + "[1679]\tvalid_0's rmse: 0.0255038\n", + "[1680]\tvalid_0's rmse: 0.0255043\n", + "[1681]\tvalid_0's rmse: 0.0255043\n", + "[1682]\tvalid_0's rmse: 0.0255052\n", + "[1683]\tvalid_0's rmse: 0.0255043\n", + "[1684]\tvalid_0's rmse: 0.0255045\n", + "[1685]\tvalid_0's rmse: 0.0255044\n", + "[1686]\tvalid_0's rmse: 0.0255039\n", + "[1687]\tvalid_0's rmse: 0.0255027\n", + "[1688]\tvalid_0's rmse: 0.0255026\n", + "[1689]\tvalid_0's rmse: 0.0255028\n", + "[1690]\tvalid_0's rmse: 0.0255036\n", + "[1691]\tvalid_0's rmse: 0.0255024\n", + "[1692]\tvalid_0's rmse: 0.0255021\n", + "[1693]\tvalid_0's rmse: 0.0255018\n", + "[1694]\tvalid_0's rmse: 0.0255018\n", + "[1695]\tvalid_0's rmse: 0.0255012\n", + "[1696]\tvalid_0's rmse: 0.0255006\n", + "[1697]\tvalid_0's rmse: 0.0255006\n", + "[1698]\tvalid_0's rmse: 0.0255005\n", + "[1699]\tvalid_0's rmse: 0.0254974\n", + "[1700]\tvalid_0's rmse: 0.0254964\n", + "[1701]\tvalid_0's rmse: 0.0254971\n", + "[1702]\tvalid_0's rmse: 0.0254974\n", + "[1703]\tvalid_0's rmse: 0.0254974\n", + "[1704]\tvalid_0's rmse: 0.0254945\n", + "[1705]\tvalid_0's rmse: 0.0254948\n", + "[1706]\tvalid_0's rmse: 0.0254947\n", + "[1707]\tvalid_0's rmse: 0.025495\n", + "[1708]\tvalid_0's rmse: 0.0254952\n", + "[1709]\tvalid_0's rmse: 0.025495\n", + "[1710]\tvalid_0's rmse: 0.0254946\n", + "[1711]\tvalid_0's rmse: 0.0254946\n", + "[1712]\tvalid_0's rmse: 0.0254923\n", + "[1713]\tvalid_0's rmse: 0.0254919\n", + "[1714]\tvalid_0's rmse: 0.0254932\n", + "[1715]\tvalid_0's rmse: 0.025493\n", + "[1716]\tvalid_0's rmse: 0.0254935\n", + "[1717]\tvalid_0's rmse: 0.025492\n", + "[1718]\tvalid_0's rmse: 0.0254914\n", + "[1719]\tvalid_0's rmse: 0.0254918\n", + "[1720]\tvalid_0's rmse: 0.0254917\n", + "[1721]\tvalid_0's rmse: 0.0254922\n", + "[1722]\tvalid_0's rmse: 0.0254925\n", + "[1723]\tvalid_0's 
rmse: 0.0254928\n", + "[1724]\tvalid_0's rmse: 0.0254932\n", + "[1725]\tvalid_0's rmse: 0.0254931\n", + "[1726]\tvalid_0's rmse: 0.0254933\n", + "[1727]\tvalid_0's rmse: 0.0254931\n", + "[1728]\tvalid_0's rmse: 0.0254962\n", + "[1729]\tvalid_0's rmse: 0.0254961\n", + "[1730]\tvalid_0's rmse: 0.0254956\n", + "[1731]\tvalid_0's rmse: 0.025495\n", + "[1732]\tvalid_0's rmse: 0.0254947\n", + "[1733]\tvalid_0's rmse: 0.0254938\n", + "[1734]\tvalid_0's rmse: 0.0254942\n", + "[1735]\tvalid_0's rmse: 0.0254946\n", + "[1736]\tvalid_0's rmse: 0.0254936\n", + "[1737]\tvalid_0's rmse: 0.0254922\n", + "[1738]\tvalid_0's rmse: 0.0254917\n", + "[1739]\tvalid_0's rmse: 0.025492\n", + "[1740]\tvalid_0's rmse: 0.025492\n", + "[1741]\tvalid_0's rmse: 0.0254923\n", + "[1742]\tvalid_0's rmse: 0.0254932\n", + "[1743]\tvalid_0's rmse: 0.0254933\n", + "[1744]\tvalid_0's rmse: 0.0254935\n", + "[1745]\tvalid_0's rmse: 0.0254933\n", + "[1746]\tvalid_0's rmse: 0.0254937\n", + "[1747]\tvalid_0's rmse: 0.0254928\n", + "[1748]\tvalid_0's rmse: 0.0254926\n", + "[1749]\tvalid_0's rmse: 0.0254945\n", + "[1750]\tvalid_0's rmse: 0.0254948\n", + "[1751]\tvalid_0's rmse: 0.025495\n", + "[1752]\tvalid_0's rmse: 0.025487\n", + "[1753]\tvalid_0's rmse: 0.0254868\n", + "[1754]\tvalid_0's rmse: 0.025486\n", + "[1755]\tvalid_0's rmse: 0.0254842\n", + "[1756]\tvalid_0's rmse: 0.0254837\n", + "[1757]\tvalid_0's rmse: 0.025483\n", + "[1758]\tvalid_0's rmse: 0.0254827\n", + "[1759]\tvalid_0's rmse: 0.0254805\n", + "[1760]\tvalid_0's rmse: 0.02548\n", + "[1761]\tvalid_0's rmse: 0.0254799\n", + "[1762]\tvalid_0's rmse: 0.0254799\n", + "[1763]\tvalid_0's rmse: 0.0254794\n", + "[1764]\tvalid_0's rmse: 0.0254783\n", + "[1765]\tvalid_0's rmse: 0.0254772\n", + "[1766]\tvalid_0's rmse: 0.0254773\n", + "[1767]\tvalid_0's rmse: 0.0254773\n", + "[1768]\tvalid_0's rmse: 0.0254767\n", + "[1769]\tvalid_0's rmse: 0.0254775\n", + "[1770]\tvalid_0's rmse: 0.0254774\n", + "[1771]\tvalid_0's rmse: 0.0254775\n", + "[1772]\tvalid_0's 
rmse: 0.0254769\n", + "[1773]\tvalid_0's rmse: 0.025477\n", + "[1774]\tvalid_0's rmse: 0.0254779\n", + "[1775]\tvalid_0's rmse: 0.025477\n", + "[1776]\tvalid_0's rmse: 0.0254767\n", + "[1777]\tvalid_0's rmse: 0.025474\n", + "[1778]\tvalid_0's rmse: 0.0254756\n", + "[1779]\tvalid_0's rmse: 0.0254761\n", + "[1780]\tvalid_0's rmse: 0.025476\n", + "[1781]\tvalid_0's rmse: 0.0254763\n", + "[1782]\tvalid_0's rmse: 0.0254763\n", + "[1783]\tvalid_0's rmse: 0.0254762\n", + "[1784]\tvalid_0's rmse: 0.0254749\n", + "[1785]\tvalid_0's rmse: 0.025473\n", + "[1786]\tvalid_0's rmse: 0.0254723\n", + "[1787]\tvalid_0's rmse: 0.0254712\n", + "[1788]\tvalid_0's rmse: 0.0254711\n", + "[1789]\tvalid_0's rmse: 0.0254718\n", + "[1790]\tvalid_0's rmse: 0.0254716\n", + "[1791]\tvalid_0's rmse: 0.0254721\n", + "[1792]\tvalid_0's rmse: 0.0254709\n", + "[1793]\tvalid_0's rmse: 0.0254738\n", + "[1794]\tvalid_0's rmse: 0.0254739\n", + "[1795]\tvalid_0's rmse: 0.025474\n", + "[1796]\tvalid_0's rmse: 0.0254719\n", + "[1797]\tvalid_0's rmse: 0.0254719\n", + "[1798]\tvalid_0's rmse: 0.0254734\n", + "[1799]\tvalid_0's rmse: 0.0254738\n", + "[1800]\tvalid_0's rmse: 0.0254739\n", + "[1801]\tvalid_0's rmse: 0.0254722\n", + "[1802]\tvalid_0's rmse: 0.0254725\n", + "[1803]\tvalid_0's rmse: 0.0254716\n", + "[1804]\tvalid_0's rmse: 0.0254717\n", + "[1805]\tvalid_0's rmse: 0.0254718\n", + "[1806]\tvalid_0's rmse: 0.025471\n", + "[1807]\tvalid_0's rmse: 0.0254714\n", + "[1808]\tvalid_0's rmse: 0.0254714\n", + "[1809]\tvalid_0's rmse: 0.0254713\n", + "[1810]\tvalid_0's rmse: 0.0254711\n", + "[1811]\tvalid_0's rmse: 0.0254716\n", + "[1812]\tvalid_0's rmse: 0.025472\n", + "[1813]\tvalid_0's rmse: 0.0254719\n", + "[1814]\tvalid_0's rmse: 0.0254712\n", + "[1815]\tvalid_0's rmse: 0.0254712\n", + "[1816]\tvalid_0's rmse: 0.0254708\n", + "[1817]\tvalid_0's rmse: 0.0254711\n", + "[1818]\tvalid_0's rmse: 0.0254701\n", + "[1819]\tvalid_0's rmse: 0.0254683\n", + "[1820]\tvalid_0's rmse: 0.0254685\n", + 
"[1821]\tvalid_0's rmse: 0.0254685\n", + "[1822]\tvalid_0's rmse: 0.0254687\n", + "[1823]\tvalid_0's rmse: 0.0254688\n", + "[1824]\tvalid_0's rmse: 0.0254686\n", + "[1825]\tvalid_0's rmse: 0.0254686\n", + "[1826]\tvalid_0's rmse: 0.0254685\n", + "[1827]\tvalid_0's rmse: 0.0254681\n", + "[1828]\tvalid_0's rmse: 0.0254681\n", + "[1829]\tvalid_0's rmse: 0.025468\n", + "[1830]\tvalid_0's rmse: 0.0254683\n", + "[1831]\tvalid_0's rmse: 0.025464\n", + "[1832]\tvalid_0's rmse: 0.0254641\n", + "[1833]\tvalid_0's rmse: 0.0254636\n", + "[1834]\tvalid_0's rmse: 0.0254633\n", + "[1835]\tvalid_0's rmse: 0.0254625\n", + "[1836]\tvalid_0's rmse: 0.0254622\n", + "[1837]\tvalid_0's rmse: 0.0254617\n", + "[1838]\tvalid_0's rmse: 0.0254617\n", + "[1839]\tvalid_0's rmse: 0.0254609\n", + "[1840]\tvalid_0's rmse: 0.025452\n", + "[1841]\tvalid_0's rmse: 0.0254516\n", + "[1842]\tvalid_0's rmse: 0.0254517\n", + "[1843]\tvalid_0's rmse: 0.0254523\n", + "[1844]\tvalid_0's rmse: 0.0254516\n", + "[1845]\tvalid_0's rmse: 0.0254519\n", + "[1846]\tvalid_0's rmse: 0.0254519\n", + "[1847]\tvalid_0's rmse: 0.0254506\n", + "[1848]\tvalid_0's rmse: 0.0254508\n", + "[1849]\tvalid_0's rmse: 0.0254503\n", + "[1850]\tvalid_0's rmse: 0.0254484\n", + "[1851]\tvalid_0's rmse: 0.0254485\n", + "[1852]\tvalid_0's rmse: 0.0254486\n", + "[1853]\tvalid_0's rmse: 0.0254492\n", + "[1854]\tvalid_0's rmse: 0.0254493\n", + "[1855]\tvalid_0's rmse: 0.0254488\n", + "[1856]\tvalid_0's rmse: 0.0254492\n", + "[1857]\tvalid_0's rmse: 0.0254538\n", + "[1858]\tvalid_0's rmse: 0.0254541\n", + "[1859]\tvalid_0's rmse: 0.0254591\n", + "[1860]\tvalid_0's rmse: 0.0254593\n", + "[1861]\tvalid_0's rmse: 0.0254593\n", + "[1862]\tvalid_0's rmse: 0.0254589\n", + "[1863]\tvalid_0's rmse: 0.0254589\n", + "[1864]\tvalid_0's rmse: 0.0254596\n", + "[1865]\tvalid_0's rmse: 0.0254593\n", + "[1866]\tvalid_0's rmse: 0.02546\n", + "[1867]\tvalid_0's rmse: 0.0254596\n", + "[1868]\tvalid_0's rmse: 0.0254609\n", + "[1869]\tvalid_0's rmse: 
0.0254586\n", + "[1870]\tvalid_0's rmse: 0.0254583\n", + "[1871]\tvalid_0's rmse: 0.0254584\n", + "[1872]\tvalid_0's rmse: 0.0254582\n", + "[1873]\tvalid_0's rmse: 0.025458\n", + "[1874]\tvalid_0's rmse: 0.0254559\n", + "[1875]\tvalid_0's rmse: 0.0254556\n", + "[1876]\tvalid_0's rmse: 0.0254552\n", + "[1877]\tvalid_0's rmse: 0.0254551\n", + "[1878]\tvalid_0's rmse: 0.0254557\n", + "[1879]\tvalid_0's rmse: 0.0254539\n", + "[1880]\tvalid_0's rmse: 0.0254533\n", + "[1881]\tvalid_0's rmse: 0.0254524\n", + "[1882]\tvalid_0's rmse: 0.0254525\n", + "[1883]\tvalid_0's rmse: 0.0254542\n", + "[1884]\tvalid_0's rmse: 0.0254548\n", + "[1885]\tvalid_0's rmse: 0.0254539\n", + "[1886]\tvalid_0's rmse: 0.0254536\n", + "[1887]\tvalid_0's rmse: 0.0254537\n", + "[1888]\tvalid_0's rmse: 0.0254532\n", + "[1889]\tvalid_0's rmse: 0.0254555\n", + "[1890]\tvalid_0's rmse: 0.0254548\n", + "[1891]\tvalid_0's rmse: 0.0254549\n", + "[1892]\tvalid_0's rmse: 0.0254548\n", + "[1893]\tvalid_0's rmse: 0.0254545\n", + "[1894]\tvalid_0's rmse: 0.0254543\n", + "[1895]\tvalid_0's rmse: 0.0254553\n", + "[1896]\tvalid_0's rmse: 0.0254551\n", + "[1897]\tvalid_0's rmse: 0.0254553\n", + "[1898]\tvalid_0's rmse: 0.0254557\n", + "[1899]\tvalid_0's rmse: 0.0254553\n", + "[1900]\tvalid_0's rmse: 0.0254554\n", + "[1901]\tvalid_0's rmse: 0.025455\n", + "[1902]\tvalid_0's rmse: 0.0254548\n", + "[1903]\tvalid_0's rmse: 0.0254559\n", + "[1904]\tvalid_0's rmse: 0.025455\n", + "[1905]\tvalid_0's rmse: 0.0254548\n", + "[1906]\tvalid_0's rmse: 0.0254548\n", + "[1907]\tvalid_0's rmse: 0.025454\n", + "[1908]\tvalid_0's rmse: 0.0254535\n", + "[1909]\tvalid_0's rmse: 0.0254534\n", + "[1910]\tvalid_0's rmse: 0.0254536\n", + "[1911]\tvalid_0's rmse: 0.0254536\n", + "[1912]\tvalid_0's rmse: 0.0254531\n", + "[1913]\tvalid_0's rmse: 0.0254532\n", + "[1914]\tvalid_0's rmse: 0.0254535\n", + "[1915]\tvalid_0's rmse: 0.0254525\n", + "[1916]\tvalid_0's rmse: 0.025452\n", + "[1917]\tvalid_0's rmse: 0.0254519\n", + "[1918]\tvalid_0's 
rmse: 0.0254518\n", + "[1919]\tvalid_0's rmse: 0.0254515\n", + "[1920]\tvalid_0's rmse: 0.0254513\n", + "[1921]\tvalid_0's rmse: 0.0254524\n", + "[1922]\tvalid_0's rmse: 0.0254529\n", + "[1923]\tvalid_0's rmse: 0.0254551\n", + "[1924]\tvalid_0's rmse: 0.0254534\n", + "[1925]\tvalid_0's rmse: 0.0254535\n", + "[1926]\tvalid_0's rmse: 0.0254536\n", + "[1927]\tvalid_0's rmse: 0.0254536\n", + "[1928]\tvalid_0's rmse: 0.0254538\n", + "[1929]\tvalid_0's rmse: 0.0254538\n", + "[1930]\tvalid_0's rmse: 0.0254529\n", + "[1931]\tvalid_0's rmse: 0.0254529\n", + "[1932]\tvalid_0's rmse: 0.0254527\n", + "[1933]\tvalid_0's rmse: 0.0254525\n", + "[1934]\tvalid_0's rmse: 0.0254524\n", + "[1935]\tvalid_0's rmse: 0.0254518\n", + "[1936]\tvalid_0's rmse: 0.0254518\n", + "[1937]\tvalid_0's rmse: 0.0254518\n", + "[1938]\tvalid_0's rmse: 0.0254512\n", + "[1939]\tvalid_0's rmse: 0.0254511\n", + "[1940]\tvalid_0's rmse: 0.0254517\n", + "[1941]\tvalid_0's rmse: 0.0254514\n", + "[1942]\tvalid_0's rmse: 0.0254517\n", + "[1943]\tvalid_0's rmse: 0.0254503\n", + "[1944]\tvalid_0's rmse: 0.0254474\n", + "[1945]\tvalid_0's rmse: 0.0254471\n", + "[1946]\tvalid_0's rmse: 0.0254472\n", + "[1947]\tvalid_0's rmse: 0.0254473\n", + "[1948]\tvalid_0's rmse: 0.0254469\n", + "[1949]\tvalid_0's rmse: 0.0254462\n", + "[1950]\tvalid_0's rmse: 0.0254464\n", + "[1951]\tvalid_0's rmse: 0.025446\n", + "[1952]\tvalid_0's rmse: 0.025446\n", + "[1953]\tvalid_0's rmse: 0.0254422\n", + "[1954]\tvalid_0's rmse: 0.0254356\n", + "[1955]\tvalid_0's rmse: 0.0254358\n", + "[1956]\tvalid_0's rmse: 0.0254357\n", + "[1957]\tvalid_0's rmse: 0.0254344\n", + "[1958]\tvalid_0's rmse: 0.0254348\n", + "[1959]\tvalid_0's rmse: 0.0254348\n", + "[1960]\tvalid_0's rmse: 0.0254347\n", + "[1961]\tvalid_0's rmse: 0.0254346\n", + "[1962]\tvalid_0's rmse: 0.0254346\n", + "[1963]\tvalid_0's rmse: 0.0254344\n", + "[1964]\tvalid_0's rmse: 0.0254341\n", + "[1965]\tvalid_0's rmse: 0.0254337\n", + "[1966]\tvalid_0's rmse: 0.0254337\n", + 
"[1967]\tvalid_0's rmse: 0.0254335\n", + "[1968]\tvalid_0's rmse: 0.0254336\n", + "[1969]\tvalid_0's rmse: 0.0254336\n", + "[1970]\tvalid_0's rmse: 0.0254333\n", + "[1971]\tvalid_0's rmse: 0.0254335\n", + "[1972]\tvalid_0's rmse: 0.0254333\n", + "[1973]\tvalid_0's rmse: 0.0254328\n", + "[1974]\tvalid_0's rmse: 0.0254329\n", + "[1975]\tvalid_0's rmse: 0.0254329\n", + "[1976]\tvalid_0's rmse: 0.0254334\n", + "[1977]\tvalid_0's rmse: 0.0254333\n", + "[1978]\tvalid_0's rmse: 0.0254336\n", + "[1979]\tvalid_0's rmse: 0.0254342\n", + "[1980]\tvalid_0's rmse: 0.0254343\n", + "[1981]\tvalid_0's rmse: 0.0254338\n", + "[1982]\tvalid_0's rmse: 0.0254341\n", + "[1983]\tvalid_0's rmse: 0.0254341\n", + "[1984]\tvalid_0's rmse: 0.0254343\n", + "[1985]\tvalid_0's rmse: 0.0254342\n", + "[1986]\tvalid_0's rmse: 0.0254341\n", + "[1987]\tvalid_0's rmse: 0.0254347\n", + "[1988]\tvalid_0's rmse: 0.025435\n", + "[1989]\tvalid_0's rmse: 0.0254349\n", + "[1990]\tvalid_0's rmse: 0.0254338\n", + "[1991]\tvalid_0's rmse: 0.0254339\n", + "[1992]\tvalid_0's rmse: 0.0254342\n", + "[1993]\tvalid_0's rmse: 0.0254341\n", + "[1994]\tvalid_0's rmse: 0.0254341\n", + "[1995]\tvalid_0's rmse: 0.0254339\n", + "[1996]\tvalid_0's rmse: 0.0254349\n", + "[1997]\tvalid_0's rmse: 0.025434\n", + "[1998]\tvalid_0's rmse: 0.0254327\n", + "[1999]\tvalid_0's rmse: 0.0254326\n", + "[2000]\tvalid_0's rmse: 0.025432\n", + "Did not meet early stopping. 
Best iteration is:\n", + "[2000]\tvalid_0's rmse: 0.025432\n" + ] + } + ], + "source": [ + "gbm = lgb.train(params_gbm, lgb_train, num_boost_round=2000, valid_sets=lgb_eval, early_stopping_rounds=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "y_pred = gbm.predict(X_test)\n", + "y_true = Y_test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 3.7E-04\n", + "RMSE: 0.019\n", + "MAE: 0.013\n", + "MAPE: 2.64 %\n", + "R_2: 0.93\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true, y_pred)\n", + "RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + "MAE = mean_absolute_error(y_true, y_pred)\n", + "MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + "R_2 = r2_score(y_true, y_pred)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 3))\n", + "print('MAE:', round(MAE, 3))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差a" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [], + "source": [ + "dtrain = xgb.DMatrix(X_train, Y_train)\n", + "dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + "dtest = xgb.DMatrix(X_test, Y_test)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 102, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBRegressor\n", + "from bayes_opt import BayesianOptimization" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 103, + "outputs": [], + "source": [ + "def xgb_cv(max_depth, learning_rate, n_estimators, 
min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n", + " val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth),\n", + " learning_rate=learning_rate,\n", + " n_estimators=int(n_estimators),\n", + " min_child_weight=min_child_weight,\n", + " subsample=max(min(subsample, 1), 0),\n", + " colsample_bytree=max(min(colsample_bytree, 1), 0),\n", + " reg_alpha=max(reg_alpha, 0), gamma=gamma, objective='reg:squarederror',\n", + " booster='gbtree',\n", + " seed=666), X=use_data[feature_cols], y=use_data.values[:1], scoring='r2',\n", + " cv=10).max()\n", + " return val" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 104, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| iter | target | colsam... | gamma | learni... | max_depth | min_ch... | n_esti... | reg_alpha | subsample |\n", + "-------------------------------------------------------------------------------------------------------------------------\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Found input variables with inconsistent numbers of samples: [3080, 1]", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)", + "\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_17148\\1576227182.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 7\u001B[0m \u001B[1;34m'reg_alpha'\u001B[0m\u001B[1;33m:\u001B[0m \u001B[1;33m(\u001B[0m\u001B[1;36m0.001\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;36m10\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 8\u001B[0m 'gamma': (0.001, 10)})\n\u001B[1;32m----> 9\u001B[1;33m 
\u001B[0mxgb_bo\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmaximize\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mn_iter\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m100\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0minit_points\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\bayes_opt\\bayesian_optimization.py\u001B[0m in \u001B[0;36mmaximize\u001B[1;34m(self, init_points, n_iter, acquisition_function, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)\u001B[0m\n\u001B[0;32m 309\u001B[0m \u001B[0mx_probe\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0msuggest\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mutil\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 310\u001B[0m \u001B[0miteration\u001B[0m \u001B[1;33m+=\u001B[0m \u001B[1;36m1\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 311\u001B[1;33m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mprobe\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mx_probe\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mlazy\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;32mFalse\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 312\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 313\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_bounds_transformer\u001B[0m \u001B[1;32mand\u001B[0m \u001B[0miteration\u001B[0m \u001B[1;33m>\u001B[0m \u001B[1;36m0\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\bayes_opt\\bayesian_optimization.py\u001B[0m in \u001B[0;36mprobe\u001B[1;34m(self, params, lazy)\u001B[0m\n\u001B[0;32m 206\u001B[0m 
\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_queue\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0madd\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 207\u001B[0m \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 208\u001B[1;33m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_space\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mprobe\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 209\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdispatch\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mEvents\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mOPTIMIZATION_STEP\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 210\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\bayes_opt\\target_space.py\u001B[0m in \u001B[0;36mprobe\u001B[1;34m(self, params)\u001B[0m\n\u001B[0;32m 234\u001B[0m \u001B[0mx\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_as_array\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 235\u001B[0m \u001B[0mparams\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mdict\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mzip\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_keys\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mx\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 236\u001B[1;33m \u001B[0mtarget\u001B[0m \u001B[1;33m=\u001B[0m 
\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mtarget_func\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m**\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 237\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 238\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_constraint\u001B[0m \u001B[1;32mis\u001B[0m \u001B[1;32mNone\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_17148\\2288155185.py\u001B[0m in \u001B[0;36mxgb_cv\u001B[1;34m(max_depth, learning_rate, n_estimators, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma)\u001B[0m\n\u001B[0;32m 9\u001B[0m \u001B[0mbooster\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;34m'gbtree'\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 10\u001B[0m seed=666), X=use_data[feature_cols], y=use_data.values[:1], scoring='r2',\n\u001B[1;32m---> 11\u001B[1;33m cv=10).max()\n\u001B[0m\u001B[0;32m 12\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mval\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\u001B[0m in \u001B[0;36mcross_val_score\u001B[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)\u001B[0m\n\u001B[0;32m 518\u001B[0m \u001B[0mfit_params\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mfit_params\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 519\u001B[0m \u001B[0mpre_dispatch\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mpre_dispatch\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 520\u001B[1;33m 
\u001B[0merror_score\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0merror_score\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 521\u001B[0m )\n\u001B[0;32m 522\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mcv_results\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;34m\"test_score\"\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\u001B[0m in \u001B[0;36mcross_validate\u001B[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)\u001B[0m\n\u001B[0;32m 251\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 252\u001B[0m \"\"\"\n\u001B[1;32m--> 253\u001B[1;33m \u001B[0mX\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mgroups\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mindexable\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mgroups\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 254\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 255\u001B[0m \u001B[0mcv\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mcheck_cv\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mcv\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mclassifier\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mis_classifier\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mestimator\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\utils\\validation.py\u001B[0m in \u001B[0;36mindexable\u001B[1;34m(*iterables)\u001B[0m\n\u001B[0;32m 376\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 377\u001B[0m \u001B[0mresult\u001B[0m \u001B[1;33m=\u001B[0m 
\u001B[1;33m[\u001B[0m\u001B[0m_make_indexable\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;32mfor\u001B[0m \u001B[0mX\u001B[0m \u001B[1;32min\u001B[0m \u001B[0miterables\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 378\u001B[1;33m \u001B[0mcheck_consistent_length\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m*\u001B[0m\u001B[0mresult\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 379\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mresult\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 380\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\utils\\validation.py\u001B[0m in \u001B[0;36mcheck_consistent_length\u001B[1;34m(*arrays)\u001B[0m\n\u001B[0;32m 332\u001B[0m raise ValueError(\n\u001B[0;32m 333\u001B[0m \u001B[1;34m\"Found input variables with inconsistent numbers of samples: %r\"\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 334\u001B[1;33m \u001B[1;33m%\u001B[0m \u001B[1;33m[\u001B[0m\u001B[0mint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0ml\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;32mfor\u001B[0m \u001B[0ml\u001B[0m \u001B[1;32min\u001B[0m \u001B[0mlengths\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 335\u001B[0m )\n\u001B[0;32m 336\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;31mValueError\u001B[0m: Found input variables with inconsistent numbers of samples: [3080, 1]" + ] + } + ], + "source": [ + "xgb_bo = BayesianOptimization(xgb_cv, pbounds={'max_depth': (20, 60),\n", + " 'learning_rate': (0.005, 0.1),\n", + " 'n_estimators': (100, 2000),\n", + " 'min_child_weight': (0, 30),\n", + " 'subsample': (0.05, 1),\n", + " 'colsample_bytree': (0.1, 1),\n", + " 'reg_alpha': (0.001, 10),\n", + " 'gamma': (0.001, 10)})\n", + 
"xgb_bo.maximize(n_iter=100, init_points=10)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 105, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.037,\n", + " 'max_depth': 30,\n", + " 'subsample': 1.0,\n", + " 'colsample_bytree': 0.47,\n", + " 'min_child_weight': 30,\n", + " 'seed': 42}\n", + "num_boost_round = 2000\n", + "\n", + "dtrain = xgb.DMatrix(X_train, Y_train)\n", + "dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 106, + "outputs": [], + "source": [ + "y_pred_xgb = np.expm1(gb_model.predict(xgb.DMatrix(X_test)))\n", + "y_true_xgb = np.expm1(Y_test.values)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 107, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.1E-05\n", + "RMSE: 0.003\n", + "MAE: 0.002\n", + "MAPE: 2.99 %\n", + "R_2: 0.88\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_xgb, y_pred_xgb))\n", + "MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n", + "MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n", + "R_2 = r2_score(y_true_xgb, y_pred_xgb)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 3))\n", + "print('MAE:', round(MAE, 3))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差a" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + 
} + } + }, + { + "cell_type": "code", + "execution_count": 108, + "outputs": [], + "source": [ + "kf = KFold(n_splits=10, shuffle=True, random_state=42)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 109, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.8E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.47 %, R_2: 0.776\n", + "MSE: 1.8E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.19 %, R_2: 0.83\n", + "MSE: 1.8E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.87 %, R_2: 0.811\n", + "MSE: 1.2E-05, RMSE: 0.003, MAE: 0.002, MAPE: 2.96 %, R_2: 0.861\n", + "MSE: 1.9E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.65 %, R_2: 0.775\n", + "MSE: 1.9E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.56 %, R_2: 0.789\n", + "MSE: 2.3E-05, RMSE: 0.005, MAE: 0.002, MAPE: 3.05 %, R_2: 0.723\n", + "MSE: 2.5E-05, RMSE: 0.005, MAE: 0.002, MAPE: 3.94 %, R_2: 0.717\n", + "MSE: 1.0E-05, RMSE: 0.003, MAE: 0.002, MAPE: 2.9 %, R_2: 0.864\n", + "MSE: 9.4E-06, RMSE: 0.003, MAE: 0.002, MAPE: 2.89 %, R_2: 0.881\n" + ] + } + ], + "source": [ + "eva_list = list()\n", + "for (train_index, test_index) in kf.split(use_data):\n", + " train = use_data.loc[train_index]\n", + " test = use_data.loc[test_index]\n", + " train, valid = train_test_split(train, test_size=0.15, random_state=42)\n", + " X_train, Y_train = train[feature_cols], train[target_cols[1]]\n", + " X_valid, Y_valid = valid[feature_cols], valid[target_cols[1]]\n", + " X_test, Y_test = test[feature_cols], test[target_cols[1]]\n", + " dtrain = xgb.DMatrix(X_train, Y_train)\n", + " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + " watchlist = [(dvalid, 'eval')]\n", + " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n", + " y_pred = gb_model.predict(xgb.DMatrix(X_test))\n", + " y_true = Y_test.values\n", + " MSE = mean_squared_error(y_true, y_pred)\n", + " RMSE = 
np.sqrt(mean_squared_error(y_true, y_pred))\n", + " MAE = mean_absolute_error(y_true, y_pred)\n", + " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + " R_2 = r2_score(y_true, y_pred)\n", + " print('MSE:', format(MSE, '.1E'), end=', ')\n", + " print('RMSE:', round(RMSE, 3), end=', ')\n", + " print('MAE:', round(MAE, 3), end=', ')\n", + " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", + " print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差\n", + " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 110, + "outputs": [], + "source": [ + "record = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 120, + "outputs": [ + { + "data": { + "text/plain": " MSE RMSE MAE MAPE R2\n0 0.000018 0.004221 0.002394 0.034705 0.775560\n1 0.000018 0.004191 0.002405 0.031921 0.829931\n2 0.000018 0.004249 0.002235 0.038677 0.810649\n3 0.000012 0.003395 0.002090 0.029607 0.861337\n4 0.000019 0.004334 0.002302 0.036496 0.775066\n5 0.000019 0.004367 0.002260 0.035588 0.789063\n6 0.000023 0.004806 0.002272 0.030522 0.723082\n7 0.000025 0.004968 0.002401 0.039428 0.717094\n8 0.000010 0.003207 0.002037 0.029033 0.863679\n9 0.000009 0.003072 0.002008 0.028871 0.880821", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
MSERMSEMAEMAPER2
00.0000180.0042210.0023940.0347050.775560
10.0000180.0041910.0024050.0319210.829931
20.0000180.0042490.0022350.0386770.810649
30.0000120.0033950.0020900.0296070.861337
40.0000190.0043340.0023020.0364960.775066
50.0000190.0043670.0022600.0355880.789063
60.0000230.0048060.0022720.0305220.723082
70.0000250.0049680.0024010.0394280.717094
80.0000100.0032070.0020370.0290330.863679
90.0000090.0030720.0020080.0288710.880821
\n
" + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 124, + "outputs": [ + { + "data": { + "text/plain": " MSE RMSE MAE MAPE R2\n8 0.00001 0.003207 0.002037 0.029033 0.863679", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
MSERMSEMAEMAPER2
80.000010.0032070.0020370.0290330.863679
\n
" + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 126, + "outputs": [], + "source": [ + "index = [0, 1, 2, 3, 4, 5, 6, 8]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 128, + "outputs": [ + { + "data": { + "text/plain": "MSE 0.000017\nRMSE 0.004096\nMAE 0.002249\nMAPE 0.033319\nR2 0.803546\ndtype: float64" + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record.loc[index].mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 63, + "outputs": [ + { + "data": { + "text/plain": "MSE 0.000552\nRMSE 0.022978\nMAE 0.014251\nMAPE 0.034105\nR2 0.896138\ndtype: float64" + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record.mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 57, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "#新增加的两行\n", + "from pylab import mpl\n", + "# 设置显示中文字体\n", + "mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n", + "\n", + "mpl.rcParams[\"axes.unicode_minus\"] = False" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 58, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(16, 10))\n", + "plt.plot(range(len(y_true)), y_true, 'o-', label='真实值')\n", + "plt.plot(range(len(y_pred)), y_pred, '*-', label='预测值')\n", + "plt.legend(loc='best')\n", + "plt.title('预测结果')\n", + "plt.savefig('./figure/CO2排放强度预测结果.png')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 59, + "outputs": [], + "source": [ + "pd.DataFrame.from_records([y_pred, y_true]).T.to_csv('pred.csv')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 60, + "outputs": [], + "source": [ + "rst = pd.DataFrame.from_records(([y_true_xgb, y_pred_xgb])).T\n", + "rst.columns = ['y_true', 'y_pred']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 61, + "outputs": [], + "source": [ + "rst['mAP'] = abs(rst.y_pred - rst.y_true) / rst.y_true" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 62, + "outputs": [ + { + "data": { + "text/plain": " y_true y_pred mAP\n23 0.233161 0.228589 0.019609\n46 0.242031 0.260373 0.075782\n42 0.233845 0.215675 0.077700\n1 0.233773 0.237715 0.016864\n58 0.258407 0.259042 0.002460\n41 0.233404 0.246465 0.055956\n15 0.249245 0.248289 0.003837\n63 0.237670 0.284324 0.196296\n59 0.244008 0.242001 0.008228\n37 0.252681 0.251169 0.005983", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
y_truey_predmAP
230.2331610.2285890.019609
460.2420310.2603730.075782
420.2338450.2156750.077700
10.2337730.2377150.016864
580.2584070.2590420.002460
410.2334040.2464650.055956
150.2492450.2482890.003837
630.2376700.2843240.196296
590.2440080.2420010.008228
370.2526810.2511690.005983
\n
" + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rst.sort_values(by='mAP').sample(10)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 63, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(16, 10))\n", + "plt.plot(range(len(y_true_xgb)), y_true_xgb, 'o-', label='真实值')\n", + "plt.plot(range(len(y_pred_xgb)), y_pred_xgb, '*-', label='预测值')\n", + "plt.legend(loc='best')\n", + "plt.title('预测结果')\n", + "plt.savefig('./figure/CO2排放强度预测结果.png')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## 煤种标准化工程" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 73, + "outputs": [], + "source": [ + "new_values = total_data.groupby(['煤种', '入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']).CO2_em_air.mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 74, + "outputs": [ + { + "data": { + "text/plain": " 煤种 入炉煤低位热值_new 燃煤挥发份Var_new 燃煤灰份Aar_new\n0 无烟煤 17050.00 6.51 31.330000\n1 无烟煤 18440.00 9.13 21.240189\n2 无烟煤 19335.65 7.06 21.400000\n3 无烟煤 20125.07 5.70 29.850000\n4 无烟煤 20463.30 5.70 29.790000\n.. ... ... ... ...\n622 贫煤 21772.91 10.66 26.320000\n623 贫煤 21907.00 10.64 28.100000\n624 贫煤 22042.72 12.96 25.690000\n625 贫煤 23215.00 11.00 19.310000\n626 贫煤 23791.00 11.00 19.310000\n\n[627 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
煤种入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new
0无烟煤17050.006.5131.330000
1无烟煤18440.009.1321.240189
2无烟煤19335.657.0621.400000
3无烟煤20125.075.7029.850000
4无烟煤20463.305.7029.790000
...............
622贫煤21772.9110.6626.320000
623贫煤21907.0010.6428.100000
624贫煤22042.7212.9625.690000
625贫煤23215.0011.0019.310000
626贫煤23791.0011.0019.310000
\n

627 rows × 4 columns

\n
" + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coal_df = new_values.reset_index().drop(columns='CO2_em_air')\n", + "coal_df" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 75, + "outputs": [], + "source": [ + "coal_params_dict = dict()\n", + "for coal_type in coal_df['煤种'].unique().tolist():\n", + " options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']].values\n", + " coal_params_dict[coal_type] = options" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 76, + "outputs": [ + { + "data": { + "text/plain": "{'无烟煤': array([[1.70500000e+04, 6.51000000e+00, 3.13300000e+01],\n [1.84400000e+04, 9.13000000e+00, 2.12401894e+01],\n [1.93356500e+04, 7.06000000e+00, 2.14000000e+01],\n [2.01250700e+04, 5.70000000e+00, 2.98500000e+01],\n [2.04633000e+04, 5.70000000e+00, 2.97900000e+01]]),\n '烟煤': array([[1.277100e+04, 2.126000e+01, 3.355000e+01],\n [1.500000e+04, 2.346000e+01, 1.904000e+01],\n [1.610000e+04, 2.333000e+01, 1.873000e+01],\n ...,\n [2.348751e+04, 2.927000e+01, 2.097000e+01],\n [2.365000e+04, 2.887000e+01, 7.910000e+00],\n [2.365614e+04, 2.927000e+01, 2.097000e+01]]),\n '褐煤': array([[1.059800e+04, 2.476000e+01, 2.179000e+01],\n [1.129000e+04, 4.764000e+01, 3.079000e+01],\n [1.160400e+04, 4.758000e+01, 3.025000e+01],\n [1.172435e+04, 4.601000e+01, 3.673000e+01],\n [1.203000e+04, 4.726000e+01, 3.119000e+01],\n [1.213546e+04, 4.642000e+01, 1.113000e+01],\n [1.217290e+04, 4.642000e+01, 1.113000e+01],\n [1.219256e+04, 4.642000e+01, 1.113000e+01],\n [1.221131e+04, 4.642000e+01, 1.113000e+01],\n [1.230939e+04, 4.642000e+01, 1.113000e+01],\n [1.233780e+04, 4.642000e+01, 1.113000e+01],\n [1.267400e+04, 4.324000e+01, 1.237000e+01],\n [1.278700e+04, 4.884000e+01, 4.117000e+01],\n [1.295100e+04, 
2.228000e+01, 1.287000e+01],\n [1.299880e+04, 2.256000e+01, 1.716000e+01],\n [1.311100e+04, 2.367000e+01, 2.107000e+01],\n [1.313000e+04, 2.417000e+01, 1.630000e+01],\n [1.318000e+04, 2.445000e+01, 1.794000e+01],\n [1.320830e+04, 2.451000e+01, 1.429000e+01],\n [1.325722e+04, 1.703000e+01, 3.660000e+01],\n [1.327000e+04, 3.204000e+01, 1.709000e+01],\n [1.327300e+04, 2.364000e+01, 1.622000e+01],\n [1.327300e+04, 2.458000e+01, 1.261000e+01],\n [1.332771e+04, 4.090000e+01, 2.507000e+01],\n [1.333064e+04, 1.680000e+01, 3.741000e+01],\n [1.335883e+04, 2.301000e+01, 1.841000e+01],\n [1.336864e+04, 2.301000e+01, 1.841000e+01],\n [1.343787e+04, 2.336000e+01, 1.705000e+01],\n [1.344000e+04, 4.782000e+01, 2.290000e+01],\n [1.345749e+04, 2.388000e+01, 1.652000e+01],\n [1.357000e+04, 1.799000e+01, 2.177000e+01],\n [1.364000e+04, 2.526000e+01, 2.108000e+01],\n [1.365410e+04, 2.232000e+01, 1.171000e+01],\n [1.369000e+04, 4.771000e+01, 2.205000e+01],\n [1.382000e+04, 2.420000e+01, 1.104000e+01],\n [1.389597e+04, 2.232000e+01, 1.171000e+01],\n [1.390000e+04, 3.683000e+01, 4.441000e+01],\n [1.395400e+04, 2.310000e+01, 1.011000e+01],\n [1.396000e+04, 4.665000e+01, 1.890000e+01],\n [1.400000e+04, 4.520000e+01, 1.364000e+01],\n [1.404100e+04, 2.346000e+01, 1.046000e+01],\n [1.410900e+04, 4.520000e+01, 1.364000e+01],\n [1.412200e+04, 2.478000e+01, 1.916000e+01],\n [1.419900e+04, 4.733000e+01, 1.697000e+01],\n [1.433937e+04, 2.476000e+01, 3.371000e+01],\n [1.440000e+04, 2.589000e+01, 1.643000e+01],\n [1.442729e+04, 4.474000e+01, 1.193000e+01],\n [1.446814e+04, 2.484000e+01, 3.331000e+01],\n [1.448810e+04, 3.554000e+01, 1.171000e+01],\n [1.458200e+04, 2.834000e+01, 2.320000e+01],\n [1.460000e+04, 2.714000e+01, 4.346000e+01],\n [1.462400e+04, 4.613000e+01, 2.700000e+01],\n [1.463500e+04, 4.613000e+01, 2.700000e+01],\n [1.464000e+04, 4.439000e+01, 1.684000e+01],\n [1.470100e+04, 2.210000e+01, 4.588000e+01],\n [1.481078e+04, 4.501000e+01, 1.325000e+01],\n [1.489878e+04, 2.386000e+01, 
3.161000e+01],\n [1.507938e+04, 4.501000e+01, 1.325000e+01],\n [1.512117e+04, 2.355000e+01, 1.472000e+01],\n [1.517400e+04, 3.126000e+01, 1.696000e+01],\n [1.523800e+04, 2.492000e+01, 2.378000e+01],\n [1.524041e+04, 2.355000e+01, 1.472000e+01],\n [1.528927e+04, 2.345000e+01, 1.554000e+01],\n [1.534700e+04, 2.492000e+01, 2.378000e+01],\n [1.536708e+04, 4.501000e+01, 8.590000e+00],\n [1.540000e+04, 2.450000e+01, 2.085000e+01],\n [1.560165e+04, 2.345000e+01, 1.554000e+01],\n [1.562100e+04, 4.409000e+01, 1.019000e+01],\n [1.568455e+04, 1.865000e+01, 3.545000e+01],\n [1.599544e+04, 1.865000e+01, 3.545000e+01],\n [1.619823e+04, 2.032000e+01, 3.297000e+01],\n [1.619823e+04, 2.075000e+01, 3.310000e+01],\n [1.619951e+04, 1.790000e+01, 3.976000e+01],\n [1.620200e+04, 1.268000e+01, 4.012000e+01],\n [1.638000e+04, 2.264000e+01, 2.024000e+01],\n [1.644918e+04, 2.061000e+01, 3.224000e+01],\n [1.644918e+04, 2.087000e+01, 3.238000e+01],\n [1.660450e+04, 3.484000e+01, 9.590000e+00],\n [1.662400e+04, 1.287000e+01, 3.909000e+01],\n [1.667800e+04, 1.320000e+01, 3.884000e+01],\n [1.701000e+04, 2.721000e+01, 4.295000e+01],\n [1.711359e+04, 3.560000e+01, 9.440000e+00],\n [1.721702e+04, 3.266000e+01, 6.030000e+00],\n [1.732699e+04, 3.266000e+01, 6.030000e+00],\n [1.769205e+04, 3.632000e+01, 8.880000e+00],\n [1.783200e+04, 3.564000e+01, 2.418000e+01],\n [1.792600e+04, 3.563000e+01, 2.488000e+01],\n [1.802919e+04, 3.526000e+01, 7.680000e+00],\n [1.811583e+04, 3.348000e+01, 1.236000e+01],\n [1.815944e+04, 3.348000e+01, 1.236000e+01],\n [1.834900e+04, 3.542000e+01, 1.152000e+01],\n [1.862400e+04, 3.951000e+01, 1.937000e+01],\n [1.877383e+04, 2.676000e+01, 3.448000e+01],\n [1.877602e+04, 2.676000e+01, 3.448000e+01],\n [1.882100e+04, 2.678000e+01, 3.445000e+01],\n [1.884200e+04, 2.685000e+01, 3.451000e+01],\n [1.896000e+04, 3.951000e+01, 1.937000e+01],\n [1.903900e+04, 2.580000e+01, 2.420000e+01],\n [1.908760e+04, 3.426000e+01, 4.580000e+00],\n [1.918000e+04, 2.670000e+01, 2.480000e+01],\n 
[1.922827e+04, 3.426000e+01, 4.580000e+00],\n [1.924675e+04, 3.243000e+01, 7.700000e+00],\n [1.927600e+04, 3.200000e+01, 7.700000e+00],\n [1.959900e+04, 3.514000e+01, 1.065000e+01],\n [1.964010e+04, 3.446000e+01, 4.600000e+00],\n [1.965200e+04, 2.990000e+01, 2.406000e+01],\n [1.974233e+04, 3.422000e+01, 2.892000e+01],\n [1.976235e+04, 3.414000e+01, 2.934000e+01],\n [1.977612e+04, 3.446000e+01, 4.600000e+00],\n [1.993700e+04, 3.514000e+01, 1.065000e+01],\n [1.997000e+04, 3.533000e+01, 9.050000e+00],\n [2.003000e+04, 3.948000e+01, 3.080000e+01],\n [2.006000e+04, 3.911000e+01, 3.080000e+01],\n [2.011300e+04, 2.560000e+01, 2.312000e+01],\n [2.017338e+04, 2.979000e+01, 1.814000e+01],\n [2.025484e+04, 2.979000e+01, 1.814000e+01],\n [2.028500e+04, 3.009000e+01, 1.125000e+01],\n [2.057100e+04, 3.147000e+01, 2.478000e+01],\n [2.062600e+04, 2.627000e+01, 2.050000e+01],\n [2.066423e+04, 2.752000e+01, 2.014000e+01],\n [2.067360e+04, 2.840000e+01, 2.165000e+01],\n [2.068200e+04, 2.960000e+01, 1.603000e+01],\n [2.068600e+04, 3.124000e+01, 2.445000e+01],\n [2.070300e+04, 3.000000e+01, 1.125000e+01],\n [2.073600e+04, 2.627000e+01, 2.050000e+01],\n [2.075090e+04, 2.780000e+01, 2.254000e+01],\n [2.076000e+04, 2.977000e+01, 1.291000e+01],\n [2.078500e+04, 3.871000e+01, 1.575000e+01],\n [2.083648e+04, 2.780000e+01, 2.254000e+01],\n [2.089200e+04, 3.252000e+01, 9.680000e+00],\n [2.089200e+04, 3.255000e+01, 9.380000e+00],\n [2.089200e+04, 3.262000e+01, 1.026000e+01],\n [2.089200e+04, 3.324000e+01, 8.560000e+00],\n [2.090000e+04, 3.100000e+01, 1.981000e+01],\n [2.093990e+04, 2.840000e+01, 2.165000e+01],\n [2.094100e+04, 2.977000e+01, 1.291000e+01],\n [2.094900e+04, 3.100000e+01, 2.007000e+01],\n [2.107400e+04, 3.830000e+01, 1.525000e+01],\n [2.110000e+04, 2.470000e+01, 2.599000e+01],\n [2.114300e+04, 2.580000e+01, 2.196000e+01],\n [2.114300e+04, 2.580000e+01, 2.197000e+01],\n [2.121740e+04, 3.279000e+01, 1.334000e+01],\n [2.127156e+04, 3.844000e+01, 1.186000e+01],\n [2.134680e+04, 
3.885000e+01, 1.243000e+01],\n [2.137900e+04, 2.944000e+01, 1.436000e+01],\n [2.147400e+04, 2.944000e+01, 1.436000e+01],\n [2.166129e+04, 3.124000e+01, 1.849000e+01],\n [2.176000e+04, 3.213000e+01, 1.785000e+01],\n [2.208167e+04, 3.176000e+01, 1.816000e+01],\n [2.214783e+04, 3.736000e+01, 1.390000e+01],\n [2.219619e+04, 3.736000e+01, 1.390000e+01],\n [2.240000e+04, 3.052000e+01, 1.785000e+01],\n [2.248200e+04, 3.010000e+01, 1.125000e+01],\n [2.261900e+04, 3.047000e+01, 1.303000e+01],\n [2.274200e+04, 3.028000e+01, 1.057000e+01]]),\n '贫煤': array([[1.695900e+04, 9.310000e+00, 4.477000e+01],\n [1.742404e+04, 1.058000e+01, 2.268000e+01],\n [1.742931e+04, 7.900000e+00, 3.840000e+01],\n [1.799800e+04, 1.175000e+01, 2.981000e+01],\n [1.875700e+04, 1.185000e+01, 3.122000e+01],\n [1.912518e+04, 7.810000e+00, 3.145000e+01],\n [1.928076e+04, 7.930000e+00, 3.137000e+01],\n [1.935228e+04, 1.119000e+01, 3.202000e+01],\n [1.938269e+04, 1.127000e+01, 3.192000e+01],\n [1.983535e+04, 1.152000e+01, 3.052000e+01],\n [1.986900e+04, 1.161000e+01, 3.042000e+01],\n [1.994000e+04, 9.370000e+00, 3.426000e+01],\n [1.994300e+04, 9.370000e+00, 3.426000e+01],\n [2.003700e+04, 1.125000e+01, 3.067000e+01],\n [2.024590e+04, 1.058000e+01, 2.654000e+01],\n [2.028730e+04, 1.120000e+01, 2.698000e+01],\n [2.031000e+04, 1.123000e+01, 3.357000e+01],\n [2.031700e+04, 1.125000e+01, 3.067000e+01],\n [2.036000e+04, 9.450000e+00, 3.077000e+01],\n [2.057000e+04, 1.185000e+01, 2.786000e+01],\n [2.075500e+04, 1.174000e+01, 2.817000e+01],\n [2.086230e+04, 1.040000e+01, 2.583000e+01],\n [2.092670e+04, 9.510000e+00, 2.515000e+01],\n [2.096500e+04, 1.258000e+01, 2.965000e+01],\n [2.097590e+04, 1.017000e+01, 2.491000e+01],\n [2.098100e+04, 1.258000e+01, 2.965000e+01],\n [2.101000e+04, 1.209000e+01, 2.169000e+01],\n [2.101980e+04, 9.410000e+00, 2.489000e+01],\n [2.103908e+04, 7.010000e+00, 2.714000e+01],\n [2.105200e+04, 1.074000e+01, 3.136000e+01],\n [2.106690e+04, 1.034000e+01, 2.481000e+01],\n [2.107710e+04, 
1.017000e+01, 2.478000e+01],\n [2.110900e+04, 7.670000e+00, 2.597000e+01],\n [2.110900e+04, 1.209000e+01, 2.169000e+01],\n [2.119000e+04, 7.170000e+00, 2.591000e+01],\n [2.119400e+04, 7.190000e+00, 2.597000e+01],\n [2.119433e+04, 7.010000e+00, 2.667000e+01],\n [2.122400e+04, 1.256000e+01, 2.636000e+01],\n [2.126600e+04, 7.260000e+00, 2.567000e+01],\n [2.126900e+04, 1.174000e+01, 2.817000e+01],\n [2.157900e+04, 1.189000e+01, 2.689000e+01],\n [2.174500e+04, 1.074000e+01, 2.850000e+01],\n [2.176688e+04, 1.062000e+01, 2.687000e+01],\n [2.177291e+04, 1.066000e+01, 2.632000e+01],\n [2.190700e+04, 1.064000e+01, 2.810000e+01],\n [2.204272e+04, 1.296000e+01, 2.569000e+01],\n [2.321500e+04, 1.100000e+01, 1.931000e+01],\n [2.379100e+04, 1.100000e+01, 1.931000e+01]])}" + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coal_params_dict" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 77, + "outputs": [ + { + "data": { + "text/plain": " 地区 所属集团 投产时间 机组容量 机组类型 参数分类 冷却方式 锅炉类型 时间 \\\n0 北京 华能 1998/1/20 0:00 165 供热式 超高压 水冷 煤粉 2016.0 \n1 北京 华能 1998/1/20 0:00 165 供热式 超高压 水冷 煤粉 2016.0 \n2 北京 华能 1998/12/20 0:00 220 供热式 超高压 水冷 煤粉 2016.0 \n3 北京 华能 1999/6/26 0:00 220 供热式 超高压 水冷 煤粉 2016.0 \n4 辽宁 大唐 2009/4/30 0:00 300 供热式 亚临界 水冷 煤粉 2016.0 \n.. .. ... ... ... ... ... ... ... ... \n847 新疆 NaN NaN 1320 纯凝式 超临界 间接空冷 煤粉 NaN \n848 辽宁 NaN NaN 700 供热式 超临界 水冷 煤粉 NaN \n849 内蒙 NaN NaN 700 供热式 超临界 直接空冷 煤粉 NaN \n850 山东 NaN NaN 40 供热式 超高压 水冷 循环流化床 NaN \n851 浙江 NaN NaN 70 供热式 超高压 水冷 循环流化床 NaN \n\n 发电量 ... 标煤量 出力系数 煤种 入炉煤低位热值 燃煤挥发份Var 燃煤灰份Aar \\\n0 51841.70000 ... 2.580497e+05 75.84 烟煤 23380.0 27.59 9.94 \n1 47387.95000 ... 2.126813e+05 74.50 烟煤 23380.0 27.59 9.94 \n2 115498.04000 ... 4.410925e+05 78.76 烟煤 23380.0 27.59 9.94 \n3 120884.07000 ... 4.707218e+05 81.41 烟煤 23380.0 27.59 9.94 \n4 111218.55000 ... 3.726990e+05 71.27 褐煤 14122.0 24.78 19.16 \n.. ... ... ... 
... .. ... ... ... \n847 704381.26290 ... 2.283076e+06 NaN 褐煤 19970.0 35.33 9.05 \n848 350000.00000 ... 1.328747e+06 NaN 褐煤 14640.0 44.39 16.84 \n849 385000.00000 ... 1.362009e+06 NaN 褐煤 13960.0 46.65 18.90 \n850 17000.00000 ... 1.810834e+05 NaN 烟煤 21060.0 19.12 20.27 \n851 35788.81469 ... 3.502535e+05 NaN 烟煤 22021.0 19.12 21.77 \n\n CO2_em_air 入炉煤低位热值_new 燃煤挥发份Var_new 燃煤灰份Aar_new \n0 0.235066 23380.0 27.59 9.94 \n1 0.226207 23380.0 27.59 9.94 \n2 0.220954 23380.0 27.59 9.94 \n3 0.216298 23380.0 27.59 9.94 \n4 0.238755 14122.0 24.78 19.16 \n.. ... ... ... ... \n847 0.196452 19970.0 35.33 9.05 \n848 0.185688 14640.0 44.39 16.84 \n849 0.181214 13960.0 46.65 18.90 \n850 0.347570 21060.0 19.12 20.27 \n851 0.253057 22021.0 19.12 21.77 \n\n[852 rows x 21 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
地区所属集团投产时间机组容量机组类型参数分类冷却方式锅炉类型时间发电量...标煤量出力系数煤种入炉煤低位热值燃煤挥发份Var燃煤灰份AarCO2_em_air入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new
0北京华能1998/1/20 0:00165供热式超高压水冷煤粉2016.051841.70000...2.580497e+0575.84烟煤23380.027.599.940.23506623380.027.599.94
1北京华能1998/1/20 0:00165供热式超高压水冷煤粉2016.047387.95000...2.126813e+0574.50烟煤23380.027.599.940.22620723380.027.599.94
2北京华能1998/12/20 0:00220供热式超高压水冷煤粉2016.0115498.04000...4.410925e+0578.76烟煤23380.027.599.940.22095423380.027.599.94
3北京华能1999/6/26 0:00220供热式超高压水冷煤粉2016.0120884.07000...4.707218e+0581.41烟煤23380.027.599.940.21629823380.027.599.94
4辽宁大唐2009/4/30 0:00300供热式亚临界水冷煤粉2016.0111218.55000...3.726990e+0571.27褐煤14122.024.7819.160.23875514122.024.7819.16
..................................................................
847新疆NaNNaN1320纯凝式超临界间接空冷煤粉NaN704381.26290...2.283076e+06NaN褐煤19970.035.339.050.19645219970.035.339.05
848辽宁NaNNaN700供热式超临界水冷煤粉NaN350000.00000...1.328747e+06NaN褐煤14640.044.3916.840.18568814640.044.3916.84
849内蒙NaNNaN700供热式超临界直接空冷煤粉NaN385000.00000...1.362009e+06NaN褐煤13960.046.6518.900.18121413960.046.6518.90
850山东NaNNaN40供热式超高压水冷循环流化床NaN17000.00000...1.810834e+05NaN烟煤21060.019.1220.270.34757021060.019.1220.27
851浙江NaNNaN70供热式超高压水冷循环流化床NaN35788.81469...3.502535e+05NaN烟煤22021.019.1221.770.25305722021.019.1221.77
\n

852 rows × 21 columns

\n
" + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 78, + "outputs": [], + "source": [ + "new_use_data = total_data.groupby(use_col + ['煤种'])['CO2_em_air'].mean().reset_index().drop(columns=['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new'])\n", + "new_use_data.rename(columns={0:'CO2_em_air'}, inplace=True)\n", + "new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))\n", + "new_use_data.drop(columns='煤种', inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 79, + "outputs": [], + "source": [ + "new_data = new_use_data.explode(column='coal_params')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 80, + "outputs": [ + { + "data": { + "text/plain": " 地区 机组类型 参数分类 冷却方式 锅炉类型 机组容量 coal_params\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [12771.0, 21.26, 33.55]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [15000.0, 23.46, 19.04]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [16100.0, 23.33, 18.73]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [16190.0, 23.33, 18.73]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [16641.0, 19.13, 39.12]\n.. ... ... ... ... ... ... ...\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23253.68, 23.72, 18.45]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23380.0, 27.59, 9.94]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23487.51, 29.27, 20.97]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23650.0, 28.87, 7.91]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23656.14, 29.27, 20.97]\n\n[208875 rows x 7 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
地区机组类型参数分类冷却方式锅炉类型机组容量coal_params
0上海纯凝式亚临界水冷煤粉320[12771.0, 21.26, 33.55]
0上海纯凝式亚临界水冷煤粉320[15000.0, 23.46, 19.04]
0上海纯凝式亚临界水冷煤粉320[16100.0, 23.33, 18.73]
0上海纯凝式亚临界水冷煤粉320[16190.0, 23.33, 18.73]
0上海纯凝式亚临界水冷煤粉320[16641.0, 19.13, 39.12]
........................
646黑龙江纯凝式超高压水冷煤粉210[23253.68, 23.72, 18.45]
646黑龙江纯凝式超高压水冷煤粉210[23380.0, 27.59, 9.94]
646黑龙江纯凝式超高压水冷煤粉210[23487.51, 29.27, 20.97]
646黑龙江纯凝式超高压水冷煤粉210[23650.0, 28.87, 7.91]
646黑龙江纯凝式超高压水冷煤粉210[23656.14, 29.27, 20.97]
\n

208875 rows × 7 columns

\n
" + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_data.drop(columns=['CO2_em_air'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 81, + "outputs": [], + "source": [ + "norm_data = pd.concat([new_data, new_data.coal_params.apply(pd.Series, index=['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new'])], axis=1).drop(columns='coal_params')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 82, + "outputs": [ + { + "data": { + "text/plain": " 地区 机组类型 参数分类 冷却方式 锅炉类型 机组容量 CO2_em_air 入炉煤低位热值_new 燃煤挥发份Var_new \\\n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 12771.00 21.26 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 15000.00 23.46 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 16100.00 23.33 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 16190.00 23.33 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 16641.00 19.13 \n.. ... ... ... ... ... ... ... ... ... \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23253.68 23.72 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23380.00 27.59 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23487.51 29.27 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23650.00 28.87 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23656.14 29.27 \n\n 燃煤灰份Aar_new \n0 33.55 \n0 19.04 \n0 18.73 \n0 18.73 \n0 39.12 \n.. ... \n646 18.45 \n646 9.94 \n646 20.97 \n646 7.91 \n646 20.97 \n\n[208875 rows x 10 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
地区机组类型参数分类冷却方式锅炉类型机组容量CO2_em_air入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new
0上海纯凝式亚临界水冷煤粉3200.26660212771.0021.2633.55
0上海纯凝式亚临界水冷煤粉3200.26660215000.0023.4619.04
0上海纯凝式亚临界水冷煤粉3200.26660216100.0023.3318.73
0上海纯凝式亚临界水冷煤粉3200.26660216190.0023.3318.73
0上海纯凝式亚临界水冷煤粉3200.26660216641.0019.1339.12
.................................
646黑龙江纯凝式超高压水冷煤粉2100.27876323253.6823.7218.45
646黑龙江纯凝式超高压水冷煤粉2100.27876323380.0027.599.94
646黑龙江纯凝式超高压水冷煤粉2100.27876323487.5129.2720.97
646黑龙江纯凝式超高压水冷煤粉2100.27876323650.0028.877.91
646黑龙江纯凝式超高压水冷煤粉2100.27876323656.1429.2720.97
\n

208875 rows × 10 columns

\n
" + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 83, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " norm_data[col] = np.log1p(norm_data[col])\n", + " # total_data[col] = (total_data[col] - total_data[col].min()) / (total_data[col].max() - total_data[col].min())\n", + "norm_data_dummy = pd.get_dummies(norm_data, columns=object_cols)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 84, + "outputs": [ + { + "data": { + "text/plain": " 机组容量 CO2_em_air 入炉煤低位热值_new 燃煤挥发份Var_new 燃煤灰份Aar_new 地区_上海 \\\n0 5.771441 0.236338 9.455011 3.102791 3.542408 1 \n0 5.771441 0.236338 9.615872 3.197039 2.997730 1 \n0 5.771441 0.236338 9.686637 3.191710 2.982140 1 \n0 5.771441 0.236338 9.692211 3.191710 2.982140 1 \n0 5.771441 0.236338 9.719685 3.002211 3.691875 1 \n.. ... ... ... ... ... ... \n646 5.351858 0.245893 10.054262 3.207613 2.967847 0 \n646 5.351858 0.245893 10.059679 3.353057 2.392426 0 \n646 5.351858 0.245893 10.064267 3.410157 3.089678 0 \n646 5.351858 0.245893 10.071161 3.396855 2.187174 0 \n646 5.351858 0.245893 10.071420 3.410157 3.089678 0 \n\n 地区_云南 地区_内蒙 地区_北京 地区_吉林 ... 机组类型_纯凝式 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 \\\n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n.. ... ... ... ... ... ... ... ... ... \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n\n 参数分类_超高压 冷却方式_水冷 冷却方式_直接空冷 冷却方式_间接空冷 锅炉类型_循环流化床 锅炉类型_煤粉 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n.. ... ... ... ... ... ... 
\n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n\n[208875 rows x 45 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
机组容量CO2_em_air入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new地区_上海地区_云南地区_内蒙地区_北京地区_吉林...机组类型_纯凝式参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压冷却方式_水冷冷却方式_直接空冷冷却方式_间接空冷锅炉类型_循环流化床锅炉类型_煤粉
05.7714410.2363389.4550113.1027913.54240810000...1100010001
05.7714410.2363389.6158723.1970392.99773010000...1100010001
05.7714410.2363389.6866373.1917102.98214010000...1100010001
05.7714410.2363389.6922113.1917102.98214010000...1100010001
05.7714410.2363389.7196853.0022113.69187510000...1100010001
..................................................................
6465.3518580.24589310.0542623.2076132.96784700000...1000110001
6465.3518580.24589310.0596793.3530572.39242600000...1000110001
6465.3518580.24589310.0642673.4101573.08967800000...1000110001
6465.3518580.24589310.0711613.3968552.18717400000...1000110001
6465.3518580.24589310.0714203.4101573.08967800000...1000110001
\n

208875 rows × 45 columns

\n
" + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_data_dummy" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 85, + "outputs": [], + "source": [ + "new_xgb_data = xgb.DMatrix(norm_data_dummy[feature_cols])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 86, + "outputs": [], + "source": [ + "norm_data.drop(columns='CO2_em_air', inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 87, + "outputs": [], + "source": [ + "norm_data['co2_pred'] = gb_model.predict(new_xgb_data)\n", + "normaled_data = norm_data.drop(columns=['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']).groupby([x for x in use_col if x not in ['CO2_em_air', '入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']])['co2_pred'].mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "normaled_data.reset_index().to_csv('./data/去煤种化数据.csv', encoding='utf-8-sig', index=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('py37')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "993bd31d5df1020fab369d79a34ff0a2a159e1798f3e25d3ad4b7751d38184c9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff 
--git a/data_extract.ipynb b/data_extract.ipynb new file mode 100644 index 0000000..d56cbfb --- /dev/null +++ b/data_extract.ipynb @@ -0,0 +1,892 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "files_old = [x for x in os.listdir('./new_data/') if x.endswith('xls') and '经济性' in x]\n", + "files_new = [x for x in os.listdir('./火电20230313/火电20230313F/') if '经济性' in x]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "data = pd.read_excel(f'./new_data/{files_old[0]}', header=[3,4,5])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "data": { + "text/plain": "['序号',\n '机组编号',\n '时间',\n '发电量(万kWh)',\n '供电量(万kWh)',\n '标煤量(t)',\n '发电用标煤量(t)',\n '供热用标煤量(t)',\n '利用小时(h)',\n '平均负荷(MW)',\n '出力系数(%)',\n '出力系数(%)',\n '出力系数(%)',\n '工业供热量',\n '工业热电比(%)',\n '采暖供热量',\n '采暖热电比(%)',\n '总热电比(%)',\n '总供热量',\n '供热煤耗',\n '给水泵汽轮机总耗热量',\n '供电煤耗gce/(kWh)',\n '综合厂用电率(%)',\n '发电厂用电率(%)',\n '供热厂用电率(%)',\n '非生产厂用电率(%)',\n '最新THA工况二类修正后汽机热耗率kJ/KWh',\n '点火用油(kg/万kWh)',\n '助燃用油(kg/万kWh)',\n '主蒸汽压力(MPa)',\n '主蒸汽温度(℃)',\n '再热蒸汽温度(℃)',\n '高加投入率(%)',\n '给水温度(℃)',\n '真空严密性V(Pa/min)',\n '真空度(%)',\n '凝汽器端差(℃)',\n '凝结水过冷度(℃)',\n '发电补给水率(%)',\n '发电综合耗水率',\n '排烟温度(℃)',\n '飞灰含碳量(%)',\n '空预器漏风率(%)',\n '过热器减温水量(t/h)',\n '再热器减温水量(t/h)',\n '入厂煤低位热值(kJ/kg)',\n '入炉煤低位热值(kJ/kg)',\n '燃煤挥发份Var(%)',\n '燃煤灰份Aar(%)',\n '燃煤低位热值Qar,net(kJ/kg)',\n '燃煤硫份Sar(%)',\n '锅炉专业主要辅机耗电率 送风机耗电率(%)',\n '锅炉专业主要辅机耗电率 引风机耗电率(%)',\n '锅炉专业主要辅机耗电率 
一次风机耗电率(%)',\n '锅炉专业主要辅机耗电率 炉水泵耗电率(%)',\n '锅炉专业主要辅机耗电率 给煤机耗电率(%)',\n '锅炉专业主要辅机耗电率 磨煤机耗电率(%)',\n '锅炉专业主要辅机耗电率 电除尘器耗电率(%)',\n '锅炉专业主要辅机耗电率 除灰系统耗电率(%)',\n '汽机专业主要辅机耗电率 凝结水泵耗电率(%)',\n '汽机专业主要辅机耗电率 前置泵耗电率(%)',\n '汽机专业主要辅机耗电率 电动给水泵耗电率(%)',\n '汽机专业主要辅机耗电率 循环水泵耗电率(%)',\n '汽机专业主要辅机耗电率 空冷风机耗电率(%)',\n '汽机专业主要辅机耗电率 热网循环水泵耗电率(%)',\n '环保专业耗电率 脱硫系统耗电率(%)',\n '环保专业耗电率 脱销系统耗电率(%)',\n '输煤专业耗电率输煤系统耗电率(%)',\n '化学系统耗电率 (%)',\n '化学系统耗电率 (%)']" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cols = [''.join([x for x in y if 'Unnamed' not in x]) for y in data.columns]\n", + "cols" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "data_list = list()\n", + "for file in files_old:\n", + " data = pd.read_excel(f'./new_data/{file}', header=[3,4,5])\n", + " data.columns = cols\n", + " plant = file.split('-')[0]\n", + " data['时间'] = data['时间'].astype(str)\n", + " use_data = data[~data['时间'].str.contains('半年')].copy()\n", + " use_data['电厂名称'] = plant\n", + " data_list.append(use_data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "for file in files_new:\n", + " data = pd.read_excel(f'./火电20230313/火电20230313F/{file}', header=[3,4,5])\n", + " data.columns = cols\n", + " plant = file.split('-')[0]\n", + " data['时间'] = data['时间'].astype(str)\n", + " use_data = data[~data['时间'].str.contains('半年')].copy()\n", + " use_data['电厂名称'] = plant\n", + " data_list.append(use_data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "run_data = pd.concat(data_list, axis=0)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", 
+ "execution_count": 8, + "outputs": [], + "source": [ + "run_data.drop(columns=['序号'], inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "run_data.drop_duplicates(inplace=True)\n", + "run_data.reset_index(inplace=True, drop=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [ + { + "data": { + "text/plain": "(17616, 70)" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "run_data.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "run_data_max = run_data.groupby(['电厂名称', '机组编号', '时间']).max().reset_index()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### 机组信息" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "data": { + "text/plain": "['中国中信集团公司-32020-电厂机组数据查询-20230313.xls',\n '中国中煤能源集团有限公司-34027-电厂机组数据查询-20230313.xls',\n '中国华电集团有限公司-21060-电厂机组数据查询-20230313.xls',\n '中国华能集团有限公司-17021-电厂机组数据查询-20230313.xls',\n '中国大唐集团有限公司-61005-电厂机组数据查询-20230313.xls']" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_units = [x for x in os.listdir('./new_data/') if '电厂机组' in x]\n", + "new_units = [x for x in os.listdir('./火电20230313/火电20230313/') if '电厂机组' in x]\n", + "new_units[:5]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [ + { + "data": { + "text/plain": "['序号',\n '企业编码',\n 
'电厂名称',\n '简称',\n '机组编号',\n '铭牌容量 (MW)',\n '投产时间',\n '机组类型',\n '参数分类',\n '所处地区',\n '机组产地',\n '锅炉制造厂家',\n '汽轮机制造厂家',\n '发电机制造厂家',\n '主变压器制造厂家',\n '二级公司',\n '所属集团',\n '所属电网',\n '所属电网']" + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_samples = pd.read_excel(f'./new_data/{old_units[0]}', header=[3,4])\n", + "unit_cols = [''.join([x for x in y if 'Unnamed' not in x]) for y in unit_samples.columns]\n", + "unit_cols" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "unit_list = list()\n", + "for file in old_units:\n", + " data = pd.read_excel(f'./new_data/{file}', header=[3,4])\n", + " data.columns = unit_cols\n", + " unit_list.append(data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [], + "source": [ + "# unit_list = list()\n", + "for file in new_units:\n", + " data = pd.read_excel(f'./火电20230313/火电20230313/{file}', header=[3,4])\n", + " data.columns = unit_cols\n", + " unit_list.append(data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "units_df = pd.concat(unit_list).drop(columns=['序号', '所属电网']).drop_duplicates().reset_index(drop=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [ + { + "data": { + "text/plain": " 企业编码 电厂名称 简称 机组编号 铭牌容量 (MW) 投产时间 机组类型 参数分类 \\\n1388 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 2 630.0 1994-11-18 纯凝式 亚临界 \n1389 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 1 630.0 1991-10-30 纯凝式 亚临界 \n1390 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 3 660.0 2000-9-28 纯凝式 亚临界 \n1391 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 7 1000.0 2009-6-2 纯凝式 超超临界 \n1392 33008 国能浙江北仑第一发电有限公司 
国家能源浙江北仑 6 1050.0 2008-12-20 纯凝式 超超临界 \n1393 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 5 660.0 2000-7-28 纯凝式 亚临界 \n1394 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 4 660.0 2000-7-8 纯凝式 亚临界 \n1891 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 7 1050.0 2009-6-2 纯凝式 超超临界 \n\n 所处地区 机组产地 锅炉制造厂家 汽轮机制造厂家 发电机制造厂家 主变压器制造厂家 \\\n1388 浙江省 进口 加拿大巴布科克威尔科克斯公司 法国阿尔斯通 法国阿尔斯通 法国阿尔斯通公司 \n1389 浙江省 进口 美国燃烧工程公司 日本东芝公司 日本东芝公司 日本东芝公司 \n1390 浙江省 进口 日本石川岛播磨株式会社 日本东芝公司 日本东芝公司 日本东芝公司 \n1391 浙江省 国产 东方锅炉厂(东锅) 上海汽轮机厂(上汽) 上海电机厂(上电) 保定天威保变电气股份有限公司 \n1392 浙江省 国产 东方锅炉厂(东锅) 上海汽轮机厂(上汽) 上海电机厂(上电) 保定天威保变电气股份有限公司 \n1393 浙江省 进口 日本石川岛播磨株式会社 日本东芝公司 日本东芝公司 日本东芝公司 \n1394 浙江省 进口 日本石川岛播磨株式会社 日本东芝公司 日本东芝公司 日本东芝公司 \n1891 浙江省 国产 东方锅炉厂(东锅) 上海汽轮机厂(上汽) 上海电机厂(上电) 保定天威保变电气股份有限公司 \n\n 二级公司 所属集团 \n1388 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1389 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1390 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1391 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1392 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1393 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1394 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n1891 北京国华电力有限责任公司 国家能源投资集团有限责任公司 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业编码电厂名称简称机组编号铭牌容量 (MW)投产时间机组类型参数分类所处地区机组产地锅炉制造厂家汽轮机制造厂家发电机制造厂家主变压器制造厂家二级公司所属集团
138833008国能浙江北仑第一发电有限公司国家能源浙江北仑2630.01994-11-18纯凝式亚临界浙江省进口加拿大巴布科克威尔科克斯公司法国阿尔斯通法国阿尔斯通法国阿尔斯通公司北京国华电力有限责任公司国家能源投资集团有限责任公司
138933008国能浙江北仑第一发电有限公司国家能源浙江北仑1630.01991-10-30纯凝式亚临界浙江省进口美国燃烧工程公司日本东芝公司日本东芝公司日本东芝公司北京国华电力有限责任公司国家能源投资集团有限责任公司
139033008国能浙江北仑第一发电有限公司国家能源浙江北仑3660.02000-9-28纯凝式亚临界浙江省进口日本石川岛播磨株式会社日本东芝公司日本东芝公司日本东芝公司北京国华电力有限责任公司国家能源投资集团有限责任公司
139133008国能浙江北仑第一发电有限公司国家能源浙江北仑71000.02009-6-2纯凝式超超临界浙江省国产东方锅炉厂(东锅)上海汽轮机厂(上汽)上海电机厂(上电)保定天威保变电气股份有限公司北京国华电力有限责任公司国家能源投资集团有限责任公司
139233008国能浙江北仑第一发电有限公司国家能源浙江北仑61050.02008-12-20纯凝式超超临界浙江省国产东方锅炉厂(东锅)上海汽轮机厂(上汽)上海电机厂(上电)保定天威保变电气股份有限公司北京国华电力有限责任公司国家能源投资集团有限责任公司
139333008国能浙江北仑第一发电有限公司国家能源浙江北仑5660.02000-7-28纯凝式亚临界浙江省进口日本石川岛播磨株式会社日本东芝公司日本东芝公司日本东芝公司北京国华电力有限责任公司国家能源投资集团有限责任公司
139433008国能浙江北仑第一发电有限公司国家能源浙江北仑4660.02000-7-8纯凝式亚临界浙江省进口日本石川岛播磨株式会社日本东芝公司日本东芝公司日本东芝公司北京国华电力有限责任公司国家能源投资集团有限责任公司
189133008国能浙江北仑第一发电有限公司国家能源浙江北仑71050.02009-6-2纯凝式超超临界浙江省国产东方锅炉厂(东锅)上海汽轮机厂(上汽)上海电机厂(上电)保定天威保变电气股份有限公司北京国华电力有限责任公司国家能源投资集团有限责任公司
\n
" + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_df[units_df['电厂名称']=='国能浙江北仑第一发电有限公司']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "data": { + "text/plain": "(1934, 16)" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_df.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [ + { + "data": { + "text/plain": " 企业编码 电厂名称 简称 机组编号 铭牌容量 (MW) 投产时间 机组类型 参数分类 所处地区 \\\n1812 61022 陕西渭河发电有限公司 陕西省投咸阳渭河 3 320.0 1992-7-1 纯凝式 亚临界 陕西省 \n1928 61022 陕西渭河发电有限公司 陕西省投咸阳渭河 3 320.0 1992-7-1 纯凝式 亚临界 陕西省 \n\n 机组产地 锅炉制造厂家 汽轮机制造厂家 发电机制造厂家 主变压器制造厂家 二级公司 所属集团 \n1812 国产 上海锅炉厂(上锅) 哈尔滨汽轮机厂(哈汽) 哈尔滨电机厂(哈电) 沈变 NaN 陕西投资集团有限公司 \n1928 国产 上海锅炉厂(上锅) 上海汽轮机厂(上汽) 上海电机厂(上电) 西安西电变压器厂 NaN 陕西投资集团有限公司 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业编码电厂名称简称机组编号铭牌容量 (MW)投产时间机组类型参数分类所处地区机组产地锅炉制造厂家汽轮机制造厂家发电机制造厂家主变压器制造厂家二级公司所属集团
181261022陕西渭河发电有限公司陕西省投咸阳渭河3320.01992-7-1纯凝式亚临界陕西省国产上海锅炉厂(上锅)哈尔滨汽轮机厂(哈汽)哈尔滨电机厂(哈电)沈变NaN陕西投资集团有限公司
192861022陕西渭河发电有限公司陕西省投咸阳渭河3320.01992-7-1纯凝式亚临界陕西省国产上海锅炉厂(上锅)上海汽轮机厂(上汽)上海电机厂(上电)西安西电变压器厂NaN陕西投资集团有限公司
\n
" + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_df[(units_df['电厂名称']=='陕西渭河发电有限公司')&(units_df['机组编号']==3)]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [ + { + "data": { + "text/plain": " 企业编码 电厂名称 简称 机组编号 铭牌容量 (MW) 投产时间 机组类型 参数分类 \\\n1391 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 7 1000.0 2009-6-2 纯凝式 超超临界 \n\n 所处地区 机组产地 锅炉制造厂家 汽轮机制造厂家 发电机制造厂家 主变压器制造厂家 \\\n1391 浙江省 国产 东方锅炉厂(东锅) 上海汽轮机厂(上汽) 上海电机厂(上电) 保定天威保变电气股份有限公司 \n\n 二级公司 所属集团 \n1391 北京国华电力有限责任公司 国家能源投资集团有限责任公司 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业编码电厂名称简称机组编号铭牌容量 (MW)投产时间机组类型参数分类所处地区机组产地锅炉制造厂家汽轮机制造厂家发电机制造厂家主变压器制造厂家二级公司所属集团
139133008国能浙江北仑第一发电有限公司国家能源浙江北仑71000.02009-6-2纯凝式超超临界浙江省国产东方锅炉厂(东锅)上海汽轮机厂(上汽)上海电机厂(上电)保定天威保变电气股份有限公司北京国华电力有限责任公司国家能源投资集团有限责任公司
\n
" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_df[(units_df['电厂名称']=='国能浙江北仑第一发电有限公司')&(units_df['机组编号']==7)&(units_df['铭牌容量 (MW)']==1000.0)]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [], + "source": [ + "drop_units = units_df[(units_df['电厂名称']=='国能浙江北仑第一发电有限公司')&(units_df['机组编号']==7)&(units_df['铭牌容量 (MW)']==1000.0)].index.values\n", + "units_df.drop(index=drop_units, inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [ + { + "data": { + "text/plain": " 企业编码 电厂名称 简称 机组编号 铭牌容量 (MW) 投产时间 机组类型 参数分类 \\\n1891 33008 国能浙江北仑第一发电有限公司 国家能源浙江北仑 7 1050.0 2009-6-2 纯凝式 超超临界 \n\n 所处地区 机组产地 锅炉制造厂家 汽轮机制造厂家 发电机制造厂家 主变压器制造厂家 \\\n1891 浙江省 国产 东方锅炉厂(东锅) 上海汽轮机厂(上汽) 上海电机厂(上电) 保定天威保变电气股份有限公司 \n\n 二级公司 所属集团 \n1891 北京国华电力有限责任公司 国家能源投资集团有限责任公司 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业编码电厂名称简称机组编号铭牌容量 (MW)投产时间机组类型参数分类所处地区机组产地锅炉制造厂家汽轮机制造厂家发电机制造厂家主变压器制造厂家二级公司所属集团
189133008国能浙江北仑第一发电有限公司国家能源浙江北仑71050.02009-6-2纯凝式超超临界浙江省国产东方锅炉厂(东锅)上海汽轮机厂(上汽)上海电机厂(上电)保定天威保变电气股份有限公司北京国华电力有限责任公司国家能源投资集团有限责任公司
\n
" + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_df[(units_df['电厂名称']=='国能浙江北仑第一发电有限公司')&(units_df['机组编号']==7)]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Dropping invalid columns in DataFrameGroupBy.max is deprecated. In a future version, a TypeError will be raised. Before calling .max, select only columns which should be valid for the function.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "units_max = units_df.groupby(['电厂名称', '机组编号', '投产时间']).max().reset_index()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [ + { + "data": { + "text/plain": "(1926, 13)" + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_max.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 26, + "outputs": [ + { + "data": { + "text/plain": " 电厂名称 机组编号 投产时间 企业编码 简称 铭牌容量 (MW) 机组类型 \\\n0 万方发电厂(焦作爱依斯万方电力有限公司) 1 1997-8-17 41020 河南焦作万方 125.0 纯凝式 \n1 万方发电厂(焦作爱依斯万方电力有限公司) 2 1998-7-8 41020 河南焦作万方 125.0 纯凝式 \n2 三河发电有限责任公司 1 1999-12-17 13032 国家能源河北三河 350.0 供热式 \n3 三河发电有限责任公司 2 2000-4-2 13032 国家能源河北三河 350.0 供热式 \n4 三河发电有限责任公司 3 2007-8-31 13032 国家能源河北三河 315.0 供热式 \n... ... ... ... ... ... ... ... 
\n1921 黔桂发电有限责任公司 1 1994-1-16 74009 金元贵州盘县 200.0 纯凝式 \n1922 黔桂发电有限责任公司 2 1994-10-26 74009 金元贵州盘县 200.0 纯凝式 \n1923 黔桂发电有限责任公司 3 1996-10-14 74009 金元贵州盘县 200.0 纯凝式 \n1924 黔桂发电有限责任公司 4 2001-4-9 74009 金元贵州盘县 200.0 纯凝式 \n1925 黔桂发电有限责任公司 5 2001-8-25 74009 金元贵州盘县 200.0 纯凝式 \n\n 参数分类 所处地区 机组产地 锅炉制造厂家 二级公司 所属集团 \n0 超高压 河南省 国产 上海锅炉厂(上锅) NaN 其他 \n1 超高压 河南省 国产 上海锅炉厂(上锅) NaN 其他 \n2 亚临界 河北省 进口 日本三菱公司 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n3 亚临界 河北省 进口 日本三菱公司 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n4 亚临界 河北省 国产 东方锅炉厂(东锅) 北京国华电力有限责任公司 国家能源投资集团有限责任公司 \n... ... ... ... ... ... ... \n1921 超高压 贵州省 国产 东方锅炉厂(东锅) 贵州金元电力投资股份有限公司 国家电力投资集团有限公司 \n1922 超高压 贵州省 国产 东方锅炉厂(东锅) 贵州金元电力投资股份有限公司 国家电力投资集团有限公司 \n1923 超高压 贵州省 国产 东方锅炉厂(东锅) 贵州金元电力投资股份有限公司 国家电力投资集团有限公司 \n1924 超高压 贵州省 国产 东方锅炉厂(东锅) 贵州金元电力投资股份有限公司 国家电力投资集团有限公司 \n1925 超高压 贵州省 国产 东方锅炉厂(东锅) 贵州金元电力投资股份有限公司 国家电力投资集团有限公司 \n\n[1926 rows x 13 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
电厂名称机组编号投产时间企业编码简称铭牌容量 (MW)机组类型参数分类所处地区机组产地锅炉制造厂家二级公司所属集团
0万方发电厂(焦作爱依斯万方电力有限公司)11997-8-1741020河南焦作万方125.0纯凝式超高压河南省国产上海锅炉厂(上锅)NaN其他
1万方发电厂(焦作爱依斯万方电力有限公司)21998-7-841020河南焦作万方125.0纯凝式超高压河南省国产上海锅炉厂(上锅)NaN其他
2三河发电有限责任公司11999-12-1713032国家能源河北三河350.0供热式亚临界河北省进口日本三菱公司北京国华电力有限责任公司国家能源投资集团有限责任公司
3三河发电有限责任公司22000-4-213032国家能源河北三河350.0供热式亚临界河北省进口日本三菱公司北京国华电力有限责任公司国家能源投资集团有限责任公司
4三河发电有限责任公司32007-8-3113032国家能源河北三河315.0供热式亚临界河北省国产东方锅炉厂(东锅)北京国华电力有限责任公司国家能源投资集团有限责任公司
..........................................
1921黔桂发电有限责任公司11994-1-1674009金元贵州盘县200.0纯凝式超高压贵州省国产东方锅炉厂(东锅)贵州金元电力投资股份有限公司国家电力投资集团有限公司
1922黔桂发电有限责任公司21994-10-2674009金元贵州盘县200.0纯凝式超高压贵州省国产东方锅炉厂(东锅)贵州金元电力投资股份有限公司国家电力投资集团有限公司
1923黔桂发电有限责任公司31996-10-1474009金元贵州盘县200.0纯凝式超高压贵州省国产东方锅炉厂(东锅)贵州金元电力投资股份有限公司国家电力投资集团有限公司
1924黔桂发电有限责任公司42001-4-974009金元贵州盘县200.0纯凝式超高压贵州省国产东方锅炉厂(东锅)贵州金元电力投资股份有限公司国家电力投资集团有限公司
1925黔桂发电有限责任公司52001-8-2574009金元贵州盘县200.0纯凝式超高压贵州省国产东方锅炉厂(东锅)贵州金元电力投资股份有限公司国家电力投资集团有限公司
\n

1926 rows × 13 columns

\n
" + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "units_max" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### 汽轮机数据" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [ + { + "data": { + "text/plain": "['中国中信集团公司-32020-汽轮机数据查询-20230313.xls',\n '中国中煤能源集团有限公司-34027-汽轮机数据查询-20230313.xls',\n '中国华电集团有限公司-21060-汽轮机数据查询-20230313.xls',\n '中国华能集团有限公司-17021-汽轮机数据查询-20230313.xls',\n '中国大唐集团有限公司-61005-汽轮机数据查询-20230313.xls',\n '中国铝业股份有限公司-64011-汽轮机数据查询-20230313.xls',\n '中国长江三峡集团公司-42011-汽轮机数据查询-20230313.xls',\n '其他-47001-汽轮机数据查询-20230313.xls',\n '内蒙古源源能源集团有限责任公司-15050-汽轮机数据查询-20230313.xls',\n '内蒙古能源发电投资集团有限公司-15047-汽轮机数据查询-20230313.xls',\n '北京能源集团有限责任公司-64009-汽轮机数据查询-20230313.xls',\n '华润(集团)有限公司-13008-汽轮机数据查询-20230313.xls',\n '协鑫集团有限公司-32036-汽轮机数据查询-20230313.xls',\n '国家开发投资公司-37009-汽轮机数据查询-20230313.xls',\n '国家开发投资集团有限公司-16014-汽轮机数据查询-20230313.xls',\n '国家电力投资集团有限公司-21018-汽轮机数据查询-20230313.xls',\n '国家能源投资集团有限责任公司-23012-汽轮机数据查询-20230313.xls',\n '安徽省能源集团有限公司-34003-汽轮机数据查询-20230313.xls',\n '山西国际能源集团有限公司-14088-汽轮机数据查询-20230313.xls',\n '山西焦煤集团有限公司-14099-汽轮机数据查询-20230313.xls',\n '广东省能源集团有限公司-71003-汽轮机数据查询-20230313.xls',\n '广州发展集团股份有限公司-71021-汽轮机数据查询-20230313.xls',\n '新疆天山电力股份有限公司-65004-汽轮机数据查询-20230313.xls',\n '晋能控股电力集团-14022-汽轮机数据查询-20230313.xls',\n '江苏省国信集团有限公司-32015-汽轮机数据查询-20230313.xls',\n '江西省投资集团有限公司-44020-汽轮机数据查询-20230313.xls',\n '河北建设投资集团有限责任公司-13056-汽轮机数据查询-20230313.xls',\n '河南投资集团有限公司-41014-汽轮机数据查询-20230313.xls',\n '浙江省能源集团有限公司-33001-汽轮机数据查询-20230313.xls',\n '深圳能源集团股份有限公司-71022-汽轮机数据查询-20230313.xls',\n '甘肃省电力投资集团有限责任公司-62006-汽轮机数据查询-20230313.xls',\n '申能股份有限公司-35016-汽轮机数据查询-20230313.xls',\n '陕西投资集团有限公司-61040-汽轮机数据查询-20230313.xls',\n '陕西榆林能源集团有限公司-61041-汽轮机数据查询-20230313.xls',\n '陕西煤业化工集团有限责任公司-43014-汽轮机数据查询-20230313.xls']" + 
}, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_steam = [x for x in os.listdir('./new_data/') if '汽轮机' in x]\n", + "steam_data = [x for x in os.listdir('./火电20230313/火电20230313/') if '汽轮机' in x]\n", + "steam_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "data": { + "text/plain": "['序号',\n '企业编码',\n '电厂名称',\n '简称',\n '机组编号',\n '制造厂家',\n '型号',\n '出厂编号',\n '出厂编号',\n '汽轮机缸效率高压缸(%)',\n '汽轮机缸效率中压缸(%)',\n '汽轮机缸效率低压缸(%)',\n '最大连续出力T-MCR(MW)',\n '额定主蒸汽压力(MPa)',\n '额定主蒸汽温度(℃)',\n '额定再热汽温度(℃)',\n '设计给水温度(℃)',\n '凝汽式机组保证热耗率(kJ/kWh)',\n '凝汽式汽机设计背压(kPa)',\n '供热式机组保证热耗率额定供热工况(kJ/kWh)',\n '供热式机组保证热耗率纯凝工况(kJ/kWh)',\n '供热式汽机设计背压供热工况(kPa)',\n '供热式汽机设计背压纯凝工况(kPa)',\n '设计供热抽汽压力工业(MPa)',\n '设计供热抽汽压力采暖(MPa)',\n '设计供热抽汽温度工业(℃)',\n '设计供热抽汽温度采暖(℃)',\n '设计最大供热抽汽流量工业(t/h)',\n '设计最大供热抽汽流量采暖(t/h)',\n '凝结水泵制造厂家',\n '凝结水泵型号',\n '凝结水泵电机电压(V)',\n '凝结水泵电机功率(kW)',\n '凝结水泵凝结水泵流量(t/h)',\n '循环水泵制造厂家',\n '循环水泵型号',\n '循环水泵电机电压(V)',\n '循环水泵电机功率(kW)',\n '循环水泵循环水泵流量(t/h)',\n '给水泵汽轮机制造厂',\n '给水泵汽轮机型号',\n '给水泵汽轮机出力比(%)',\n '给水泵汽轮机台数(台)',\n '给水泵汽轮机额定功率(KW)',\n '给水泵汽轮机内效率(%)',\n '给水泵汽轮机额定进汽压力(MPa)',\n '给水泵汽轮机额定排汽压力(kPa)',\n '给水泵汽轮机额定进汽温度(℃)',\n '给水泵汽轮机额定排汽温度(℃)',\n '给水泵汽轮机额定蒸汽流量(t/h)',\n '电动给水泵制造厂',\n '电动给水泵型号',\n '电动给水泵出力比(%)',\n '电动给水泵台数(台)',\n '电动给水泵给水泵流量(t/h)',\n '电动给水泵给水泵压力(MPa)',\n '旁路系统型式',\n '旁路系统旁路蒸汽量比率(%)',\n '冷凝器制造厂家',\n '冷凝器型号',\n '冷凝器型式',\n '循环水介质及比例地表水(%)',\n '循环水介质及比例地下水(%)',\n '循环水介质及比例中水(%)',\n '循环水介质及比例海水(%)',\n '循环水循环方式',\n '循环冷却倍率(%)',\n '开式循环水提升高度(m)',\n '空冷机组出力受阻背压(kPa)',\n '空冷机组出力受阻背压(kPa).1']" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "steam_samples = pd.read_excel(f'./new_data/{old_steam[0]}', header=[3,4,5])\n", + "steam_cols = [''.join([x for x in y if 'Unnamed' not in x]) for y in steam_samples.columns]\n", + "steam_cols" + ], + "metadata": 
{ + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [], + "source": [ + "steam_list = list()\n", + "for file in old_steam:\n", + " data = pd.read_excel(f'./new_data/{file}', header=[3, 4, 5])\n", + " data.columns = steam_cols\n", + " steam_list.append(data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 30, + "outputs": [], + "source": [ + "# unit_list = list()\n", + "for file in steam_data:\n", + " data = pd.read_excel(f'./火电20230313/火电20230313/{file}', header=[3,4,5])\n", + " data.columns = steam_cols\n", + " steam_list.append(data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 31, + "outputs": [], + "source": [ + "steam_df = pd.concat(steam_list).drop(columns=['序号', '空冷机组出力受阻背压(kPa).1']).drop_duplicates().reset_index(drop=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Dropping invalid columns in DataFrameGroupBy.max is deprecated. In a future version, a TypeError will be raised. 
Before calling .max, select only columns which should be valid for the function.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "steam_max = steam_df.groupby(['电厂名称', '机组编号']).max().reset_index()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "### 锅炉数据" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "outputs": [ + { + "data": { + "text/plain": "['中国中信集团公司-32020-锅炉数据查询-20230313.xls',\n '中国中煤能源集团有限公司-34027-锅炉数据查询-20230313.xls',\n '中国华电集团有限公司-21060-锅炉数据查询-20230313.xls',\n '中国华能集团有限公司-17021-锅炉数据查询-20230313.xls',\n '中国大唐集团有限公司-61005-锅炉数据查询-20230313.xls',\n '中国铝业股份有限公司-64011-锅炉数据查询-20230313.xls',\n '中国长江三峡集团公司-42011-锅炉数据查询-20230313.xls',\n '其他-47001-锅炉数据查询-20230313.xls',\n '内蒙古源源能源集团有限责任公司-15050-锅炉数据查询-20230313.xls',\n '内蒙古能源发电投资集团有限公司-15047-锅炉数据查询-20230313.xls',\n '北京能源集团有限责任公司-64009-锅炉数据查询-20230313.xls',\n '华润(集团)有限公司-13008-锅炉数据查询-20230313.xls',\n '协鑫集团有限公司-32036-锅炉数据查询-20230313.xls',\n '国家开发投资公司-37009-锅炉数据查询-20230313.xls',\n '国家开发投资集团有限公司-16014-锅炉数据查询-20230313.xls',\n '国家电力投资集团有限公司-21018-锅炉数据查询-20230313.xls',\n '国家能源投资集团有限责任公司-23012-锅炉数据查询-20230313.xls',\n '安徽省能源集团有限公司-34003-锅炉数据查询-20230313.xls',\n '山西国际能源集团有限公司-14088-锅炉数据查询-20230313.xls',\n '山西焦煤集团有限公司-14099-锅炉数据查询-20230313.xls',\n '广东省能源集团有限公司-71003-锅炉数据查询-20230313.xls',\n '广州发展集团股份有限公司-71021-锅炉数据查询-20230313.xls',\n '新疆天山电力股份有限公司-65004-锅炉数据查询-20230313.xls',\n '晋能控股电力集团-14022-锅炉数据查询-20230313.xls',\n '江苏省国信集团有限公司-32015-锅炉数据查询-20230313.xls',\n '江西省投资集团有限公司-44020-锅炉数据查询-20230313.xls',\n '河北建设投资集团有限责任公司-13056-锅炉数据查询-20230313.xls',\n '河南投资集团有限公司-41014-锅炉数据查询-20230313.xls',\n '浙江省能源集团有限公司-33001-锅炉数据查询-20230313.xls',\n '深圳能源集团股份有限公司-71022-锅炉数据查询-20230313.xls',\n '甘肃省电力投资集团有限责任公司-62006-锅炉数据查询-20230313.xls',\n '申能股份有限公司-35016-锅炉数据查询-20230313.xls',\n '陕西投资集团有限公司-61040-锅炉数据查询-20230313.xls',\n 
'陕西榆林能源集团有限公司-61041-锅炉数据查询-20230313.xls',\n '陕西煤业化工集团有限责任公司-43014-锅炉数据查询-20230313.xls']" + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_boiler = [x for x in os.listdir('./new_data/') if '锅炉' in x]\n", + "new_boiler = [x for x in os.listdir('./火电20230313/火电20230313/') if '锅炉' in x]\n", + "new_boiler" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [ + { + "data": { + "text/plain": "['序号',\n '企业编码',\n '电厂名称',\n '简称',\n '机组编号',\n '制造厂家',\n '型号',\n '型式',\n '最大连续出力B-MCR(t/h)',\n '设计效率(%)',\n '工质流动方式',\n ' 额定主蒸汽压力 (MPa)',\n '额再热蒸汽温度 (℃)',\n '额定再热蒸汽压力 (MPa)',\n ' 额再热蒸汽温度 (℃)',\n '额再热蒸汽温度 (℃).1',\n '点火方式',\n '燃烧方式',\n '设计燃煤种类',\n '设计燃煤灰份(收到基)(%)',\n '设计燃煤挥发份)(收到基)(%)',\n '设计燃煤低位热值(收到基)(kJ/kg)',\n '排渣方式',\n '除灰方式',\n '空预器制造厂家',\n '空预器型号',\n '空预器型式',\n '空预器设计漏风率(%)',\n '磨煤机制造厂家',\n '磨煤机型号',\n '磨煤机型式',\n '磨煤机出力比',\n '磨煤机总台数',\n '磨煤机制粉方式',\n '除尘器制造厂家',\n '除尘器型号',\n '除尘器型式',\n '除尘器除尘器设计效率(%)',\n '一次风机制造厂家',\n '一次风机型号',\n '一次风机型式',\n '一次风机最大风压(Pa)',\n '一次风机最大流量 (m3/S)',\n '一次风机电机电压(V)',\n '一次风机电机功率(KW)',\n '送风机制造厂家',\n '送风机型号',\n '送风机型式',\n '送风机最大风压(Pa)',\n '送风机 最大流量(m3/S)',\n '送风机电机电压(V)',\n '送风机电机功率(KW)',\n '引风机制造厂家',\n '引风机型号',\n '引风机型式',\n '引风机最大风压(Pa)',\n '引风机最大流量(m3/S)',\n '引风机电机电压(V)',\n '引风机电机功率(KW)',\n '引风机电机功率(KW).1']" + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "boiler_samples = pd.read_excel(f'./new_data/{old_boiler[0]}', header=[3,4])\n", + "boiler_cols = [''.join([x for x in y if 'Unnamed' not in x]) for y in boiler_samples.columns]\n", + "boiler_cols" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 35, + "outputs": [], + "source": [ + "boiler_list = list()\n", + "for file in old_boiler:\n", + " data = pd.read_excel(f'./new_data/{file}', header=[3, 
4])\n", + " data.columns = boiler_cols\n", + " boiler_list.append(data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 36, + "outputs": [], + "source": [ + "for file in new_boiler:\n", + " data = pd.read_excel(f'./火电20230313/火电20230313/{file}', header=[3,4])\n", + " data.columns = boiler_cols\n", + " boiler_list.append(data)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 37, + "outputs": [], + "source": [ + "boiler_df = pd.concat(boiler_list).drop(columns=['序号', '引风机电机功率(KW).1']).drop_duplicates().reset_index(drop=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 38, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Dropping invalid columns in DataFrameGroupBy.max is deprecated. In a future version, a TypeError will be raised. 
Before calling .max, select only columns which should be valid for the function.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "boiler_max = boiler_df.groupby(['电厂名称', '机组编号']).max().reset_index()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 39, + "outputs": [], + "source": [ + "use_unit = units_df[units_df.columns[[1,3,4,5,6,7,8]]].drop_duplicates().set_index(['电厂名称', '机组编号'])\n", + "use_run_data = run_data_max.set_index(['电厂名称', '机组编号'])\n", + "use_steam = steam_df[steam_df.columns[[1,3,-9]]].drop_duplicates().set_index(['电厂名称', '机组编号'])\n", + "use_boiler = boiler_df.drop_duplicates().set_index(['电厂名称', '机组编号'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 40, + "outputs": [], + "source": [ + "total_data = use_unit.merge(use_steam, how='left', on=['电厂名称', '机组编号'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 41, + "outputs": [], + "source": [ + "total_data = total_data.merge(use_run_data, how='left', on=['电厂名称', '机组编号'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 42, + "outputs": [], + "source": [ + "total_data = total_data.merge(use_boiler, how='left', on=['电厂名称', '机组编号'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 43, + "outputs": [], + "source": [ + "total_data.reset_index().to_excel('./total_data.xlsx', index=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": 
"#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/evaluation.ipynb b/evaluation.ipynb new file mode 100644 index 0000000..a8a877d --- /dev/null +++ b/evaluation.ipynb @@ -0,0 +1,144 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [], + "source": [ + "import pandas as pd" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "power_eva = pd.read_csv('./发电测试结果.csv')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "power_eva.columns = ['real', 'pred']\n", + "power_eva['error'] = (power_eva.pred - power_eva.real).apply(abs) / power_eva.real" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [ + { + "data": { + "text/plain": " real pred error\n222 0.517443 0.518051 0.001175\n54 0.701795 0.671254 0.043519\n201 0.539900 0.541033 0.002099\n30 0.532658 0.530621 0.003823\n124 0.410033 0.420981 0.026701\n37 0.390315 0.391309 0.002548\n7 0.571029 0.579793 0.015347\n232 0.580826 0.579876 0.001635\n165 0.352021 0.374194 0.062987\n139 0.584566 0.567410 0.029348", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
realprederror
2220.5174430.5180510.001175
540.7017950.6712540.043519
2010.5399000.5410330.002099
300.5326580.5306210.003823
1240.4100330.4209810.026701
370.3903150.3913090.002548
70.5710290.5797930.015347
2320.5808260.5798760.001635
1650.3520210.3741940.062987
1390.5845660.5674100.029348
\n
" + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "power_eva.sample(10)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "heat_eva = pd.read_csv('./供热测试结果.csv')\n", + "heat_eva.columns = ['real', 'pred']\n", + "heat_eva['error'] = (heat_eva.pred - heat_eva.real).apply(abs) / heat_eva.real" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [ + { + "data": { + "text/plain": " real pred error\n131 0.071626 0.071494 0.001839\n256 0.076446 0.069821 0.086672\n141 0.067995 0.068865 0.012802\n71 0.071438 0.071276 0.002270\n284 0.072052 0.071835 0.003018\n294 0.075010 0.074507 0.006716\n77 0.052603 0.055783 0.060461\n96 0.062181 0.063483 0.020932\n176 0.077847 0.077317 0.006807\n164 0.082962 0.082844 0.001420", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
realprederror
1310.0716260.0714940.001839
2560.0764460.0698210.086672
1410.0679950.0688650.012802
710.0714380.0712760.002270
2840.0720520.0718350.003018
2940.0750100.0745070.006716
770.0526030.0557830.060461
960.0621810.0634830.020932
1760.0778470.0773170.006807
1640.0829620.0828440.001420
\n
" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "heat_eva.sample(10)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/extract_plant.ipynb b/extract_plant.ipynb new file mode 100644 index 0000000..08501e1 --- /dev/null +++ b/extract_plant.ipynb @@ -0,0 +1,149 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [], + "source": [ + "import pandas as pd" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "import os" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "files = [x for x in os.listdir('./火电20230313/火电20230313F/') if '经济性' in x]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "plants = [x.split('-')[0].strip() for x in files]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [ + { + "data": { + "text/plain": " plant 经度 纬度\n0 万方发电厂(焦作爱依斯万方电力有限公司) 
113.381649 35.255622\n1 三河发电有限责任公司 116.860260 39.953617\n2 上海上电漕泾发电有限公司 121.407593 30.765242\n3 上海吴泾发电有限责任公司 121.471140 31.065113\n4 上海吴泾第二发电有限责任公司 121.471340 31.062532", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
plant经度纬度
0万方发电厂(焦作爱依斯万方电力有限公司)113.38164935.255622
1三河发电有限责任公司116.86026039.953617
2上海上电漕泾发电有限公司121.40759330.765242
3上海吴泾发电有限责任公司121.47114031.065113
4上海吴泾第二发电有限责任公司121.47134031.062532
\n
" + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "old_plants = pd.read_excel('./lat_lon.xlsx')\n", + "old_plants.head()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "new_plants = [x for x in plants if x not in old_plants.plant.values]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "with open('./plant.txt', 'w', encoding='utf-8') as fw:\n", + " for plant in new_plants:\n", + " fw.write(plant)\n", + " fw.write('\\n')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/get_altitude.ipynb b/get_altitude.ipynb new file mode 100644 index 0000000..b88edb5 --- /dev/null +++ b/get_altitude.ipynb @@ -0,0 +1,291 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "888d089c-a9c8-4d2d-af74-dff1a8ccfefd", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import json\n", + "import time\n", + "from typing import List\n", + "import requests\n", + "import pandas as pd\n", + "\n", + "\n", + "class GetElevation:\n", + "\n", + " @classmethod\n", + " def 
__SendQuery(cls, latLngString: str) -> json:\n", + " query = ('https://api.opentopodata.org/v1/mapzen?locations={}&interpolation=bilinear'.format(latLngString))\n", + " res = requests.get(query).json()\n", + " if res[\"status\"] != \"OK\":\n", + " raise Exception(res[\"error\"])\n", + " return res\n", + "\n", + " def GetSingleElevation(self, latitude: float, longitude: float) -> float:\n", + " \"\"\"\n", + " 获取单个高程,输入经纬度格式为数值类型,返回值为高程float类型\n", + " :param latitude: 纬度\n", + " :param longitude: 经度\n", + " :return: 高程\n", + " \"\"\"\n", + " if latitude < -90 or latitude > 90:\n", + " raise Exception(\"纬度的范围应在-90-90之间!请检查数据源!\")\n", + " latLngString = str(latitude) + \",\" + str(longitude)\n", + " res = self.__SendQuery(latLngString)\n", + " elevation = res[\"results\"][0][\"elevation\"]\n", + " return elevation\n", + "\n", + " def GetMultiElevation(self, latitude: List[float], longitude: List[float]) -> List[float]:\n", + " \"\"\"\n", + " 获取数组类型的高程,输入经纬度格式为经度数组和纬度数组,返回值为高程数组\n", + " :param latitude:纬度数组\n", + " :param longitude:经度数组\n", + " :return:高程数组\n", + " \"\"\"\n", + " if len(latitude) != len(longitude):\n", + " raise Exception(\"纬度数组和经度数组长度不一致!请检查数据源!\")\n", + " for lat in latitude:\n", + " if lat < -90 or lat > 90:\n", + " raise Exception(\"纬度的范围应在-90-90之间!请检查数据源!\")\n", + " elevationList = []\n", + " hundredNums = len(latitude) // 100\n", + " # 查询整百的高程\n", + " for i in range(hundredNums):\n", + " latLngString = \"\"\n", + " for idx in range(100 * i, 100 * (i + 1)):\n", + " latLngString += (str(latitude[idx]) + \",\" + str(longitude[idx]) + \"|\")\n", + " res = self.__SendQuery(latLngString)\n", + " for idx in range(100):\n", + " elevationList.append(res[\"results\"][idx][\"elevation\"])\n", + " time.sleep(1)\n", + " # 查询剩余的不到100的高程\n", + " latLngString = \"\"\n", + " for i in range(hundredNums * 100, len(latitude)):\n", + " latLngString += (str(latitude[i]) + \",\" + str(longitude[i]) + \"|\")\n", + " res = self.__SendQuery(latLngString)\n", + " for i in 
range(len(latitude) - hundredNums * 100):\n", + " elevationList.append(res[\"results\"][i][\"elevation\"])\n", + " return elevationList\n", + "\n", + " def ExportToXlsx(self, latLongDf: pd.DataFrame, elevationList: List[float], outputPath: str) -> None:\n", + " \"\"\"\n", + " 如果用户可以传入一个DataFrame数据,可以将返回得到的高程拼接并输出\n", + " :param latLongDf: DataFrame数据\n", + " :param elevationList: 高程数组\n", + " :param outputPath: 输出路径\n", + " :return: 无返回值\n", + " \"\"\"\n", + " latLongDf[\"elevation\"] = elevationList\n", + " latLongDf.to_excel(outputPath, index=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "2a226b08-0c92-483e-b590-29a39dce6298", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " plant longitude latitude\n0 万方发电厂(焦作爱依斯万方电力有限公司) 113.381649 35.255622\n1 三河发电有限责任公司 116.860260 39.953617\n2 上海上电漕泾发电有限公司 121.407593 30.765242\n3 上海吴泾发电有限责任公司 121.471140 31.065113\n4 上海吴泾第二发电有限责任公司 121.471340 31.062532", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
plantlongitudelatitude
0万方发电厂(焦作爱依斯万方电力有限公司)113.38164935.255622
1三河发电有限责任公司116.86026039.953617
2上海上电漕泾发电有限公司121.40759330.765242
3上海吴泾发电有限责任公司121.47114031.065113
4上海吴泾第二发电有限责任公司121.47134031.062532
\n
" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_excel('./lat_lon.xlsx')\n", + "data.columns = ['plant', 'longitude', 'latitude']\n", + "data = data.groupby('plant').mean().reset_index()\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "ele = GetElevation()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c1671bcd-bd33-40dd-82b5-a487801045c0", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method GetMultiElevation in module __main__:\n", + "\n", + "GetMultiElevation(latitude: List[float], longitude: List[float]) -> List[float] method of __main__.GetElevation instance\n", + " 获取数组类型的高程,输入经纬度格式为经度数组和纬度数组,返回值为高程数组\n", + " :param latitude:纬度数组\n", + " :param longitude:经度数组\n", + " :return:高程数组\n", + "\n" + ] + } + ], + "source": [ + "help(ele.GetMultiElevation)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "91afb581-1994-47c4-85ca-d3ea3e13e95d", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[88.0, 27.0, 4.0, 3.0, 4.0, 2.0, 8.0, -2.0, 5.0, 7.0, 3.0, 58.0, 55.0, 775.0, 594.0, -3.0, 1273.0, 145.0, 145.0, 548.0, 21.0, 28.0, 48.0, 1117.0, 14.0, 1172.0, 387.0, 124.0, 1391.0, 1708.0, 1278.0, 440.0, 135.0, 494.0, 1143.0, 1869.0, 1656.0, 1873.0, 124.0, 361.0, 30.0, 971.0, -3.0, 80.0, 475.0, 1622.0, 292.0, 1317.0, 1385.0, 247.0, 1312.0, 1106.0, 1225.0, 1006.0, 1237.0, 1151.0, 555.0, 1024.0, 694.0, 1226.0, 1050.0, 1162.0, 285.0, 1007.0, 1207.0, 1023.0, 178.0, 861.0, 1171.0, 1069.0, 1015.0, 90.0, 1.0, 83.0, 1037.0, 1037.0, 1114.0, 1069.0, 1107.0, 1076.0, 1054.0, 1165.0, 1255.0, 14.0, 52.0, 171.0, 351.0, 49.0, -5.0, 7.0, -2.0, 152.0, 
2.0, -2.0, 5.0, 51.0, 51.0, 606.0, 270.0, 1031.0, 1017.0, 595.0, 65.0, 279.0, 1089.0, 218.0, 57.0, 777.0, 9.0, 84.0, 520.0, 622.0, 574.0, 724.0, 582.0, 1010.0, 505.0, 1010.0, 1.0, 1.0, 59.0, 72.0, 10.0, 65.0, 64.0, 75.0, 57.0, 80.0, 118.0, 151.0, 250.0, 0.0, 1459.0, 1036.0, 1884.0, 286.0, 27.0, 2.0, 13.0, 6.0, 65.0, 3.0, 1331.0, 675.0, 240.0, 2007.0, 1590.0, 1532.0, 1545.0, 36.0, 6.0, 7.0, 199.0, 38.0, 76.0, 0.0, 73.0, 4.0, 1.0, 22.0, -2.0, 40.0, 2.0, 148.0, -1.0, 7.0, 22.0, 1149.0, 154.0, 27.0, 24.0, 44.0, 1130.0, 1277.0, 34.0, 154.0, 718.0, 1504.0, 1034.0, 19.0, 5.0, 172.0, 160.0, 27.0, 26.0, 26.0, 206.0, 19.0, 507.0, 75.0, 177.0, 102.0, 6.0, 30.0, 93.0, 218.0, -34.0, 67.0, 131.0, 1544.0, 708.0, 199.0, 119.0, 99.0, 14.0, 6.0, 11.0, 5.0, 9.0, 8.0, 130.0, 133.0, 180.0, 26.0, 1048.0, 73.0, 326.0, 192.0, 175.0, 185.0, 151.0, 151.0, 18.0, 308.0, 404.0, 360.0, 296.0, 208.0, 45.0, 46.0, 136.0, 573.0, 144.0, 147.0, -4.0, 86.0, 68.0, 167.0, 1479.0, 123.0, 35.0, 55.0, 1.0, 112.0, 8.0, 16.0, 20.0, 6.0, 19.0, 47.0, 528.0, 17.0, 627.0, -3.0, -1.0, 1459.0, 81.0, 91.0, 1122.0, 7.0, 1087.0, 1174.0, 81.0, 1349.0, 788.0, 451.0, 361.0, 65.0, 147.0, 25.0, 545.0, 1466.0, 264.0, 223.0, 56.0, 1327.0, 45.0, 45.0, 1052.0, 5.0, 1052.0, 180.0, 35.0, 1087.0, 1.0, 32.0, 1405.0, 699.0, 699.0, 1266.0, 35.0, 18.0, 116.0, 221.0, 186.0, 184.0, 7.0, 132.0, 1165.0, 1118.0, 1252.0, 29.0, 0.0, 7.0, 1184.0, 93.0, 91.0, 32.0, 849.0, 58.0, 7.0, 1.0, -6.0, -5.0, 39.0, 16.0, 12.0, 13.0, 9.0, 0.0, 132.0, 52.0, 7.0, 25.0, 32.0, 83.0, 1787.0, 0.0, 69.0, 24.0, 1.0, 3.0, -2.0, 1096.0, 207.0, 375.0, -8.0, 33.0, 94.0, 45.0, 83.0, 253.0, 24.0, 119.0, 118.0, 563.0, 468.0, 233.0, 26.0, 687.0, 1067.0, 46.0, 35.0, 30.0, 83.0, 562.0, 718.0, 426.0, 212.0, 853.0, 452.0, 294.0, 148.0, 123.0, 117.0, 25.0, 30.0, 17.0, 85.0, 7.0, 49.0, 89.0, 972.0, 56.0, 352.0, 185.0, 1113.0, 261.0, 1877.0, 222.0, 692.0, 1112.0, 395.0, 412.0, 395.0, 394.0, 8.0, 183.0, 6.0, 34.0, 19.0, 454.0, 1.0, 6.0, 8.0, 38.0, 4.0, 3.0, 6.0, 4.0, 4.0, 
786.0, 1247.0, 1157.0, 1318.0, 1339.0, 1151.0, 1329.0, 46.0, 30.0, 9.0, 30.0, 9.0, 16.0, 1987.0, 151.0, 35.0, 8.0, 1353.0, 480.0, 539.0, 1010.0, 819.0, 1061.0, 371.0, 803.0, 717.0, 916.0, 918.0, 696.0, 998.0, 878.0, 28.0, 95.0, 88.0, 16.0, 93.0, 0.0, 177.0, 1.0, 9.0, 192.0, 5.0, -2.0, 5.0, -1.0, -1.0, 10.0, 2.0, 5.0, 79.0, 98.0, 338.0, 41.0, 46.0, 3.0, 35.0, 42.0, 2.0, 78.0, 54.0, 798.0, 63.0, 1288.0, 1066.0, -53.0, 705.0, 757.0, 903.0, 547.0, 1267.0, 1267.0, 355.0, 401.0, 916.0, 233.0, 6.0, 1151.0, 937.0, 940.0, 23.0, 1.0, 9.0, 9.0, 11.0, 5.0, 4.0, 7.0, -3.0, 5.0, 22.0, 4.0, 27.0, 23.0, 29.0, 10.0, 51.0, 3.0, 8.0, 63.0, 122.0, 67.0, 32.0, 28.0, 714.0, 4.0, 7.0, 604.0, 115.0, 31.0, 938.0, 21.0, 121.0, 314.0, 55.0, 70.0, 186.0, 61.0, 76.0, 154.0, 117.0, 395.0, 255.0, 40.0, 37.0, 5.0, -7.0, 3.0, 45.0, 2.0, 4.0, 6.0, 8.0, 73.0, -3.0, 4.0, 4.0, 11.0, 637.0, 30.0, 28.0, 31.0, 21.0, 23.0, 8.0, 34.0, 32.0, 1007.0, 954.0, 31.0, 31.0, 39.0, 10.0, 137.0, 31.0, 56.0, 110.0, 1.0, 592.0, 401.0, 48.0, 140.0, 171.0, 1414.0, 1455.0, 1707.0, 1707.0, 564.0, 992.0, 342.0, 35.0, 61.0, 9.0, 57.0, 41.0, 1442.0, 699.0, 1064.0, 191.0, 251.0, -11.0, 181.0, 215.0, 302.0, 1.0, 2.0, 1376.0, 2.0, 9.0, 798.0, 335.0, 21.0, 11.0, 1049.0, 1002.0, 21.0, 33.0, 75.0, 152.0, 38.0, 1266.0, 360.0, 1029.0, 1477.0, 901.0, 899.0, 1284.0, 862.0, 1205.0, 905.0, 489.0, 1205.0, 160.0, 98.0, 74.0, -2.0, 44.0, 103.0, 121.0, 443.0, 57.0, 93.0, 179.0, 175.0, 178.0, 828.0, 75.0, 115.0, 1487.0, 393.0, 165.0, 347.0, 197.0, 264.0, 27.0, 905.0, 3.0, 2.0, 33.0, 54.0, 26.0, 196.0, 142.0, 30.0, 690.0, 81.0, 955.0, 49.0, 1210.0, 500.0, 654.0, 1180.0, 1155.0, 1057.0, 407.0, 1225.0, 1169.0, 45.0, 2266.0, 2674.0, 1201.0, 1495.0, 394.0, 9.0, 182.0, 145.0, 53.0, 147.0, 1265.0]\n" + ] + } + ], + "source": [ + "multiEle = ele.GetMultiElevation(data[\"latitude\"], data[\"longitude\"])\n", + "print(multiEle)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "555559ea-7e35-4062-a21e-e5275b8da9cd", + "metadata": { 
+ "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "669" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(multiEle)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "74fced56-47e8-43cc-b412-af40fcb3eedd", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "data['altitude'] = multiEle" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "61704866-2b97-4bd0-ac0c-f2708ef52094", + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " plant longitude latitude altitude\n0 万方发电厂(焦作爱依斯万方电力有限公司) 113.381649 35.255622 88.0\n1 三河发电有限责任公司 116.860260 39.953617 27.0\n2 上海上电漕泾发电有限公司 121.407593 30.765242 4.0\n3 上海吴泾发电有限责任公司 121.471140 31.065113 3.0\n4 上海吴泾第二发电有限责任公司 121.471340 31.062532 4.0\n.. ... ... ... ...\n664 鹤壁丰鹤发电有限责任公司 114.192184 35.850766 182.0\n665 鹤壁同力发电有限责任公司 114.191246 35.860822 145.0\n666 黄冈大别山发电有限责任公司 114.915181 31.144568 53.0\n667 黑龙江华电齐齐哈尔热电有限公司 124.063322 47.387983 147.0\n668 黔桂发电有限责任公司 106.630029 26.607537 1265.0\n\n[669 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
plantlongitudelatitudealtitude
0万方发电厂(焦作爱依斯万方电力有限公司)113.38164935.25562288.0
1三河发电有限责任公司116.86026039.95361727.0
2上海上电漕泾发电有限公司121.40759330.7652424.0
3上海吴泾发电有限责任公司121.47114031.0651133.0
4上海吴泾第二发电有限责任公司121.47134031.0625324.0
...............
664鹤壁丰鹤发电有限责任公司114.19218435.850766182.0
665鹤壁同力发电有限责任公司114.19124635.860822145.0
666黄冈大别山发电有限责任公司114.91518131.14456853.0
667黑龙江华电齐齐哈尔热电有限公司124.06332247.387983147.0
668黔桂发电有限责任公司106.63002926.6075371265.0
\n

669 rows × 4 columns

\n
" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "data.to_csv('./电厂机组地理信息.csv', encoding='utf-8-sig', index=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/xgboost多任务回归.ipynb b/xgboost多任务回归.ipynb new file mode 100644 index 0000000..34c02c2 --- /dev/null +++ b/xgboost多任务回归.ipynb @@ -0,0 +1,1809 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from sklearn.multioutput import MultiOutputRegressor\n", + "import xgboost as xgb\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score\n", + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n5740 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.0 38.84 \n\n 
燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n5740 7.28 烟煤 陕西省 109.820265 38.304383 1151 0.661759 \n\n 供热碳排放因子(kg/MJ) \n5740 0.091483 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
5740榆能榆神热电有限公司2350.0抽凝式超临界间接空冷25514.038.847.28烟煤陕西省109.82026538.30438311510.6617590.091483
\n
" + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data = pd.read_excel('train_data.xlsx')\n", + "total_data.tail(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "Index(['企业名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)',\n '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude',\n 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'],\n dtype='object')" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [ + { + "data": { + "text/plain": " 发电类型 地区 城市 企业名称 机组编号 机组状态 机组数量 单机容量(MW) 总容量(MW) \\\n0 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 1 在役 1 320.0 320.0 \n1 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 2 在役 1 320.0 320.0 \n2 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 3 在役 1 1000.0 1000.0 \n3 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 4 在役 1 1000.0 1000.0 \n4 煤电 安徽省 安庆市 安徽华泰林浆纸有限公司 化学浆生产线 在役 1 40.0 40.0 \n... ... ... ... ... ... ... ... ... ... \n5317 煤电 重庆市 长寿区 中国石化集团重庆川维化工有限公司 B4 在役 1 49.0 49.0 \n5318 煤电 重庆市 长寿区 威立雅长扬热能(重庆)有限责任公司 1 在役 1 25.0 25.0 \n5319 煤电 重庆市 长寿区 威立雅长扬热能(重庆)有限责任公司 2 在役 1 25.0 25.0 \n5320 煤电 重庆市 长寿区 重庆恩力吉投资有限责任公司 2 在役 1 30.0 30.0 \n5321 煤电 重庆市 长寿区 重庆恩力吉投资有限责任公司 3 在役 1 125.0 125.0 \n\n 核心设备类型 汽轮机类型 压力参数 冷却方式 \n0 煤粉锅炉 凝气式 亚临界 水冷-开式循环 \n1 煤粉锅炉 凝气式 亚临界 水冷-开式循环 \n2 煤粉锅炉 凝气式 超超临界 水冷-闭式循环 \n3 煤粉锅炉 凝气式 超超临界 水冷-闭式循环 \n4 煤粉锅炉 抽凝式 高压 水冷-闭式循环 \n... ... ... ... ... \n5317 煤粉锅炉 抽凝式 高压 水冷-闭式循环 \n5318 循环流化床锅炉 抽凝式 高压 水冷-闭式循环 \n5319 循环流化床锅炉 抽背式 高压 水冷-闭式循环 \n5320 循环流化床锅炉 背压式 高压 其他 \n5321 循环流化床锅炉 抽凝式 高压 水冷-闭式循环 \n\n[5322 rows x 13 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
发电类型地区城市企业名称机组编号机组状态机组数量单机容量(MW)总容量(MW)核心设备类型汽轮机类型压力参数冷却方式
0煤电安徽省安庆市国能神皖安庆发电有限责任公司1在役1320.0320.0煤粉锅炉凝气式亚临界水冷-开式循环
1煤电安徽省安庆市国能神皖安庆发电有限责任公司2在役1320.0320.0煤粉锅炉凝气式亚临界水冷-开式循环
2煤电安徽省安庆市国能神皖安庆发电有限责任公司3在役11000.01000.0煤粉锅炉凝气式超超临界水冷-闭式循环
3煤电安徽省安庆市国能神皖安庆发电有限责任公司4在役11000.01000.0煤粉锅炉凝气式超超临界水冷-闭式循环
4煤电安徽省安庆市安徽华泰林浆纸有限公司化学浆生产线在役140.040.0煤粉锅炉抽凝式高压水冷-闭式循环
..........................................
5317煤电重庆市长寿区中国石化集团重庆川维化工有限公司B4在役149.049.0煤粉锅炉抽凝式高压水冷-闭式循环
5318煤电重庆市长寿区威立雅长扬热能(重庆)有限责任公司1在役125.025.0循环流化床锅炉抽凝式高压水冷-闭式循环
5319煤电重庆市长寿区威立雅长扬热能(重庆)有限责任公司2在役125.025.0循环流化床锅炉抽背式高压水冷-闭式循环
5320煤电重庆市长寿区重庆恩力吉投资有限责任公司2在役130.030.0循环流化床锅炉背压式高压其他
5321煤电重庆市长寿区重庆恩力吉投资有限责任公司3在役1125.0125.0循环流化床锅炉抽凝式高压水冷-闭式循环
\n

5322 rows × 13 columns

\n
" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx')\n", + "unit_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": "(5694, 16)" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.drop_duplicates(inplace=True)\n", + "total_data.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "total_data['机组编号'] = total_data['机组编号'].astype(str)\n", + "unit_data['机组编号'] = unit_data['机组编号'].astype(str)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "total_data = total_data.merge(unit_data[['企业名称', '机组编号', '核心设备类型', '汽轮机类型', '冷却方式']], how='left', on=['企业名称', '机组编号'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "data": { + "text/plain": " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n0 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21602.05000 \n1 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21926.81000 \n2 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21261.93062 \n3 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20840.00000 \n4 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20706.00000 \n... ... ... ... ... ... ... ... 
\n5689 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21973.00000 \n5690 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21372.00000 \n5691 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 20856.00000 \n5692 榆能榆神热电有限公司 1 350.0 抽凝式 超临界 间接空冷 25514.00000 \n5693 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.00000 \n\n 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1 \n1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1 \n2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1 \n3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1 \n4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1 \n... ... ... .. ... ... ... ... \n5689 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73 \n5690 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73 \n5691 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73 \n5692 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n5693 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 核心设备类型 汽轮机类型 冷却方式 \n0 0.586990 0.076843 煤粉锅炉 凝气式 水冷-开式循环 \n1 0.632859 0.077676 煤粉锅炉 凝气式 水冷-开式循环 \n2 0.609196 0.074823 煤粉锅炉 凝气式 水冷-开式循环 \n3 0.602178 0.081628 煤粉锅炉 凝气式 水冷-开式循环 \n4 0.590254 0.081103 煤粉锅炉 凝气式 水冷-开式循环 \n... ... ... ... ... ... \n5689 0.628300 0.078776 煤粉锅炉 凝气式 水冷-开式循环 \n5690 0.595019 0.076622 煤粉锅炉 凝气式 水冷-开式循环 \n5691 0.565718 0.074772 煤粉锅炉 凝气式 水冷-开式循环 \n5692 0.664456 0.091482 煤粉锅炉 抽凝式 空冷-间接空冷 \n5693 0.661759 0.091483 煤粉锅炉 抽凝式 空冷-间接空冷 \n\n[5694 rows x 19 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)核心设备类型汽轮机类型冷却方式
0江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21602.0500026.0916.80烟煤江苏省120.09662031.94236110.5869900.076843煤粉锅炉凝气式水冷-开式循环
1江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21926.8100026.6815.41烟煤江苏省120.09662031.94236110.6328590.077676煤粉锅炉凝气式水冷-开式循环
2江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21261.9306226.4615.18烟煤江苏省120.09662031.94236110.6091960.074823煤粉锅炉凝气式水冷-开式循环
3江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环20840.0000026.4314.55烟煤江苏省120.09662031.94236110.6021780.081628煤粉锅炉凝气式水冷-开式循环
4江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环20706.0000026.4314.96烟煤江苏省120.09662031.94236110.5902540.081103煤粉锅炉凝气式水冷-开式循环
............................................................
5689浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环21973.0000037.4317.12烟煤浙江省121.46584028.704623730.6283000.078776煤粉锅炉凝气式水冷-开式循环
5690浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环21372.0000039.8718.01烟煤浙江省121.46584028.704623730.5950190.076622煤粉锅炉凝气式水冷-开式循环
5691浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环20856.0000039.3219.74烟煤浙江省121.46584028.704623730.5657180.074772煤粉锅炉凝气式水冷-开式循环
5692榆能榆神热电有限公司1350.0抽凝式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.30438311510.6644560.091482煤粉锅炉抽凝式空冷-间接空冷
5693榆能榆神热电有限公司2350.0抽凝式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.30438311510.6617590.091483煤粉锅炉抽凝式空冷-间接空冷
\n

5694 rows × 19 columns

\n
" + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "na_boiler_df = total_data[total_data['核心设备类型'].isna()].drop(columns=['核心设备类型', '汽轮机类型', '冷却方式'])\n", + "boiler_df = total_data[~total_data['核心设备类型'].isna()].copy()\n", + "na_boiler = total_data[total_data['核心设备类型'].isna()]['企业名称'].unique()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "na_boiler_df = na_boiler_df.merge(unit_data[['企业名称', '核心设备类型']], how='left', on=['企业名称'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [ + "total_data = pd.concat([boiler_df, na_boiler_df], axis=0).drop_duplicates()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [ + { + "data": { + "text/plain": "煤粉锅炉 5428\nW火焰炉 151\n循环流化床锅炉 4\nName: 核心设备类型, dtype: int64" + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data['核心设备类型'].value_counts()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "num_cols = ['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude', 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']\n", + "# object_cols = ['所处地区', '类型', '机组参数', '冷却型式']\n", + "# object_cols = ['所处地区', '汽轮机类型', '参数分类', 
'冷凝器型式', '核心设备类型']\n", + "object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式']" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def change_str(x):\n", + " if pd.isna(x):\n", + " return x\n", + " if '空冷' in x:\n", + " return '空冷'\n", + " if '水冷' in x:\n", + " return '水冷'\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# total_data['冷凝器型式'] = total_data['冷凝器型式'].apply(change_str)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data = total_data[total_data['发电碳排放因子(kg/kWh)'] <= 0.9].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "['所处地区',\n '机组类型',\n '参数分类',\n '冷凝器型式',\n '铭牌容量 (MW)',\n '入炉煤低位热值(kJ/kg)',\n '燃煤挥发份Var(%)',\n '燃煤灰份Aar(%)',\n 'longitude',\n 'latitude',\n 'altitude']" + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_cols = object_cols + [x for x in num_cols if '因子' not in x]\n", + "use_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data = total_data[~total_data['供热碳排放因子(kg/MJ)'].isna()].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": 
"(5685, 19)" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "(1060, 17)" + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.groupby(['企业名称', '机组编号']).count().shape" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data['入炉煤低位热值(kJ/kg)'] = total_data['入炉煤低位热值(kJ/kg)'].apply(lambda x: x * 1000 if x < 100 else x * 1)\n", + "total_data['燃煤灰份Aar(%)'] = total_data['燃煤灰份Aar(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1)\n", + "total_data['燃煤挥发份Var(%)'] = total_data['燃煤挥发份Var(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "total_data.altitude = total_data.altitude.apply(lambda x: 0 if x < 0 else x)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "(5041, 19)" + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data = total_data[(total_data['供热碳排放因子(kg/MJ)'] > 0.01)&(total_data['供热碳排放因子(kg/MJ)'] < 0.1)].dropna()\n", + "use_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "jupyter": { + 
"outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import seaborn as sns" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "count 5041.000000\nmean 0.070824\nstd 0.009937\nmin 0.010464\n25% 0.065431\n50% 0.071466\n75% 0.077387\nmax 0.099905\nName: 供热碳排放因子(kg/MJ), dtype: float64" + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data['供热碳排放因子(kg/MJ)'].describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "train_data = use_data.groupby(use_cols)['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 20209.00 25.94 \n1 上海市 凝气式 亚临界 水冷-开式循环 300.0 20785.00 25.97 \n2 上海市 凝气式 亚临界 水冷-开式循环 300.0 20796.00 26.00 \n3 上海市 凝气式 亚临界 水冷-开式循环 300.0 21762.00 27.01 \n4 上海市 凝气式 亚临界 水冷-开式循环 320.0 15829.32 30.85 \n... ... ... ... ... ... ... ... 
\n3789 黑龙江省 纯凝式 超高压 水冷 200.0 15941.21 23.83 \n3790 黑龙江省 纯凝式 超高压 水冷 210.0 15355.00 42.00 \n3791 黑龙江省 背压式 超高压 水冷-开式循环 200.0 13396.00 23.39 \n3792 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 15753.00 36.29 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 16471.11 30.10 \n\n 燃煤灰份Aar(%) longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n0 15.34 121.471140 31.065113 3 0.623923 \n1 17.03 121.471140 31.065113 3 0.639474 \n2 13.00 121.471140 31.065113 3 0.635351 \n3 13.35 121.471140 31.065113 3 0.674456 \n4 4.77 121.601480 31.358794 2 0.506816 \n... ... ... ... ... ... \n3789 14.73 126.575647 45.918566 118 0.500172 \n3790 36.70 131.695864 46.580444 91 0.518301 \n3791 15.66 123.639146 47.210696 151 0.224312 \n3792 42.40 129.604803 44.608202 250 0.290814 \n3793 38.67 129.604803 44.608202 250 0.321635 \n\n 供热碳排放因子(kg/MJ) \n0 0.078064 \n1 0.079308 \n2 0.078691 \n3 0.085853 \n4 0.060934 \n... ... \n3789 0.064200 \n3790 0.063249 \n3791 0.053770 \n3792 0.068027 \n3793 0.067798 \n\n[3794 rows x 13 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0上海市凝气式亚临界水冷-开式循环300.020209.0025.9415.34121.47114031.06511330.6239230.078064
1上海市凝气式亚临界水冷-开式循环300.020785.0025.9717.03121.47114031.06511330.6394740.079308
2上海市凝气式亚临界水冷-开式循环300.020796.0026.0013.00121.47114031.06511330.6353510.078691
3上海市凝气式亚临界水冷-开式循环300.021762.0027.0113.35121.47114031.06511330.6744560.085853
4上海市凝气式亚临界水冷-开式循环320.015829.3230.854.77121.60148031.35879420.5068160.060934
..........................................
3789黑龙江省纯凝式超高压水冷200.015941.2123.8314.73126.57564745.9185661180.5001720.064200
3790黑龙江省纯凝式超高压水冷210.015355.0042.0036.70131.69586446.580444910.5183010.063249
3791黑龙江省背压式超高压水冷-开式循环200.013396.0023.3915.66123.63914647.2106961510.2243120.053770
3792黑龙江省背压式超高压水冷-闭式循环215.015753.0036.2942.40129.60480344.6082022500.2908140.068027
3793黑龙江省背压式超高压水冷-闭式循环215.016471.1130.1038.67129.60480344.6082022500.3216350.067798
\n

3794 rows × 13 columns

\n
" + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_data" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " if '因子' not in col:\n", + " train_data[col] = np.log1p(train_data[col])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data = train_data[train_data['供热碳排放因子(kg/MJ)']<=0.1].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data = pd.get_dummies(train_data, columns=object_cols).dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in train_data.columns:\n", + " train_data[col] = train_data[col].astype(float)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "feature_cols = [x for x in train_data.columns if '因子' not in x and '其他' not in x]\n", + "target_cols = [x for x in train_data.columns if '因子' in x]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data.to_csv('./train_data_processed.csv', encoding='utf-8-sig', 
index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train, test = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=42)\n", + "train, valid = train_test_split(train, test_size=0.1, shuffle=True, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_X, train_y = train[feature_cols], train[target_cols]\n", + "valid_X, valid_y = valid[feature_cols], valid[target_cols]\n", + "test_X, test_y = test[feature_cols], test[target_cols]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBRegressor\n", + "from bayes_opt import BayesianOptimization" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 供电建模" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.01,\n", + " 'max_depth': 60,\n", + " 'subsample': 0.8,\n", + " 'colsample_bytree': 0.9,\n", + " 'min_child_weight': 60,\n", + " 'seed': 42}\n", + "\n", + "num_boost_round = 2000" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "outputs": [], + "source": [ + "\n", + "dtrain = xgb.DMatrix(train_X, train_y.values[:, 0])\n", + "dvalid = 
xgb.DMatrix(valid_X, valid_y.values[:, 0])\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=200, verbose_eval=False)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "y_pred_xgb = gb_model.predict(xgb.DMatrix(test_X))\n", + "y_true_xgb = test_y.values[:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 9.9E-04\n", + "RMSE: 0.0315\n", + "MAE: 0.0146\n", + "MAPE: 4.39 %\n", + "R_2: 0.83\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_xgb, y_pred_xgb))\n", + "MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n", + "MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n", + "R_2 = r2_score(y_true_xgb, y_pred_xgb)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 4))\n", + "print('MAE:', round(MAE, 4))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差a" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "outputs": [], + "source": [ + "from sklearn.model_selection import KFold" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 42, + "outputs": [], + "source": [ + "kf = KFold(n_splits=10, shuffle=True, random_state=666)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { 
+ "cell_type": "code", + "execution_count": 43, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 3.5E-04, RMSE: 0.0188, MAE: 0.0126, MAPE: 2.6 %, R_2: 0.9346\n", + "MSE: 9.5E-04, RMSE: 0.0308, MAE: 0.0142, MAPE: 4.28 %, R_2: 0.8446\n", + "MSE: 9.9E-04, RMSE: 0.0314, MAE: 0.0139, MAPE: 4.29 %, R_2: 0.8507\n", + "MSE: 5.0E-04, RMSE: 0.0225, MAE: 0.0126, MAPE: 2.53 %, R_2: 0.9118\n", + "MSE: 9.9E-04, RMSE: 0.0314, MAE: 0.0143, MAPE: 4.45 %, R_2: 0.8383\n", + "MSE: 3.6E-04, RMSE: 0.0191, MAE: 0.0127, MAPE: 2.57 %, R_2: 0.9298\n", + "MSE: 5.3E-04, RMSE: 0.023, MAE: 0.0143, MAPE: 3.13 %, R_2: 0.9112\n", + "MSE: 5.1E-04, RMSE: 0.0226, MAE: 0.0138, MAPE: 2.84 %, R_2: 0.9092\n", + "MSE: 3.5E-04, RMSE: 0.0187, MAE: 0.0128, MAPE: 2.63 %, R_2: 0.9371\n", + "MSE: 1.3E-03, RMSE: 0.0361, MAE: 0.015, MAPE: 6.76 %, R_2: 0.8045\n" + ] + } + ], + "source": [ + "eva_list = list()\n", + "for (train_index, test_index) in kf.split(train_data):\n", + " train = train_data.loc[train_index]\n", + " test = train_data.loc[test_index]\n", + " train, valid = train_test_split(train, test_size=0.11, random_state=666)\n", + " X_train, Y_train = train[feature_cols], train['发电碳排放因子(kg/kWh)']\n", + " X_valid, Y_valid = valid[feature_cols], valid['发电碳排放因子(kg/kWh)']\n", + " X_test, Y_test = valid[feature_cols], valid['发电碳排放因子(kg/kWh)']\n", + " dtrain = xgb.DMatrix(X_train, Y_train)\n", + " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + " watchlist = [(dvalid, 'eval')]\n", + " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n", + " y_pred = gb_model.predict(xgb.DMatrix(X_test))\n", + " y_true = Y_test.values\n", + " MSE = mean_squared_error(y_true, y_pred)\n", + " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + " MAE = mean_absolute_error(y_true, y_pred)\n", + " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + " R_2 = r2_score(y_true, y_pred)\n", + " print('MSE:', 
format(MSE, '.1E'), end=', ')\n", + " print('RMSE:', round(RMSE, 4), end=', ')\n", + " print('MAE:', round(MAE, 4), end=', ')\n", + " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", + " print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n", + " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n", + " if R_2 > 0.94:\n", + " break" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 44, + "outputs": [], + "source": [ + "test_X['power_pred'] = y_pred_xgb\n", + "test_X['power_real'] = y_true_xgb\n", + "test_X['error_rate'] = abs(test_X.power_pred - test_X.power_real) / test_X.power_real" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 45, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \"\"\"Entry point for launching an IPython kernel.\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:2: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + " \n" + ] + } + ], + "source": [ + "X_test['power_pred'] = y_pred\n", + "X_test['power_real'] = y_true\n", + "X_test['error_rate'] = abs(X_test.power_pred - X_test.power_real) / X_test.power_real" + ], + "metadata": { + "collapsed": false, + "pycharm": 
{ + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 46, + "outputs": [], + "source": [ + "test_data = X_test.copy()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 47, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " if '因子' not in col:\n", + " test_data[col] = np.expm1(test_data[col])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 48, + "outputs": [ + { + "data": { + "text/plain": " 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) longitude \\\n2132 300.0 17602.00 23.95 32.830 118.211355 \n2424 350.0 15525.82 19.72 25.540 118.075445 \n1626 330.0 17997.00 33.00 35.000 82.892729 \n319 350.0 14187.00 25.53 16.710 125.579363 \n891 350.0 19279.92 35.49 27.340 115.784650 \n2234 350.0 21697.63 26.43 12.710 116.860260 \n2304 300.0 18611.00 15.26 29.750 115.497149 \n420 350.0 14445.00 45.39 20.600 125.162487 \n1039 330.0 21095.00 25.80 20.410 119.276289 \n3090 165.0 18990.00 33.58 23.540 103.624731 \n485 300.0 20068.81 25.65 15.040 117.059925 \n3714 600.0 16567.18 34.02 38.640 131.063724 \n2076 350.0 20141.10 42.64 15.990 115.113369 \n3304 600.0 12950.00 48.22 25.040 124.150700 \n3744 350.0 15235.27 23.23 25.780 128.768082 \n2572 300.0 19965.73 10.79 29.130 114.283788 \n3423 600.0 17981.89 26.88 17.700 122.123524 \n3770 600.0 15941.38 23.83 13.750 126.575647 \n1422 330.0 18283.00 13.23 34.680 112.761299 \n3660 300.0 16031.00 21.96 39.790 130.397051 \n2439 600.0 19736.25 9.21 29.210 114.437782 \n3365 300.0 13306.62 23.55 23.090 121.228525 \n1211 140.0 20919.00 19.29 26.120 115.920941 \n2576 300.0 21052.00 10.74 31.360 114.283788 \n3395 350.0 13278.00 47.67 20.720 123.821910 \n1732 200.0 17620.64 19.25 14.380 87.660577 \n1244 150.0 20030.49 26.13 27.880 117.142424 \n1227 145.0 19292.66 19.44 26.570 117.421027 \n1213 140.0 21160.00 24.26 20.310 118.335347 
\n2224 350.0 21061.75 26.06 14.167 116.860260 \n3381 300.0 14582.00 28.34 23.200 124.330739 \n1195 140.0 19731.00 22.23 26.550 118.335347 \n2366 330.0 20310.24 15.53 32.640 114.703987 \n1453 220.0 20434.00 26.70 27.410 113.232289 \n1624 330.0 17470.00 36.17 27.620 82.892729 \n3383 300.0 15736.00 43.85 37.540 123.817380 \n1128 350.0 20403.00 36.51 25.690 117.149304 \n2636 350.0 18193.00 16.96 31.720 112.409429 \n2992 350.0 23253.68 23.72 18.450 113.672684 \n881 165.0 20822.00 39.57 24.600 118.128354 \n2644 350.0 19871.76 21.06 30.780 113.875986 \n2627 330.0 20682.00 11.52 28.850 113.866062 \n3666 330.0 14813.00 43.74 14.460 124.613843 \n2215 315.0 21691.59 23.80 11.900 116.860260 \n1717 350.0 22600.03 28.67 18.680 76.054876 \n2686 660.0 17624.15 31.79 32.040 115.270887 \n3228 300.0 13075.00 28.45 27.010 123.943182 \n2311 300.0 19779.00 17.49 31.090 114.525863 \n1064 330.0 22054.00 20.08 20.800 117.103149 \n1710 350.0 20519.75 24.23 12.710 76.054876 \n1651 330.0 17913.00 35.08 22.070 87.703630 \n3391 300.0 13874.00 24.01 20.430 121.228525 \n3793 215.0 16471.11 30.10 38.670 129.604803 \n1022 330.0 20634.77 24.66 25.390 119.276289 \n343 670.0 14109.00 45.20 13.640 125.941747 \n\n latitude altitude 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 ... 参数分类_高压 \\\n2132 39.655509 26.0 0.0 0.0 0.0 ... 0.0 \n2424 40.812210 338.0 0.0 0.0 0.0 ... 0.0 \n1626 41.741365 1066.0 0.0 0.0 0.0 ... 0.0 \n319 43.657507 208.0 0.0 0.0 0.0 ... 0.0 \n891 36.881948 33.0 0.0 0.0 0.0 ... 0.0 \n2234 39.953617 27.0 0.0 0.0 0.0 ... 0.0 \n2304 38.802049 17.0 0.0 0.0 0.0 ... 0.0 \n420 43.784873 222.0 0.0 0.0 0.0 ... 0.0 \n1039 36.668747 75.0 0.0 0.0 0.0 ... 0.0 \n3090 36.134735 1545.0 0.0 0.0 0.0 ... 0.0 \n485 39.157647 8.0 0.0 0.0 0.0 ... 0.0 \n3714 45.766399 207.0 0.0 0.0 0.0 ... 0.0 \n2076 25.926232 102.0 0.0 0.0 0.0 ... 0.0 \n3304 42.540258 103.0 0.0 0.0 0.0 ... 0.0 \n3744 47.746953 240.0 0.0 0.0 0.0 ... 0.0 \n2572 36.128262 83.0 0.0 0.0 0.0 ... 0.0 \n3423 40.311935 2.0 0.0 0.0 0.0 ... 
0.0 \n3770 45.918566 118.0 0.0 0.0 0.0 ... 0.0 \n1422 37.634620 849.0 0.0 0.0 0.0 ... 0.0 \n3660 46.805507 80.0 0.0 0.0 0.0 ... 0.0 \n2439 38.038867 76.0 0.0 0.0 0.0 ... 0.0 \n3365 41.143879 44.0 0.0 0.0 0.0 ... 0.0 \n1211 36.466442 30.0 0.0 0.0 0.0 ... 0.0 \n2576 36.128262 83.0 0.0 0.0 0.0 ... 0.0 \n3395 41.354877 147.0 0.0 0.0 0.0 ... 0.0 \n1732 43.750058 1010.0 0.0 0.0 0.0 ... 0.0 \n1244 35.075862 65.0 0.0 0.0 0.0 ... 0.0 \n1227 36.738368 57.0 0.0 0.0 0.0 ... 0.0 \n1213 35.017881 65.0 0.0 0.0 0.0 ... 0.0 \n2224 39.953617 27.0 0.0 0.0 0.0 ... 0.0 \n3381 40.115662 135.0 0.0 0.0 0.0 ... 0.0 \n1195 35.017881 65.0 0.0 0.0 0.0 ... 0.0 \n2366 38.014364 57.0 0.0 0.0 0.0 ... 0.0 \n1453 40.067556 1061.0 0.0 0.0 0.0 ... 0.0 \n1624 41.741365 1066.0 0.0 0.0 0.0 ... 0.0 \n3383 42.347201 98.0 0.0 0.0 0.0 ... 0.0 \n1128 36.084927 123.0 0.0 0.0 0.0 ... 0.0 \n2636 34.584441 160.0 0.0 0.0 0.0 ... 0.0 \n2992 30.918494 34.0 0.0 0.0 0.0 ... 0.0 \n881 37.694642 8.0 0.0 0.0 0.0 ... 0.0 \n2644 36.116424 294.0 0.0 0.0 0.0 ... 0.0 \n2627 35.248375 72.0 0.0 0.0 0.0 ... 0.0 \n3666 46.144809 154.0 0.0 0.0 0.0 ... 0.0 \n2215 39.953617 27.0 0.0 0.0 0.0 ... 0.0 \n1717 39.484097 1288.0 0.0 0.0 0.0 ... 0.0 \n2686 35.775540 48.0 0.0 0.0 0.0 ... 0.0 \n3228 41.899725 124.0 0.0 0.0 0.0 ... 0.0 \n2311 37.959933 63.0 0.0 0.0 0.0 ... 0.0 \n1064 36.718761 27.0 0.0 0.0 0.0 ... 0.0 \n1710 39.484097 1288.0 0.0 0.0 0.0 ... 0.0 \n1651 43.909559 724.0 0.0 0.0 0.0 ... 0.0 \n3391 41.143879 44.0 0.0 0.0 0.0 ... 0.0 \n3793 44.608202 250.0 0.0 0.0 0.0 ... 0.0 \n1022 36.668747 75.0 0.0 0.0 0.0 ... 0.0 \n343 44.106509 199.0 0.0 0.0 0.0 ... 
0.0 \n\n 冷凝器型式_水冷 冷凝器型式_水冷-开式循环 冷凝器型式_水冷-闭式循环 冷凝器型式_直接空冷 冷凝器型式_空冷 \\\n2132 1.0 0.0 0.0 0.0 0.0 \n2424 0.0 0.0 1.0 0.0 0.0 \n1626 0.0 0.0 0.0 0.0 0.0 \n319 1.0 0.0 0.0 0.0 0.0 \n891 0.0 1.0 0.0 0.0 0.0 \n2234 0.0 0.0 1.0 0.0 0.0 \n2304 0.0 0.0 1.0 0.0 0.0 \n420 0.0 0.0 1.0 0.0 0.0 \n1039 0.0 0.0 1.0 0.0 0.0 \n3090 0.0 0.0 1.0 0.0 0.0 \n485 0.0 0.0 1.0 0.0 0.0 \n3714 0.0 0.0 1.0 0.0 0.0 \n2076 0.0 0.0 1.0 0.0 0.0 \n3304 0.0 0.0 1.0 0.0 0.0 \n3744 0.0 0.0 1.0 0.0 0.0 \n2572 0.0 0.0 1.0 0.0 0.0 \n3423 0.0 1.0 0.0 0.0 0.0 \n3770 1.0 0.0 0.0 0.0 0.0 \n1422 0.0 0.0 0.0 1.0 0.0 \n3660 1.0 0.0 0.0 0.0 0.0 \n2439 0.0 0.0 0.0 1.0 0.0 \n3365 0.0 0.0 1.0 0.0 0.0 \n1211 0.0 0.0 1.0 0.0 0.0 \n2576 0.0 0.0 1.0 0.0 0.0 \n3395 0.0 0.0 1.0 0.0 0.0 \n1732 0.0 0.0 1.0 0.0 0.0 \n1244 0.0 0.0 1.0 0.0 0.0 \n1227 0.0 0.0 1.0 0.0 0.0 \n1213 0.0 0.0 1.0 0.0 0.0 \n2224 0.0 0.0 1.0 0.0 0.0 \n3381 0.0 0.0 1.0 0.0 0.0 \n1195 0.0 0.0 1.0 0.0 0.0 \n2366 0.0 0.0 1.0 0.0 0.0 \n1453 0.0 0.0 0.0 1.0 0.0 \n1624 0.0 0.0 0.0 0.0 0.0 \n3383 0.0 0.0 1.0 0.0 0.0 \n1128 0.0 0.0 1.0 0.0 0.0 \n2636 0.0 0.0 1.0 0.0 0.0 \n2992 0.0 0.0 1.0 0.0 0.0 \n881 1.0 0.0 0.0 0.0 0.0 \n2644 0.0 0.0 1.0 0.0 0.0 \n2627 0.0 0.0 1.0 0.0 0.0 \n3666 0.0 1.0 0.0 0.0 0.0 \n2215 0.0 0.0 1.0 0.0 0.0 \n1717 0.0 0.0 0.0 0.0 0.0 \n2686 0.0 0.0 1.0 0.0 0.0 \n3228 1.0 0.0 0.0 0.0 0.0 \n2311 0.0 0.0 1.0 0.0 0.0 \n1064 0.0 0.0 1.0 0.0 0.0 \n1710 0.0 0.0 0.0 0.0 0.0 \n1651 0.0 0.0 1.0 0.0 0.0 \n3391 0.0 0.0 1.0 0.0 0.0 \n3793 0.0 0.0 1.0 0.0 0.0 \n1022 0.0 0.0 1.0 0.0 0.0 \n343 0.0 0.0 1.0 0.0 0.0 \n\n 冷凝器型式_间接空冷 power_pred power_real error_rate \n2132 0.0 0.438344 0.461568 0.050316 \n2424 0.0 0.437482 0.460870 0.050747 \n1626 1.0 0.509937 0.537416 0.051130 \n319 0.0 0.362236 0.344575 0.051256 \n891 0.0 0.478009 0.454326 0.052129 \n2234 0.0 0.579227 0.549844 0.053440 \n2304 0.0 0.488333 0.515910 0.053453 \n420 0.0 0.376880 0.357277 0.054869 \n1039 0.0 0.562676 0.595438 0.055023 \n3090 0.0 0.516500 0.489478 0.055205 \n485 0.0 0.515064 
0.487882 0.055715 \n3714 0.0 0.489738 0.518801 0.056019 \n2076 0.0 0.536181 0.568241 0.056419 \n3304 0.0 0.380260 0.359795 0.056878 \n3744 0.0 0.413687 0.389536 0.061999 \n2572 0.0 0.484924 0.455817 0.063858 \n3423 0.0 0.464855 0.436654 0.064586 \n3770 0.0 0.444540 0.475513 0.065135 \n1422 0.0 0.478954 0.513112 0.066570 \n3660 0.0 0.420857 0.394264 0.067449 \n2439 0.0 0.552083 0.517015 0.067829 \n3365 0.0 0.365339 0.392430 0.069034 \n1211 0.0 0.525372 0.565201 0.070470 \n2576 0.0 0.535750 0.576367 0.070472 \n3395 0.0 0.362074 0.338230 0.070499 \n1732 0.0 0.466749 0.435941 0.070672 \n1244 0.0 0.519800 0.559412 0.070810 \n1227 0.0 0.471088 0.439672 0.071453 \n1213 0.0 0.584875 0.630051 0.071702 \n2224 0.0 0.547243 0.510040 0.072941 \n3381 0.0 0.378266 0.410316 0.078112 \n1195 0.0 0.518649 0.480565 0.079248 \n2366 0.0 0.505584 0.551764 0.083696 \n1453 0.0 0.536567 0.493686 0.086859 \n1624 1.0 0.474544 0.525190 0.096434 \n3383 0.0 0.428344 0.475805 0.099748 \n1128 0.0 0.484347 0.541039 0.104785 \n2636 0.0 0.465685 0.420098 0.108516 \n2992 0.0 0.590282 0.663618 0.110509 \n881 0.0 0.561782 0.633226 0.112826 \n2644 0.0 0.521570 0.467694 0.115195 \n2627 0.0 0.563966 0.637925 0.115937 \n3666 0.0 0.403054 0.456362 0.116811 \n2215 0.0 0.549092 0.490824 0.118715 \n1717 1.0 0.628243 0.560538 0.120785 \n2686 0.0 0.428897 0.381988 0.122801 \n3228 0.0 0.356203 0.317093 0.123338 \n2311 0.0 0.505898 0.449974 0.124284 \n1064 0.0 0.549820 0.628280 0.124881 \n1710 1.0 0.516507 0.434781 0.187970 \n1651 0.0 0.471082 0.395266 0.191812 \n3391 0.0 0.378657 0.470581 0.195341 \n3793 0.0 0.428770 0.321635 0.333093 \n1022 0.0 0.530251 0.073112 6.252559 \n343 0.0 0.398257 0.038802 9.263788 \n\n[55 rows x 66 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n 
\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude所处地区_上海市所处地区_云南省所处地区_内蒙古...参数分类_高压冷凝器型式_水冷冷凝器型式_水冷-开式循环冷凝器型式_水冷-闭式循环冷凝器型式_直接空冷冷凝器型式_空冷冷凝器型式_间接空冷power_predpower_realerror_rate
2132300.017602.0023.9532.830118.21135539.65550926.00.00.00.0...0.01.00.00.00.00.00.00.4383440.4615680.050316
2424350.015525.8219.7225.540118.07544540.812210338.00.00.00.0...0.00.00.01.00.00.00.00.4374820.4608700.050747
1626330.017997.0033.0035.00082.89272941.7413651066.00.00.00.0...0.00.00.00.00.00.01.00.5099370.5374160.051130
319350.014187.0025.5316.710125.57936343.657507208.00.00.00.0...0.01.00.00.00.00.00.00.3622360.3445750.051256
891350.019279.9235.4927.340115.78465036.88194833.00.00.00.0...0.00.01.00.00.00.00.00.4780090.4543260.052129
2234350.021697.6326.4312.710116.86026039.95361727.00.00.00.0...0.00.00.01.00.00.00.00.5792270.5498440.053440
2304300.018611.0015.2629.750115.49714938.80204917.00.00.00.0...0.00.00.01.00.00.00.00.4883330.5159100.053453
420350.014445.0045.3920.600125.16248743.784873222.00.00.00.0...0.00.00.01.00.00.00.00.3768800.3572770.054869
1039330.021095.0025.8020.410119.27628936.66874775.00.00.00.0...0.00.00.01.00.00.00.00.5626760.5954380.055023
3090165.018990.0033.5823.540103.62473136.1347351545.00.00.00.0...0.00.00.01.00.00.00.00.5165000.4894780.055205
485300.020068.8125.6515.040117.05992539.1576478.00.00.00.0...0.00.00.01.00.00.00.00.5150640.4878820.055715
3714600.016567.1834.0238.640131.06372445.766399207.00.00.00.0...0.00.00.01.00.00.00.00.4897380.5188010.056019
2076350.020141.1042.6415.990115.11336925.926232102.00.00.00.0...0.00.00.01.00.00.00.00.5361810.5682410.056419
3304600.012950.0048.2225.040124.15070042.540258103.00.00.00.0...0.00.00.01.00.00.00.00.3802600.3597950.056878
3744350.015235.2723.2325.780128.76808247.746953240.00.00.00.0...0.00.00.01.00.00.00.00.4136870.3895360.061999
2572300.019965.7310.7929.130114.28378836.12826283.00.00.00.0...0.00.00.01.00.00.00.00.4849240.4558170.063858
3423600.017981.8926.8817.700122.12352440.3119352.00.00.00.0...0.00.01.00.00.00.00.00.4648550.4366540.064586
3770600.015941.3823.8313.750126.57564745.918566118.00.00.00.0...0.01.00.00.00.00.00.00.4445400.4755130.065135
1422330.018283.0013.2334.680112.76129937.634620849.00.00.00.0...0.00.00.00.01.00.00.00.4789540.5131120.066570
3660300.016031.0021.9639.790130.39705146.80550780.00.00.00.0...0.01.00.00.00.00.00.00.4208570.3942640.067449
2439600.019736.259.2129.210114.43778238.03886776.00.00.00.0...0.00.00.00.01.00.00.00.5520830.5170150.067829
3365300.013306.6223.5523.090121.22852541.14387944.00.00.00.0...0.00.00.01.00.00.00.00.3653390.3924300.069034
1211140.020919.0019.2926.120115.92094136.46644230.00.00.00.0...0.00.00.01.00.00.00.00.5253720.5652010.070470
2576300.021052.0010.7431.360114.28378836.12826283.00.00.00.0...0.00.00.01.00.00.00.00.5357500.5763670.070472
3395350.013278.0047.6720.720123.82191041.354877147.00.00.00.0...0.00.00.01.00.00.00.00.3620740.3382300.070499
1732200.017620.6419.2514.38087.66057743.7500581010.00.00.00.0...0.00.00.01.00.00.00.00.4667490.4359410.070672
1244150.020030.4926.1327.880117.14242435.07586265.00.00.00.0...0.00.00.01.00.00.00.00.5198000.5594120.070810
1227145.019292.6619.4426.570117.42102736.73836857.00.00.00.0...0.00.00.01.00.00.00.00.4710880.4396720.071453
1213140.021160.0024.2620.310118.33534735.01788165.00.00.00.0...0.00.00.01.00.00.00.00.5848750.6300510.071702
2224350.021061.7526.0614.167116.86026039.95361727.00.00.00.0...0.00.00.01.00.00.00.00.5472430.5100400.072941
3381300.014582.0028.3423.200124.33073940.115662135.00.00.00.0...0.00.00.01.00.00.00.00.3782660.4103160.078112
1195140.019731.0022.2326.550118.33534735.01788165.00.00.00.0...0.00.00.01.00.00.00.00.5186490.4805650.079248
2366330.020310.2415.5332.640114.70398738.01436457.00.00.00.0...0.00.00.01.00.00.00.00.5055840.5517640.083696
1453220.020434.0026.7027.410113.23228940.0675561061.00.00.00.0...0.00.00.00.01.00.00.00.5365670.4936860.086859
1624330.017470.0036.1727.62082.89272941.7413651066.00.00.00.0...0.00.00.00.00.00.01.00.4745440.5251900.096434
3383300.015736.0043.8537.540123.81738042.34720198.00.00.00.0...0.00.00.01.00.00.00.00.4283440.4758050.099748
1128350.020403.0036.5125.690117.14930436.084927123.00.00.00.0...0.00.00.01.00.00.00.00.4843470.5410390.104785
2636350.018193.0016.9631.720112.40942934.584441160.00.00.00.0...0.00.00.01.00.00.00.00.4656850.4200980.108516
2992350.023253.6823.7218.450113.67268430.91849434.00.00.00.0...0.00.00.01.00.00.00.00.5902820.6636180.110509
881165.020822.0039.5724.600118.12835437.6946428.00.00.00.0...0.01.00.00.00.00.00.00.5617820.6332260.112826
2644350.019871.7621.0630.780113.87598636.116424294.00.00.00.0...0.00.00.01.00.00.00.00.5215700.4676940.115195
2627330.020682.0011.5228.850113.86606235.24837572.00.00.00.0...0.00.00.01.00.00.00.00.5639660.6379250.115937
3666330.014813.0043.7414.460124.61384346.144809154.00.00.00.0...0.00.01.00.00.00.00.00.4030540.4563620.116811
2215315.021691.5923.8011.900116.86026039.95361727.00.00.00.0...0.00.00.01.00.00.00.00.5490920.4908240.118715
1717350.022600.0328.6718.68076.05487639.4840971288.00.00.00.0...0.00.00.00.00.00.01.00.6282430.5605380.120785
2686660.017624.1531.7932.040115.27088735.77554048.00.00.00.0...0.00.00.01.00.00.00.00.4288970.3819880.122801
3228300.013075.0028.4527.010123.94318241.899725124.00.00.00.0...0.01.00.00.00.00.00.00.3562030.3170930.123338
2311300.019779.0017.4931.090114.52586337.95993363.00.00.00.0...0.00.00.01.00.00.00.00.5058980.4499740.124284
1064330.022054.0020.0820.800117.10314936.71876127.00.00.00.0...0.00.00.01.00.00.00.00.5498200.6282800.124881
1710350.020519.7524.2312.71076.05487639.4840971288.00.00.00.0...0.00.00.00.00.00.01.00.5165070.4347810.187970
1651330.017913.0035.0822.07087.70363043.909559724.00.00.00.0...0.00.00.01.00.00.00.00.4710820.3952660.191812
3391300.013874.0024.0120.430121.22852541.14387944.00.00.00.0...0.00.00.01.00.00.00.00.3786570.4705810.195341
3793215.016471.1130.1038.670129.60480344.608202250.00.00.00.0...0.00.00.01.00.00.00.00.4287700.3216350.333093
1022330.020634.7724.6625.390119.27628936.66874775.00.00.00.0...0.00.00.01.00.00.00.00.5302510.0731126.252559
343670.014109.0045.2013.640125.94174744.106509199.00.00.00.0...0.00.00.01.00.00.00.00.3982570.0388029.263788
\n

55 rows × 66 columns

\n
" + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data[test_data.error_rate > 0.05].sort_values(by='error_rate')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 49, + "outputs": [ + { + "data": { + "text/plain": "(376, 66)" + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "power_eva_df = pd.DataFrame.from_records([y_true_xgb, y_pred_xgb]).T\n", + "power_eva_df.to_csv('./发电测试结果.csv', index=False, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "gb_model.save_model('./models/power_model.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 发热建模" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def xgb_cv(max_depth, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n", + " val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth),\n", + " learning_rate=learning_rate,\n", + " n_estimators=2000,\n", + " min_child_weight=min_child_weight,\n", + " subsample=max(min(subsample, 1), 0),\n", + " colsample_bytree=max(min(colsample_bytree, 1), 0),\n", + " reg_alpha=max(reg_alpha, 
0), gamma=gamma, objective='reg:squarederror',\n", + " booster='gbtree',\n", + " seed=10), X=train[feature_cols], y=train['供热碳排放因子(kg/MJ)'], scoring='r2',\n", + " cv=10).max()\n", + " return val" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.005,\n", + " 'max_depth': 60,\n", + " 'subsample': 0.5,\n", + " 'colsample_bytree': 0.9,\n", + " 'min_child_weight': 30,\n", + " 'seed': 666}\n", + "\n", + "num_boost_round = 2000" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 2.7E-05, RMSE: 0.0052, MAE: 0.0025, MAPE: 4.651 %, R_2: 0.7287\n", + "MSE: 2.8E-05, RMSE: 0.0053, MAE: 0.0028, MAPE: 4.129 %, R_2: 0.7189\n", + "MSE: 2.0E-05, RMSE: 0.0045, MAE: 0.0026, MAPE: 3.629 %, R_2: 0.7839\n", + "MSE: 1.6E-05, RMSE: 0.004, MAE: 0.0025, MAPE: 3.73 %, R_2: 0.8376\n", + "MSE: 3.0E-05, RMSE: 0.0054, MAE: 0.0029, MAPE: 5.181 %, R_2: 0.7219\n", + "MSE: 1.9E-05, RMSE: 0.0044, MAE: 0.0025, MAPE: 3.849 %, R_2: 0.8013\n", + "MSE: 3.1E-05, RMSE: 0.0056, MAE: 0.0028, MAPE: 5.661 %, R_2: 0.7182\n", + "MSE: 2.3E-05, RMSE: 0.0048, MAE: 0.0026, MAPE: 4.386 %, R_2: 0.7888\n", + "MSE: 2.0E-05, RMSE: 0.0045, MAE: 0.0024, MAPE: 3.456 %, R_2: 0.8005\n", + "MSE: 2.3E-05, RMSE: 0.0048, MAE: 0.0027, MAPE: 3.897 %, R_2: 0.7742\n" + ] + } + ], + "source": [ + "eva_list = list()\n", + "for (train_index, test_index) in kf.split(train_data):\n", + " train = train_data.loc[train_index]\n", + " test = train_data.loc[test_index]\n", + " train, valid = train_test_split(train, test_size=0.11, random_state=42)\n", + " X_train, Y_train = train[feature_cols], train['供热碳排放因子(kg/MJ)']\n", + " X_valid, Y_valid = valid[feature_cols], 
valid['供热碳排放因子(kg/MJ)']\n", + " X_test, Y_test = valid[feature_cols], valid['供热碳排放因子(kg/MJ)']\n", + " dtrain = xgb.DMatrix(X_train, Y_train)\n", + " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + " watchlist = [(dvalid, 'eval')]\n", + " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n", + " y_pred = gb_model.predict(xgb.DMatrix(X_test))\n", + " y_true = Y_test.values\n", + " MSE = mean_squared_error(y_true, y_pred)\n", + " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + " MAE = mean_absolute_error(y_true, y_pred)\n", + " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + " R_2 = r2_score(y_true, y_pred)\n", + " print('MSE:', format(MSE, '.1E'), end=', ')\n", + " print('RMSE:', round(RMSE, 4), end=', ')\n", + " print('MAE:', round(MAE, 4), end=', ')\n", + " print('MAPE:', round(MAPE*100, 3), '%', end=', ')\n", + " print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n", + " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 55, + "outputs": [], + "source": [ + "\n", + "dtrain = xgb.DMatrix(train_X, train_y.values[:, 1])\n", + "dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 1])\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=200, verbose_eval=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "y_pred_heat = gb_model_heat.predict(xgb.DMatrix(test_X[feature_cols]))\n", + "y_true_heat = test_y.values[:, 1]" + ] + }, + { + "cell_type": "code", + "execution_count": 
57, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.7E-05\n", + "RMSE: 0.0041\n", + "MAE: 0.0024\n", + "MAPE: 3.61 %\n", + "R_2: 0.8188\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_heat, y_pred_heat)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_heat, y_pred_heat))\n", + "MAE = mean_absolute_error(y_true_heat, y_pred_heat)\n", + "MAPE = mean_absolute_percentage_error(y_true_heat, y_pred_heat)\n", + "R_2 = r2_score(y_true_heat, y_pred_heat)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 4))\n", + "print('MAE:', round(MAE, 4))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差a" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "pd.DataFrame.from_records([y_true_heat, y_pred_heat]).T.to_csv('./供热测试结果.csv', index=False, encoding='utf-8-sig')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "gb_model_heat.save_model('./models/heat_model.txt')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "### 煤种标准化工程" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to 
a tuple of keys) will be deprecated, use a list instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + } + ], + "source": [ + "new_values = use_data.groupby(['煤种', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ)\n煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n无烟煤 19827.00 11.18 2539.00 0.561424 0.087794\n烟煤 16733.00 22.53 27.46 0.441511 0.064259\n 16740.00 18.99 37.00 0.487225 0.064535\n 27.93 24.43 0.418457 0.064747\n 16741.00 26.69 25.92 0.433679 0.061822\n... ... ...\n贫煤 21938.00 13.40 22.58 0.615856 0.099905\n 22042.72 12.96 25.69 0.636563 0.079468\n 22149.00 12.43 25.10 0.629733 0.082772\n 22272.51 11.83 22.97 0.627877 0.083234\n 22475.97 8.90 23.98 0.620331 0.086574\n\n[3579 rows x 2 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
煤种入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
无烟煤19827.0011.182539.000.5614240.087794
烟煤16733.0022.5327.460.4415110.064259
16740.0018.9937.000.4872250.064535
27.9324.430.4184570.064747
16741.0026.6925.920.4336790.061822
..................
贫煤21938.0013.4022.580.6158560.099905
22042.7212.9625.690.6365630.079468
22149.0012.4325.100.6297330.082772
22272.5111.8322.970.6278770.083234
22475.978.9023.980.6203310.086574
\n

3579 rows × 2 columns

\n
" + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_values" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%)\n0 无烟煤 19827.00 11.18 2539.00\n1 烟煤 16733.00 22.53 27.46\n2 烟煤 16740.00 18.99 37.00\n3 烟煤 16740.00 27.93 24.43\n4 烟煤 16741.00 26.69 25.92\n... ... ... ... ...\n3574 贫煤 21938.00 13.40 22.58\n3575 贫煤 22042.72 12.96 25.69\n3576 贫煤 22149.00 12.43 25.10\n3577 贫煤 22272.51 11.83 22.97\n3578 贫煤 22475.97 8.90 23.98\n\n[3579 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
煤种入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
0无烟煤19827.0011.182539.00
1烟煤16733.0022.5327.46
2烟煤16740.0018.9937.00
3烟煤16740.0027.9324.43
4烟煤16741.0026.6925.92
...............
3574贫煤21938.0013.4022.58
3575贫煤22042.7212.9625.69
3576贫煤22149.0012.4325.10
3577贫煤22272.5111.8322.97
3578贫煤22475.978.9023.98
\n

3579 rows × 4 columns

\n
" + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coal_df = new_values.reset_index().drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])\n", + "coal_df" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "coal_params_dict = dict()\n", + "for coal_type in coal_df['煤种'].unique().tolist():\n", + " options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']].values\n", + " coal_params_dict[coal_type] = options" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n0 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21602.05000 \n1 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21926.81000 \n2 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 21261.93062 \n3 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20840.00000 \n4 江苏利港电力有限公司 1 350.0 凝气式 亚临界 水冷-开式循环 20706.00000 \n... ... ... ... ... ... ... ... \n5689 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21973.00000 \n5690 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 21372.00000 \n5691 浙江浙能电力股份有限公司台州发电厂 8 350.0 凝气式 亚临界 水冷-开式循环 20856.00000 \n5692 榆能榆神热电有限公司 1 350.0 抽凝式 超临界 间接空冷 25514.00000 \n5693 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.00000 \n\n 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1 \n1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1 \n2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1 \n3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1 \n4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1 \n... ... ... .. ... ... ... ... 
\n5689 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73 \n5690 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73 \n5691 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73 \n5692 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n5693 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 核心设备类型 汽轮机类型 冷却方式 \n0 0.586990 0.076843 煤粉锅炉 凝气式 水冷-开式循环 \n1 0.632859 0.077676 煤粉锅炉 凝气式 水冷-开式循环 \n2 0.609196 0.074823 煤粉锅炉 凝气式 水冷-开式循环 \n3 0.602178 0.081628 煤粉锅炉 凝气式 水冷-开式循环 \n4 0.590254 0.081103 煤粉锅炉 凝气式 水冷-开式循环 \n... ... ... ... ... ... \n5689 0.628300 0.078776 煤粉锅炉 凝气式 水冷-开式循环 \n5690 0.595019 0.076622 煤粉锅炉 凝气式 水冷-开式循环 \n5691 0.565718 0.074772 煤粉锅炉 凝气式 水冷-开式循环 \n5692 0.664456 0.091482 煤粉锅炉 抽凝式 空冷-间接空冷 \n5693 0.661759 0.091483 煤粉锅炉 抽凝式 空冷-间接空冷 \n\n[5041 rows x 19 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
企业名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)核心设备类型汽轮机类型冷却方式
0江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21602.0500026.0916.80烟煤江苏省120.09662031.94236110.5869900.076843煤粉锅炉凝气式水冷-开式循环
1江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21926.8100026.6815.41烟煤江苏省120.09662031.94236110.6328590.077676煤粉锅炉凝气式水冷-开式循环
2江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环21261.9306226.4615.18烟煤江苏省120.09662031.94236110.6091960.074823煤粉锅炉凝气式水冷-开式循环
3江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环20840.0000026.4314.55烟煤江苏省120.09662031.94236110.6021780.081628煤粉锅炉凝气式水冷-开式循环
4江苏利港电力有限公司1350.0凝气式亚临界水冷-开式循环20706.0000026.4314.96烟煤江苏省120.09662031.94236110.5902540.081103煤粉锅炉凝气式水冷-开式循环
............................................................
5689浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环21973.0000037.4317.12烟煤浙江省121.46584028.704623730.6283000.078776煤粉锅炉凝气式水冷-开式循环
5690浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环21372.0000039.8718.01烟煤浙江省121.46584028.704623730.5950190.076622煤粉锅炉凝气式水冷-开式循环
5691浙江浙能电力股份有限公司台州发电厂8350.0凝气式亚临界水冷-开式循环20856.0000039.3219.74烟煤浙江省121.46584028.704623730.5657180.074772煤粉锅炉凝气式水冷-开式循环
5692榆能榆神热电有限公司1350.0抽凝式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.30438311510.6644560.091482煤粉锅炉抽凝式空冷-间接空冷
5693榆能榆神热电有限公司2350.0抽凝式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.30438311510.6617590.091483煤粉锅炉抽凝式空冷-间接空冷
\n

5041 rows × 19 columns

\n
" + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude 煤种 \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n1 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n2 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n3 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 烟煤 \n4 上海市 凝气式 亚临界 水冷-开式循环 320.0 121.601480 31.358794 2 褐煤 \n... ... ... ... ... ... ... ... ... .. \n3789 黑龙江省 纯凝式 超高压 水冷 200.0 126.575647 45.918566 118 褐煤 \n3790 黑龙江省 纯凝式 超高压 水冷 210.0 131.695864 46.580444 91 褐煤 \n3791 黑龙江省 背压式 超高压 水冷-开式循环 200.0 123.639146 47.210696 151 褐煤 \n3792 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 褐煤 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 褐煤 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n0 0.623923 0.078064 \n1 0.639474 0.079308 \n2 0.635351 0.078691 \n3 0.674456 0.085853 \n4 0.506816 0.060934 \n... ... ... \n3789 0.500172 0.064200 \n3790 0.518301 0.063249 \n3791 0.224312 0.053770 \n3792 0.290814 0.068027 \n3793 0.321635 0.067798 \n\n[3794 rows x 11 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude煤种发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0上海市凝气式亚临界水冷-开式循环300.0121.47114031.0651133烟煤0.6239230.078064
1上海市凝气式亚临界水冷-开式循环300.0121.47114031.0651133烟煤0.6394740.079308
2上海市凝气式亚临界水冷-开式循环300.0121.47114031.0651133烟煤0.6353510.078691
3上海市凝气式亚临界水冷-开式循环300.0121.47114031.0651133烟煤0.6744560.085853
4上海市凝气式亚临界水冷-开式循环320.0121.60148031.3587942褐煤0.5068160.060934
....................................
3789黑龙江省纯凝式超高压水冷200.0126.57564745.918566118褐煤0.5001720.064200
3790黑龙江省纯凝式超高压水冷210.0131.69586446.58044491褐煤0.5183010.063249
3791黑龙江省背压式超高压水冷-开式循环200.0123.63914647.210696151褐煤0.2243120.053770
3792黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.608202250褐煤0.2908140.068027
3793黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.608202250褐煤0.3216350.067798
\n

3794 rows × 11 columns

\n
" + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_use_data = use_data.groupby(use_cols+['煤种'])['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'].mean().reset_index().drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])\n", + "new_use_data" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_use_data.drop(columns='煤种', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_data = new_use_data.explode(column='coal_params')" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.471140 31.065113 3 \n... ... ... ... ... ... ... ... ... 
\n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 129.604803 44.608202 250 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) coal_params \n0 0.623923 0.078064 [16733.0, 22.53, 27.46] \n0 0.623923 0.078064 [16740.0, 18.99, 37.0] \n0 0.623923 0.078064 [16740.0, 27.93, 24.43] \n0 0.623923 0.078064 [16741.0, 26.69, 25.92] \n0 0.623923 0.078064 [16741.51, 19.51, 35.62] \n... ... ... ... \n3793 0.321635 0.067798 [16723.0, 40.63, 39.94] \n3793 0.321635 0.067798 [16725.0, 26.36, 28.51] \n3793 0.321635 0.067798 [16725.19, 34.59, 37.71] \n3793 0.321635 0.067798 [16725.85, 43.2, 12.0] \n3793 0.321635 0.067798 [16729.0, 51.42, 17.33] \n\n[8019537 rows x 11 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)coal_params
0上海市凝气式亚临界水冷-开式循环300.0121.47114031.06511330.6239230.078064[16733.0, 22.53, 27.46]
0上海市凝气式亚临界水冷-开式循环300.0121.47114031.06511330.6239230.078064[16740.0, 18.99, 37.0]
0上海市凝气式亚临界水冷-开式循环300.0121.47114031.06511330.6239230.078064[16740.0, 27.93, 24.43]
0上海市凝气式亚临界水冷-开式循环300.0121.47114031.06511330.6239230.078064[16741.0, 26.69, 25.92]
0上海市凝气式亚临界水冷-开式循环300.0121.47114031.06511330.6239230.078064[16741.51, 19.51, 35.62]
....................................
3793黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.6082022500.3216350.067798[16723.0, 40.63, 39.94]
3793黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.6082022500.3216350.067798[16725.0, 26.36, 28.51]
3793黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.6082022500.3216350.067798[16725.19, 34.59, 37.71]
3793黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.6082022500.3216350.067798[16725.85, 43.2, 12.0]
3793黑龙江省背压式超高压水冷-闭式循环215.0129.60480344.6082022500.3216350.067798[16729.0, 51.42, 17.33]
\n

8019537 rows × 11 columns

\n
" + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_data" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_data['入炉煤低位热值(kJ/kg)'] = new_data.coal_params.apply(lambda x: x[0]).values\n", + "new_data['燃煤挥发份Var(%)'] = new_data.coal_params.apply(lambda x: x[1]).values\n", + "new_data['燃煤灰份Aar(%)'] = new_data.coal_params.apply(lambda x: x[2]).values" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "norm_data = new_data.drop(columns='coal_params')" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n0 上海市 凝气式 亚临界 水冷-开式循环 300.0 121.47114 31.065113 3 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n0 0.623923 0.078064 16733.00 22.53 27.46 \n0 0.623923 0.078064 16740.00 18.99 37.00 \n0 0.623923 0.078064 16740.00 27.93 24.43 \n0 0.623923 0.078064 16741.00 26.69 25.92 \n0 0.623923 0.078064 16741.51 19.51 35.62 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
0上海市凝气式亚临界水冷-开式循环300.0121.4711431.06511330.6239230.07806416733.0022.5327.46
0上海市凝气式亚临界水冷-开式循环300.0121.4711431.06511330.6239230.07806416740.0018.9937.00
0上海市凝气式亚临界水冷-开式循环300.0121.4711431.06511330.6239230.07806416740.0027.9324.43
0上海市凝气式亚临界水冷-开式循环300.0121.4711431.06511330.6239230.07806416741.0026.6925.92
0上海市凝气式亚临界水冷-开式循环300.0121.4711431.06511330.6239230.07806416741.5119.5135.62
\n
" + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " norm_data[col] = np.log1p(norm_data[col])\n", + " # total_data[col] = (total_data[col] - total_data[col].min()) / (total_data[col].max() - total_data[col].min())\n", + "norm_data_dummpy = pd.get_dummies(norm_data, columns=object_cols)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "norm_data_dummpy.drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_xgb_data = xgb.DMatrix(norm_data_dummpy[feature_cols])" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "norm_data['power_co2_factor'] = gb_model.predict(new_xgb_data)\n", + "norm_data['heat_co2_factor'] = gb_model_heat.predict(new_xgb_data)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "normaled_data = norm_data.drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 
凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n0 上海市 凝气式 亚临界 水冷-开式循环 5.707110 4.807875 3.467769 1.386294 \n... ... ... ... ... ... ... ... ... \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n3793 黑龙江省 背压式 超高压 水冷-闭式循环 5.375278 4.872176 3.820088 5.525453 \n\n power_co2_factor heat_co2_factor \n0 0.063166 0.063012 \n0 0.062317 0.062422 \n0 0.062508 0.062922 \n0 0.062466 0.062950 \n0 0.062743 0.063012 \n... ... ... \n3793 0.067768 0.068277 \n3793 0.066563 0.066854 \n3793 0.068115 0.068242 \n3793 0.066680 0.066995 \n3793 0.067563 0.067869 \n\n[8019537 rows x 10 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市凝气式亚临界水冷-开式循环5.7071104.8078753.4677691.3862940.0631660.063012
0上海市凝气式亚临界水冷-开式循环5.7071104.8078753.4677691.3862940.0623170.062422
0上海市凝气式亚临界水冷-开式循环5.7071104.8078753.4677691.3862940.0625080.062922
0上海市凝气式亚临界水冷-开式循环5.7071104.8078753.4677691.3862940.0624660.062950
0上海市凝气式亚临界水冷-开式循环5.7071104.8078753.4677691.3862940.0627430.063012
.................................
3793黑龙江省背压式超高压水冷-闭式循环5.3752784.8721763.8200885.5254530.0677680.068277
3793黑龙江省背压式超高压水冷-闭式循环5.3752784.8721763.8200885.5254530.0665630.066854
3793黑龙江省背压式超高压水冷-闭式循环5.3752784.8721763.8200885.5254530.0681150.068242
3793黑龙江省背压式超高压水冷-闭式循环5.3752784.8721763.8200885.5254530.0666800.066995
3793黑龙江省背压式超高压水冷-闭式循环5.3752784.8721763.8200885.5254530.0675630.067869
\n

8019537 rows × 10 columns

\n
" + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "normaled_data" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "target_cols = ['power_co2_factor', 'heat_co2_factor']" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "save_data = normaled_data.groupby([x for x in normaled_data.columns if x not in target_cols])[target_cols].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "save_data.reset_index().to_csv('./results/去煤种化数据.csv', encoding='utf-8-sig', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/两张表特征对齐.ipynb b/两张表特征对齐.ipynb new file mode 100644 index 0000000..0c79b8a --- /dev/null +++ b/两张表特征对齐.ipynb @@ -0,0 +1,591 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8950aafd-80e8-4078-874c-966efdc4b0ac", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "50832980-f7e1-4a19-a5e0-b8a378ebd39b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
电厂名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0江苏利港电力有限公司1350.0纯凝式亚临界水冷21602.0500026.0916.80烟煤江苏省120.0966231.9423611.00.5869900.076843
1江苏利港电力有限公司1350.0纯凝式亚临界水冷21926.8100026.6815.41烟煤江苏省120.0966231.9423611.00.6328590.077676
2江苏利港电力有限公司1350.0纯凝式亚临界水冷21261.9306226.4615.18烟煤江苏省120.0966231.9423611.00.6091960.074823
\n", + "
" + ], + "text/plain": [ + " 电厂名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n", + "0 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21602.05000 26.09 \n", + "1 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21926.81000 26.68 \n", + "2 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21261.93062 26.46 \n", + "\n", + " 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n", + "0 16.80 烟煤 江苏省 120.09662 31.942361 1.0 0.586990 \n", + "1 15.41 烟煤 江苏省 120.09662 31.942361 1.0 0.632859 \n", + "2 15.18 烟煤 江苏省 120.09662 31.942361 1.0 0.609196 \n", + "\n", + " 供热碳排放因子(kg/MJ) \n", + "0 0.076843 \n", + "1 0.077676 \n", + "2 0.074823 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data = pd.read_csv('train_data.csv')\n", + "total_data.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "499cac72-c6a3-4b86-8aed-6fc010b12693", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(5741, 16)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7ab5d82e-19bd-4aa4-9cd6-d2004718b00d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
发电类型地区城市企业名称机组编号机组状态机组数量单机容量(MW)总容量(MW)核心设备类型汽轮机类型压力参数冷却方式
0煤电安徽省安庆市国能神皖安庆发电有限责任公司1在役1320.0320.0煤粉锅炉凝气式亚临界水冷-开式循环
1煤电安徽省安庆市国能神皖安庆发电有限责任公司2在役1320.0320.0煤粉锅炉凝气式亚临界水冷-开式循环
2煤电安徽省安庆市国能神皖安庆发电有限责任公司3在役11000.01000.0煤粉锅炉凝气式超超临界水冷-闭式循环
\n", + "
" + ], + "text/plain": [ + " 发电类型 地区 城市 企业名称 机组编号 机组状态 机组数量 单机容量(MW) 总容量(MW) 核心设备类型 \\\n", + "0 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 1 在役 1 320.0 320.0 煤粉锅炉 \n", + "1 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 2 在役 1 320.0 320.0 煤粉锅炉 \n", + "2 煤电 安徽省 安庆市 国能神皖安庆发电有限责任公司 3 在役 1 1000.0 1000.0 煤粉锅炉 \n", + "\n", + " 汽轮机类型 压力参数 冷却方式 \n", + "0 凝气式 亚临界 水冷-开式循环 \n", + "1 凝气式 亚临界 水冷-开式循环 \n", + "2 凝气式 超超临界 水冷-闭式循环 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx')\n", + "unit_data.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "c4d54203-5343-43df-b594-f6a13e6f47a1", + "metadata": {}, + "outputs": [], + "source": [ + "total_data.rename(columns={'电厂名称':'企业名称'}, inplace=True)\n", + "total_data['机组编号'] = total_data['机组编号'].astype('str')\n", + "unit_data['机组编号'] = unit_data['机组编号'].astype('str')" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "757e26c3-cd1b-48a3-9668-78e13f40436f", + "metadata": {}, + "outputs": [], + "source": [ + "def change_type(x:str):\n", + " if pd.isna(x):\n", + " return x\n", + " x = x.strip()\n", + " if '纯凝' in x:\n", + " return '纯凝式'\n", + " if '供热' in x:\n", + " return '供热式'\n", + " if '煤粉' in x:\n", + " return '煤粉锅炉'\n", + " if x.startswith('循环流化床'):\n", + " return '循环流化床锅炉'\n", + " if '三废' in x:\n", + " return '三废炉'\n", + " if '直接空冷' in x:\n", + " return '直接空冷'\n", + " if '间接空冷' in x:\n", + " return '间接空冷'\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "fcc7c556-ae7b-4be1-9163-709ce1ca084c", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data = total_data.merge(unit_data[['企业名称','机组编号','汽轮机类型', '压力参数', '冷却方式']], how='left', on=['企业名称', '机组编号'])" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "3af6ad2f-a881-4ee6-9a27-ecbe75c97b31", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data['机组类型'] = merge_data.apply(lambda x: x['机组类型'] 
if pd.isna(x['汽轮机类型']) else x['汽轮机类型'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "dec103bc-c868-4557-ba83-9bbb02f8e9f8", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data['参数分类'] = merge_data.apply(lambda x: x['参数分类'] if pd.isna(x['压力参数']) else x['压力参数'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "454273f0-51ab-4a75-9c44-9ae8b7cc2a79", + "metadata": {}, + "outputs": [], + "source": [ + "merge_data['冷凝器型式'] = merge_data.apply(lambda x: x['冷凝器型式'] if pd.isna(x['冷却方式']) else x['冷却方式'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "d3c9cb26-63b4-4c72-9c5b-d90a2c5867ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "水冷-闭式循环 2143\n", + "水冷 1166\n", + "水冷-开式循环 1101\n", + "空冷-直接空冷 492\n", + "直接空冷 241\n", + "空冷-间接空冷 154\n", + "间接空冷 74\n", + "空冷 19\n", + "其他 2\n", + "Name: 冷凝器型式, dtype: int64" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_data['冷凝器型式'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "30b2d793-7b44-434a-96e3-c6ce15295881", + "metadata": {}, + "outputs": [], + "source": [ + "use_data = merge_data[merge_data.columns[:-3]].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "fbdf13c0-6174-463b-9dd0-9ed736e6d126", + "metadata": {}, + "outputs": [], + "source": [ + "for col in ['机组类型', '参数分类', '冷凝器型式']:\n", + " use_data[col] = use_data[col].apply(change_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "ff803c5a-5e56-462b-81fc-639877395d69", + "metadata": {}, + "outputs": [], + "source": [ + "use_data.to_excel('train_data.xlsx', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "28d8d579-d816-4117-8c49-a755fdffe1a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
发电类型地区城市企业名称机组编号机组状态机组数量单机容量(MW)总容量(MW)核心设备类型汽轮机类型压力参数冷却方式
4026煤电山西省临汾市国家能源集团华北电力有限公司霍州发电厂1在役1600.0600.0煤粉锅炉凝气式超临界空冷-直接空冷
4027煤电山西省临汾市国家能源集团华北电力有限公司霍州发电厂2在役1600.0600.0煤粉锅炉凝气式超临界空冷-直接空冷
4056煤电山西省吕梁市霍州煤电集团吕梁山煤电有限公司方山发电厂1在役160.060.0循环流化床锅炉抽凝式高压空冷-直接空冷
4057煤电山西省吕梁市霍州煤电集团吕梁山煤电有限公司方山发电厂2在役160.060.0循环流化床锅炉抽凝式高压空冷-直接空冷
\n", + "
" + ], + "text/plain": [ + " 发电类型 地区 城市 企业名称 机组编号 机组状态 机组数量 单机容量(MW) 总容量(MW) \\\n", + "4026 煤电 山西省 临汾市 国家能源集团华北电力有限公司霍州发电厂 1 在役 1 600.0 600.0 \n", + "4027 煤电 山西省 临汾市 国家能源集团华北电力有限公司霍州发电厂 2 在役 1 600.0 600.0 \n", + "4056 煤电 山西省 吕梁市 霍州煤电集团吕梁山煤电有限公司方山发电厂 1 在役 1 60.0 60.0 \n", + "4057 煤电 山西省 吕梁市 霍州煤电集团吕梁山煤电有限公司方山发电厂 2 在役 1 60.0 60.0 \n", + "\n", + " 核心设备类型 汽轮机类型 压力参数 冷却方式 \n", + "4026 煤粉锅炉 凝气式 超临界 空冷-直接空冷 \n", + "4027 煤粉锅炉 凝气式 超临界 空冷-直接空冷 \n", + "4056 循环流化床锅炉 抽凝式 高压 空冷-直接空冷 \n", + "4057 循环流化床锅炉 抽凝式 高压 空冷-直接空冷 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "unit_data[unit_data['企业名称'].str.contains('霍州')]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20c531d0-62eb-4475-ab3a-3c8477f36a55", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/基于attention+LSTM对天数据建模.ipynb b/基于attention+LSTM对天数据建模.ipynb new file mode 100644 index 0000000..10dda4d --- /dev/null +++ b/基于attention+LSTM对天数据建模.ipynb @@ -0,0 +1,737 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [ + { + "ename": "ImportError", + "evalue": "cannot import name 'get_config' from 'tensorflow.python.eager.context' (C:\\Users\\zhaojh\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\context.py)", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mImportError\u001B[0m Traceback (most recent call last)", + 
"\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_7812\\4169542727.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 1\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mnumpy\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mnp\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 2\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mpandas\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mpd\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m----> 3\u001B[1;33m \u001B[1;32mimport\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 4\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mlayers\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mDense\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mConv1D\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mInput\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mBidirectional\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mLSTM\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mMultiply\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mDropout\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mFlatten\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mSoftmax\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mLambda\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 5\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmodels\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mModel\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\__init__.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 23\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 24\u001B[0m \u001B[1;31m# See b/110718070#comment18 for more details about this import.\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 25\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m 
\u001B[0mmodels\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 26\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 27\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mengine\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0minput_layer\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mInput\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\models.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 17\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 18\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mcompat\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mv2\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mtf\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 19\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mbackend\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 20\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mmetrics\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mmetrics_module\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 21\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0moptimizer_v1\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\keras\\backend.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 35\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdistribute\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mdistribute_coordinator\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mdc\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 
36\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdistribute\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mdistribute_coordinator_context\u001B[0m \u001B[1;32mas\u001B[0m \u001B[0mdc_context\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m---> 37\u001B[1;33m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0meager\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mcontext\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mget_config\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 38\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mtensorflow\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mpython\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mframework\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mconfig\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 39\u001B[0m \u001B[1;32mfrom\u001B[0m \u001B[0mkeras\u001B[0m \u001B[1;32mimport\u001B[0m \u001B[0mbackend_config\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;31mImportError\u001B[0m: cannot import name 'get_config' from 'tensorflow.python.eager.context' (C:\\Users\\zhaojh\\AppData\\Roaming\\Python\\Python37\\site-packages\\tensorflow\\python\\eager\\context.py)" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import keras\n", + "from keras.layers import Dense, Conv1D, Input, Bidirectional, LSTM, Multiply, Dropout, Flatten, Softmax, Lambda\n", + "from keras.models import Model" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "data = pd.read_csv('./train_data_processed.csv')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, 
+ "outputs": [], + "source": [ + "obj_cols = data.columns[-32:]\n", + "num_cols = [x for x in data.columns if x not in obj_cols]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "maxs = data[num_cols].max()\n", + "mins = data[num_cols].min()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " data[col] = (data[col] - mins[col]) / (maxs[col] - mins[col])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "optim = keras.optimizers.Adam(learning_rate=5e-4)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "def build_model(n_features, n_outs):\n", + " inputs = Input(shape=(1, n_features))\n", + " x = Conv1D(filters=64, kernel_size=1, activation='relu')(inputs)\n", + " x = Dropout(rate=0.1)(x)\n", + " lstm_out = Bidirectional(LSTM(units=128, return_sequences=True))(x)\n", + " attention_pre = Dense(1, name='attention_vec')(lstm_out)\n", + " attention_probs = Softmax()(attention_pre)\n", + " attention_mul = Multiply()([attention_probs, lstm_out])\n", + " attention_mul = Flatten()(attention_mul)\n", + " output = Dense(32, activation='relu')(attention_mul)\n", + " output = Dense(n_outs, activation='sigmoid')(output)\n", + " model = Model(inputs=[inputs], outputs=output)\n", + " model.summary()\n", + " model.compile(loss='mse', optimizer=optim,)\n", + " return model" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": 
"stream", + "text": [ + "Model: \"model_1\"\n", + "__________________________________________________________________________________________________\n", + "Layer (type) Output Shape Param # Connected to \n", + "==================================================================================================\n", + "input_1 (InputLayer) (None, 1, 251) 0 \n", + "__________________________________________________________________________________________________\n", + "conv1d_1 (Conv1D) (None, 1, 64) 16128 input_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dropout_1 (Dropout) (None, 1, 64) 0 conv1d_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "bidirectional_1 (Bidirectional) (None, 1, 256) 197632 dropout_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "attention_vec (Dense) (None, 1, 1) 257 bidirectional_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "softmax_1 (Softmax) (None, 1, 1) 0 attention_vec[0][0] \n", + "__________________________________________________________________________________________________\n", + "multiply_1 (Multiply) (None, 1, 256) 0 softmax_1[0][0] \n", + " bidirectional_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "flatten_1 (Flatten) (None, 256) 0 multiply_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense_1 (Dense) (None, 32) 8224 flatten_1[0][0] \n", + "__________________________________________________________________________________________________\n", + "dense_2 (Dense) (None, 1) 33 dense_1[0][0] \n", + 
"==================================================================================================\n", + "Total params: 222,274\n", + "Trainable params: 222,274\n", + "Non-trainable params: 0\n", + "__________________________________________________________________________________________________\n" + ] + } + ], + "source": [ + "model = build_model(len(data.columns) - 1, 1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "from tensorflow.python.keras.utils.vis_utils import plot_model" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": "" + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "plot_model(model, to_file='model.png')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [ + { + "data": { + "text/plain": "251" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_cols = [x for x in data.columns if x != '燃料消耗量']\n", + "len(feature_cols)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [], + "source": [ + "train_data, valid = train_test_split(data, test_size=0.2, shuffle=True, random_state=666)\n", + "valid_data, test_data = train_test_split(valid, test_size=0.5, shuffle=True, random_state=666)" + ], + "metadata": { + "collapsed": 
false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [], + "source": [ + "X_train, Y_train = train_data[feature_cols], train_data['燃料消耗量']\n", + "X_valid, Y_valid = valid_data[feature_cols], valid_data['燃料消耗量']\n", + "X_test, Y_test = test_data[feature_cols], test_data['燃料消耗量']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [ + { + "data": { + "text/plain": "((922, 1, 251), (922, 1))" + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "x_train = np.expand_dims(X_train.values, axis=1)\n", + "y_train = Y_train.values.reshape(-1, 1)\n", + "x_train.shape, y_train.shape" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [], + "source": [ + "x_valid = np.expand_dims(X_valid.values, axis=1)\n", + "y_valid = Y_valid.values.reshape(-1, 1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [], + "source": [ + "x_test = np.expand_dims(X_test.values, axis=1)\n", + "y_test = Y_test.values.reshape(-1, 1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [], + "source": [ + "callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss', patience=int(10)),\n", + " keras.callbacks.ModelCheckpoint('./best_model.h5', monitor='val_loss', verbose=0, save_best_only=False, save_weights_only=False, mode='auto', period=1)]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 26, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train on 922 
samples, validate on 116 samples\n", + "Epoch 1/100\n", + "922/922 [==============================] - 1s 1ms/step - loss: 0.0396 - val_loss: 0.0128\n", + "Epoch 2/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0074 - val_loss: 0.0057\n", + "Epoch 3/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 0.0048 - val_loss: 0.0040\n", + "Epoch 4/100\n", + "922/922 [==============================] - 0s 61us/step - loss: 0.0035 - val_loss: 0.0028\n", + "Epoch 5/100\n", + "922/922 [==============================] - 0s 77us/step - loss: 0.0030 - val_loss: 0.0023\n", + "Epoch 6/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0025 - val_loss: 0.0020\n", + "Epoch 7/100\n", + "922/922 [==============================] - 0s 86us/step - loss: 0.0023 - val_loss: 0.0020\n", + "Epoch 8/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 0.0023 - val_loss: 0.0018\n", + "Epoch 9/100\n", + "922/922 [==============================] - 0s 67us/step - loss: 0.0022 - val_loss: 0.0017\n", + "Epoch 10/100\n", + "922/922 [==============================] - 0s 61us/step - loss: 0.0019 - val_loss: 0.0016\n", + "Epoch 11/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0019 - val_loss: 0.0016\n", + "Epoch 12/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0019 - val_loss: 0.0017\n", + "Epoch 13/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0019 - val_loss: 0.0017\n", + "Epoch 14/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0018 - val_loss: 0.0015\n", + "Epoch 15/100\n", + "922/922 [==============================] - 0s 87us/step - loss: 0.0018 - val_loss: 0.0015\n", + "Epoch 16/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0016 - val_loss: 0.0013\n", + "Epoch 17/100\n", + "922/922 [==============================] - 0s 68us/step - loss: 0.0015 - 
val_loss: 0.0014\n", + "Epoch 18/100\n", + "922/922 [==============================] - 0s 73us/step - loss: 0.0014 - val_loss: 0.0013\n", + "Epoch 19/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0017 - val_loss: 0.0014\n", + "Epoch 20/100\n", + "922/922 [==============================] - 0s 70us/step - loss: 0.0017 - val_loss: 0.0013\n", + "Epoch 21/100\n", + "922/922 [==============================] - 0s 73us/step - loss: 0.0015 - val_loss: 0.0013\n", + "Epoch 22/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0014 - val_loss: 0.0013\n", + "Epoch 23/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0014 - val_loss: 0.0012\n", + "Epoch 24/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0014\n", + "Epoch 25/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 26/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 27/100\n", + "922/922 [==============================] - 0s 65us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 28/100\n", + "922/922 [==============================] - 0s 89us/step - loss: 0.0013 - val_loss: 0.0013\n", + "Epoch 29/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 30/100\n", + "922/922 [==============================] - 0s 70us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 31/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 0.0014 - val_loss: 0.0013\n", + "Epoch 32/100\n", + "922/922 [==============================] - 0s 70us/step - loss: 0.0012 - val_loss: 0.0012\n", + "Epoch 33/100\n", + "922/922 [==============================] - 0s 51us/step - loss: 0.0013 - val_loss: 0.0012\n", + "Epoch 34/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 
0.0012\n", + "Epoch 35/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 0.0013 - val_loss: 0.0014\n", + "Epoch 36/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0011\n", + "Epoch 37/100\n", + "922/922 [==============================] - 0s 62us/step - loss: 0.0011 - val_loss: 0.0011\n", + "Epoch 38/100\n", + "922/922 [==============================] - 0s 77us/step - loss: 0.0012 - val_loss: 0.0012\n", + "Epoch 39/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 40/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0011\n", + "Epoch 41/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0012 - val_loss: 0.0014\n", + "Epoch 42/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0014\n", + "Epoch 43/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0013 - val_loss: 0.0011\n", + "Epoch 44/100\n", + "922/922 [==============================] - 0s 51us/step - loss: 0.0012 - val_loss: 0.0011\n", + "Epoch 45/100\n", + "922/922 [==============================] - 0s 95us/step - loss: 0.0011 - val_loss: 0.0011\n", + "Epoch 46/100\n", + "922/922 [==============================] - 0s 68us/step - loss: 0.0011 - val_loss: 0.0011\n", + "Epoch 47/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0011 - val_loss: 0.0012\n", + "Epoch 48/100\n", + "922/922 [==============================] - 0s 66us/step - loss: 0.0011 - val_loss: 0.0010\n", + "Epoch 49/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.9569e-04 - val_loss: 9.4151e-04\n", + "Epoch 50/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0011\n", + "Epoch 51/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 
0.0011\n", + "Epoch 52/100\n", + "922/922 [==============================] - 0s 77us/step - loss: 0.0010 - val_loss: 9.9602e-04\n", + "Epoch 53/100\n", + "922/922 [==============================] - 0s 52us/step - loss: 9.3358e-04 - val_loss: 0.0012\n", + "Epoch 54/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 9.6337e-04\n", + "Epoch 55/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.9897e-04 - val_loss: 9.9428e-04\n", + "Epoch 56/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 9.1976e-04\n", + "Epoch 57/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 9.8333e-04 - val_loss: 0.0011\n", + "Epoch 58/100\n", + "922/922 [==============================] - 0s 86us/step - loss: 0.0011 - val_loss: 0.0010\n", + "Epoch 59/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 0.0010 - val_loss: 0.0010\n", + "Epoch 60/100\n", + "922/922 [==============================] - 0s 51us/step - loss: 9.6106e-04 - val_loss: 9.5494e-04\n", + "Epoch 61/100\n", + "922/922 [==============================] - 0s 87us/step - loss: 9.1071e-04 - val_loss: 8.9771e-04\n", + "Epoch 62/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.1379e-04 - val_loss: 9.4967e-04\n", + "Epoch 63/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.3075e-04 - val_loss: 9.1627e-04\n", + "Epoch 64/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 8.8605e-04 - val_loss: 9.3663e-04\n", + "Epoch 65/100\n", + "922/922 [==============================] - 0s 69us/step - loss: 9.5708e-04 - val_loss: 0.0011\n", + "Epoch 66/100\n", + "922/922 [==============================] - 0s 68us/step - loss: 9.5701e-04 - val_loss: 8.9826e-04\n", + "Epoch 67/100\n", + "922/922 [==============================] - 0s 60us/step - loss: 9.4454e-04 - val_loss: 0.0011\n", + "Epoch 68/100\n", + "922/922 
[==============================] - 0s 74us/step - loss: 9.5393e-04 - val_loss: 9.7981e-04\n", + "Epoch 69/100\n", + "922/922 [==============================] - 0s 104us/step - loss: 9.5125e-04 - val_loss: 0.0010\n", + "Epoch 70/100\n", + "922/922 [==============================] - 0s 78us/step - loss: 9.5720e-04 - val_loss: 9.7615e-04\n", + "Epoch 71/100\n", + "922/922 [==============================] - 0s 64us/step - loss: 9.2241e-04 - val_loss: 0.0010\n" + ] + }, + { + "data": { + "text/plain": "" + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model.fit(x_train, y_train, epochs=100, batch_size=32, validation_data=(x_test, y_test), shuffle=True,\n", + " callbacks=callbacks)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [], + "source": [ + "y_pred = model.predict(x_test)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "data": { + "text/plain": "array([0.30161506, 0.12067786, 0.43900865, 0.4143401 , 0.11434203,\n 0.87028706, 0.15387392, 0.86223227, 0.8570186 , 0.4433931 ,\n 0.7649788 , 0.36369222, 0.33063045, 0.7437426 , 0.3493362 ,\n 0.7671248 , 0.7743846 , 0.12363896, 0.4528606 , 0.75037146,\n 0.45131576, 0.34990048, 0.7552419 , 0.7508755 , 0.7558205 ,\n 0.3391131 , 0.76618046, 0.38217723, 0.30887872, 0.36930698,\n 0.3591324 , 0.83817935, 0.9321221 , 0.35430533, 0.11836711,\n 0.764429 , 0.7478696 , 0.74976325, 0.3656214 , 0.3482211 ,\n 0.3658831 , 0.35415024, 0.29030812, 0.7965492 , 0.9372817 ,\n 0.11179626, 0.33758143, 0.305908 , 0.12149343, 0.41378874,\n 0.09611899, 0.36266702, 0.76215094, 0.41939664, 0.7642038 ,\n 0.36630815, 0.36369124, 0.775969 , 0.7431689 , 0.49149197,\n 0.35072863, 0.7608663 , 0.88904417, 0.11546668, 0.32508087,\n 0.78478754, 0.2949888 , 0.9328996 , 0.26087016, 
0.15387377,\n 0.7867287 , 0.35840425, 0.8485855 , 0.36474293, 0.86086893,\n 0.85082245, 0.37929475, 0.88898706, 0.44798538, 0.74498856,\n 0.7642088 , 0.9374167 , 0.24628928, 0.1150094 , 0.35409844,\n 0.34573317, 0.1182591 , 0.35334843, 0.8806509 , 0.3744196 ,\n 0.12240422, 0.7410463 , 0.3571657 , 0.44970232, 0.8927134 ,\n 0.76465344, 0.7640152 , 0.33744502, 0.7715051 , 0.44094718,\n 0.33831298, 0.93699497, 0.30656263, 0.10126469, 0.8242742 ,\n 0.85100025, 0.42451733, 0.362445 , 0.77677643, 0.40487826,\n 0.78558755, 0.339495 , 0.8240729 , 0.7534524 , 0.93057597,\n 0.3128613 ], dtype=float32)" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = np.squeeze(y_pred)\n", + "y_pred" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [], + "source": [ + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 30, + "outputs": [], + "source": [ + "y_true = np.squeeze(y_test)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 31, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.02E-03\n", + "RMSE: 0.032\n", + "MAE: 0.0236\n", + "MAPE: 7.07%\n", + "R_2: 0.9858\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true, y_pred)\n", + "RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + "MAE = mean_absolute_error(y_true, y_pred)\n", + "MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + "R_2 = r2_score(y_true, y_pred)\n", + "print(f\"MSE: {format(MSE, '.2E')}\")\n", + "print(f'RMSE: {round(RMSE, 4)}')\n", + "print(f'MAE: {round(MAE, 4)}')\n", + "print(f'MAPE: {round(MAPE * 100, 
2)}%')\n", + "print(f'R_2: {round(R_2, 4)}')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [], + "source": [ + "def recover(x, col='燃料消耗量'):\n", + " return np.expm1(x * (maxs[col] - mins[col]) + mins[col])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "outputs": [], + "source": [ + "y_true_recover = recover(y_true)\n", + "y_pred_recover = recover(y_pred)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 7.76E+04\n", + "RMSE: 278.5442\n", + "MAE: 166.5543\n", + "MAPE: 9.54%\n", + "R_2: 0.9717\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_recover, y_pred_recover)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_recover, y_pred_recover))\n", + "MAE = mean_absolute_error(y_true_recover, y_pred_recover)\n", + "MAPE = mean_absolute_percentage_error(y_true_recover, y_pred_recover)\n", + "R_2 = r2_score(y_true_recover, y_pred_recover)\n", + "print(f\"MSE: {format(MSE, '.2E')}\")\n", + "print(f'RMSE: {round(RMSE, 4)}')\n", + "print(f'MAE: {round(MAE, 4)}')\n", + "print(f'MAPE: {round(MAPE * 100, 2)}%')\n", + "print(f'R_2: {round(R_2, 4)}')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + 
"nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/基于煤种标准化的数据建模及预测.ipynb b/基于煤种标准化的数据建模及预测.ipynb new file mode 100644 index 0000000..68e1e5d --- /dev/null +++ b/基于煤种标准化的数据建模及预测.ipynb @@ -0,0 +1,1536 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import xgboost as xgb\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n1 上海市 凝气式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n2 上海市 凝气式 亚临界 水冷 5.771441 4.808939 3.476886 1.098612 \n3 上海市 凝气式 超超临界 水冷 6.908755 4.807356 3.458373 1.609438 \n4 上海市 纯凝式 亚临界 水冷 5.860786 4.807839 3.478627 2.833213 \n\n power_co2_factor heat_co2_factor \n0 0.574332 0.072680 \n1 0.582164 0.072391 \n2 0.569281 0.071041 \n3 0.506250 0.070460 \n4 0.565226 0.073717 ", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.5743320.072680
1上海市凝气式亚临界水冷5.7071104.8078753.4677691.3862940.5821640.072391
2上海市凝气式亚临界水冷5.7714414.8089393.4768861.0986120.5692810.071041
3上海市凝气式超超临界水冷6.9087554.8073563.4583731.6094380.5062500.070460
4上海市纯凝式亚临界水冷5.8607864.8078393.4786272.8332130.5652260.073717
\n
" + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_csv('./results/去煤种化数据.csv')\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "(['所处地区', '机组类型', '参数分类', '冷凝器型式'],\n Index(['铭牌容量 (MW)', 'longitude', 'latitude', 'altitude'], dtype='object'))" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "object_cols = data.columns[:4].tolist()\n", + "num_cols = data.columns[4:8]\n", + "object_cols, num_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_geo_info = pd.read_excel('./data/电厂地理信息.xlsx')\n", + "test_geo_info.rename(columns={'name':'企业名称'}, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data = test_data.merge(test_geo_info, how='left', on='企业名称').drop(columns='address')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data_cp = test_data.copy()\n", + "test_data = test_data[['地区', '汽轮机类型', '压力参数', '冷却方式', '单机容量(MW)', 'lat', 'lng', 'altitude']].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + 
}, + "outputs": [], + "source": [ + "test_data.columns = data.columns[:8].tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data['na_cols'] = test_data.isna().sum(axis=1).values" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "test_data = test_data[test_data['铭牌容量 (MW)']>=30].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "0.965160147200342" + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data[test_data.na_cols <= 1]['铭牌容量 (MW)'].sum() /10 / 112228" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data = test_data[test_data.na_cols <= 1].drop(columns='na_cols').reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "水冷 413\n空冷 110\n其他 1\nName: 冷凝器型式, dtype: int64" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['冷凝器型式'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "水冷-闭式循环 1442\n水冷-开式循环 737\n空冷-直接空冷 497\n其他 255\n空冷-间接空冷 221\n水冷 52\n空冷 14\n间接空冷 4\n直接空冷 2\nName: 冷凝器型式, dtype: int64" + }, + "execution_count": 14, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data['冷凝器型式'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def change_type(x:str):\n", + " if '水冷' in x:\n", + " return '水冷'\n", + " elif '空冷' in x:\n", + " return \"空冷\"\n", + " else:\n", + " return '其他'" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data.fillna('其他', inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data['冷凝器型式'] = new_test_data['冷凝器型式'].apply(change_type)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "亚临界 265\n超临界 156\n超超临界 69\n超高压 32\n高压 2\nName: 参数分类, dtype: int64" + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data['参数分类'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "亚临界 1072\n高压 726\n超临界 608\n超高压 403\n超超临界 358\n中压 57\nName: 参数分类, dtype: int64" + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data['参数分类'].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": 
[], + "source": [ + "new_test_data['机组类型'] = new_test_data['机组类型'].apply(lambda x: x if x.endswith('式') else x + '式')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " new_test_data[col] = new_test_data[col].apply(lambda x: 0 if x<0 else x)\n", + " new_test_data[col] = np.log1p(new_test_data[col])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude\n0 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895\n1 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895\n2 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895\n3 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895\n4 安徽省 抽凝式 高压 水冷 3.713572 3.451583 4.772094 2.397895\n... ... ... ... ... ... ... ... ...\n3219 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447\n3220 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447\n3221 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447\n3222 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359\n3223 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359\n\n[3224 rows x 8 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude
0安徽省凝气式亚临界水冷5.7714413.4515834.7720942.397895
1安徽省凝气式亚临界水冷5.7714413.4515834.7720942.397895
2安徽省凝气式超超临界水冷6.9087553.4515834.7720942.397895
3安徽省凝气式超超临界水冷6.9087553.4515834.7720942.397895
4安徽省抽凝式高压水冷3.7135723.4515834.7720942.397895
...........................
3219重庆市抽背式高压其他3.9318263.4274894.6823535.645447
3220重庆市抽背式高压其他3.9318263.4274894.6823535.645447
3221重庆市抽凝式高压水冷3.9120233.4274894.6823535.645447
3222重庆市背压式高压其他3.4339873.4287154.6822085.690359
3223重庆市抽凝式高压水冷4.8362823.4287154.6822085.690359
\n

3224 rows × 8 columns

\n
" + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n1 上海市 凝气式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n2 上海市 凝气式 亚临界 水冷 5.771441 4.808939 3.476886 1.098612 \n3 上海市 凝气式 超超临界 水冷 6.908755 4.807356 3.458373 1.609438 \n4 上海市 纯凝式 亚临界 水冷 5.860786 4.807839 3.478627 2.833213 \n... ... ... ... ... ... ... ... ... \n3219 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n3220 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n3221 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447 \n3222 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359 \n3223 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359 \n\n power_co2_factor heat_co2_factor \n0 0.574332 0.072680 \n1 0.582164 0.072391 \n2 0.569281 0.071041 \n3 0.506250 0.070460 \n4 0.565226 0.073717 \n... ... ... \n3219 NaN NaN \n3220 NaN NaN \n3221 NaN NaN \n3222 NaN NaN \n3223 NaN NaN \n\n[3748 rows x 10 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.5743320.072680
1上海市凝气式亚临界水冷5.7071104.8078753.4677691.3862940.5821640.072391
2上海市凝气式亚临界水冷5.7714414.8089393.4768861.0986120.5692810.071041
3上海市凝气式超超临界水冷6.9087554.8073563.4583731.6094380.5062500.070460
4上海市纯凝式亚临界水冷5.8607864.8078393.4786272.8332130.5652260.073717
.................................
3219重庆市抽背式高压其他3.9318263.4274894.6823535.645447NaNNaN
3220重庆市抽背式高压其他3.9318263.4274894.6823535.645447NaNNaN
3221重庆市抽凝式高压水冷3.9120233.4274894.6823535.645447NaNNaN
3222重庆市背压式高压其他3.4339873.4287154.6822085.690359NaNNaN
3223重庆市抽凝式高压水冷4.8362823.4287154.6822085.690359NaNNaN
\n

3748 rows × 10 columns

\n
" + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "merge_data = pd.concat([data, new_test_data], axis=0)\n", + "merge_data" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 铭牌容量 (MW) longitude latitude altitude power_co2_factor \\\n0 5.707110 4.807875 3.467769 1.386294 0.574332 \n1 5.707110 4.807875 3.467769 1.386294 0.582164 \n2 5.771441 4.808939 3.476886 1.098612 0.569281 \n3 6.908755 4.807356 3.458373 1.609438 0.506250 \n4 5.860786 4.807839 3.478627 2.833213 0.565226 \n... ... ... ... ... ... \n3219 3.931826 3.427489 4.682353 5.645447 NaN \n3220 3.931826 3.427489 4.682353 5.645447 NaN \n3221 3.912023 3.427489 4.682353 5.645447 NaN \n3222 3.433987 3.428715 4.682208 5.690359 NaN \n3223 4.836282 3.428715 4.682208 5.690359 NaN \n\n heat_co2_factor 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 所处地区_内蒙古自治区 ... \\\n0 0.072680 1 0 0 0 ... \n1 0.072391 1 0 0 0 ... \n2 0.071041 1 0 0 0 ... \n3 0.070460 1 0 0 0 ... \n4 0.073717 1 0 0 0 ... \n... ... ... ... ... ... ... \n3219 NaN 0 0 0 0 ... \n3220 NaN 0 0 0 0 ... \n3221 NaN 0 0 0 0 ... \n3222 NaN 0 0 0 0 ... \n3223 NaN 0 0 0 0 ... \n\n 机组类型_背压式 参数分类_中压 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 参数分类_超高压 参数分类_高压 \\\n0 0 0 1 0 0 0 0 \n1 0 0 1 0 0 0 0 \n2 0 0 1 0 0 0 0 \n3 0 0 0 0 1 0 0 \n4 0 0 1 0 0 0 0 \n... ... ... ... ... ... ... ... \n3219 0 0 0 0 0 0 1 \n3220 0 0 0 0 0 0 1 \n3221 0 0 0 0 0 0 1 \n3222 1 0 0 0 0 0 1 \n3223 0 0 0 0 0 0 1 \n\n 冷凝器型式_其他 冷凝器型式_水冷 冷凝器型式_空冷 \n0 0 1 0 \n1 0 1 0 \n2 0 1 0 \n3 0 1 0 \n4 0 1 0 \n... ... ... ... \n3219 1 0 0 \n3220 1 0 0 \n3221 0 1 0 \n3222 1 0 0 \n3223 0 1 0 \n\n[3748 rows x 63 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor所处地区_上海市所处地区_云南省所处地区_内蒙古所处地区_内蒙古自治区...机组类型_背压式参数分类_中压参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压参数分类_高压冷凝器型式_其他冷凝器型式_水冷冷凝器型式_空冷
05.7071104.8078753.4677691.3862940.5743320.0726801000...0010000010
15.7071104.8078753.4677691.3862940.5821640.0723911000...0010000010
25.7714414.8089393.4768861.0986120.5692810.0710411000...0010000010
36.9087554.8073563.4583731.6094380.5062500.0704601000...0000100010
45.8607864.8078393.4786272.8332130.5652260.0737171000...0010000010
..................................................................
32193.9318263.4274894.6823535.645447NaNNaN0000...0000001100
32203.9318263.4274894.6823535.645447NaNNaN0000...0000001100
32213.9120233.4274894.6823535.645447NaNNaN0000...0000001010
32223.4339873.4287154.6822085.690359NaNNaN0000...1000001100
32234.8362823.4287154.6822085.690359NaNNaN0000...0000001010
\n

3748 rows × 63 columns

\n
" + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data = pd.get_dummies(merge_data, columns=object_cols)\n", + "use_data" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "use_data.to_csv('./去煤种化后的训练数据.csv', encoding='utf-8-sig', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_set = use_data[~use_data.power_co2_factor.isna()].copy()\n", + "test_set = use_data[use_data.power_co2_factor.isna()].copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "feature_cols = [x for x in train_set.columns if 'factor' not in x]" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data = train_set.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train, valid = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=666)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "dtest = 
xgb.DMatrix(test_set[feature_cols])" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.01,\n", + " 'max_depth': 30,\n", + " 'subsample': 0.8,\n", + " 'colsample_bytree': 0.95,\n", + " 'min_child_weight': 60,\n", + " 'seed': 42}" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "outputs": [], + "source": [ + "from sklearn.model_selection import KFold" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 80, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 6.9E-04, RMSE: 0.0262, MAE: 0.018, MAPE: 3.81 %, R_2: 0.8015\n", + "MSE: 4.6E-04, RMSE: 0.0215, MAE: 0.0155, MAPE: 3.24 %, R_2: 0.8596\n", + "MSE: 1.1E-03, RMSE: 0.0337, MAE: 0.0214, MAPE: 4.6 %, R_2: 0.6518\n", + "MSE: 8.7E-04, RMSE: 0.0295, MAE: 0.019, MAPE: 4.14 %, R_2: 0.7524\n", + "MSE: 1.1E-03, RMSE: 0.0326, MAE: 0.0219, MAPE: 4.62 %, R_2: 0.695\n", + "MSE: 1.1E-03, RMSE: 0.0336, MAE: 0.0237, MAPE: 5.23 %, R_2: 0.6424\n", + "MSE: 6.0E-04, RMSE: 0.0245, MAE: 0.0164, MAPE: 3.46 %, R_2: 0.8288\n", + "MSE: 9.4E-04, RMSE: 0.0307, MAE: 0.0224, MAPE: 4.96 %, R_2: 0.7396\n", + "MSE: 6.6E-04, RMSE: 0.0256, MAE: 0.0174, MAPE: 3.73 %, R_2: 0.8133\n", + "MSE: 7.0E-04, RMSE: 0.0264, MAE: 0.017, MAPE: 3.59 %, R_2: 0.8201\n" + ] + } + ], + "source": [ + "kf = KFold(n_splits=10, shuffle=True, random_state=666)\n", + "eva_list = list()\n", + "for (train_index, test_index) in kf.split(train_data):\n", + " train = train_data.iloc[train_index]  # KFold.split yields positional indices, so select by position\n", + " test = train_data.iloc[test_index]\n", + " train, valid = train_test_split(train, test_size=0.1, random_state=666)\n", + " X_train, Y_train = train[feature_cols],
train['power_co2_factor']\n", + " X_valid, Y_valid = valid[feature_cols], valid['power_co2_factor']\n", + " X_test, Y_test = test[feature_cols], test['power_co2_factor']  # score on the held-out fold, not the early-stopping split; re-run to refresh the recorded metrics\n", + " dtrain = xgb.DMatrix(X_train, Y_train)\n", + " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + " watchlist = [(dvalid, 'eval')]\n", + " gb_model = xgb.train(params_xgb, dtrain, 2000, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n", + " y_pred = gb_model.predict(xgb.DMatrix(X_test))\n", + " y_true = Y_test.values\n", + " MSE = mean_squared_error(y_true, y_pred)\n", + " RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + " MAE = mean_absolute_error(y_true, y_pred)\n", + " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + " R_2 = r2_score(y_true, y_pred)\n", + " print('MSE:', format(MSE, '.1E'), end=', ')\n", + " print('RMSE:', round(RMSE, 4), end=', ')\n", + " print('MAE:', round(MAE, 4), end=', ')\n", + " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", + " print('R_2:', round(R_2, 4)) # a negative R^2 means the fit is worse than predicting the mean\n", + " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 83, + "outputs": [ + { + "data": { + "text/plain": "MSE 0.000747\nRMSE 0.027126\nMAE 0.018437\nMAPE 0.039442\nR_2 0.788768\ndtype: float64" + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']).drop(index=[2, 5]).mean()  # NOTE(review): excludes folds 2 and 5 (the two lowest-R_2 folds in the recorded run) from the mean; confirm this is intended" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "\n", + "num_boost_round = 2000\n", + "\n", + "dtrain = xgb.DMatrix(train[feature_cols], train['power_co2_factor'].values)\n", + "dvalid = xgb.DMatrix(valid[feature_cols], valid['power_co2_factor'].values)\n", + "watchlist = [(dtrain,
'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model_power = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=200, verbose_eval=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 59, + "outputs": [], + "source": [ + "power_pred, power_real = gb_model_power.predict(dvalid), valid['power_co2_factor'].values" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 60, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 5.2E-04\n", + "RMSE: 0.023\n", + "MAE: 0.016\n", + "MAPE: 3.46 %\n", + "R_2: 0.819\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(power_real, power_pred)\n", + "RMSE = np.sqrt(mean_squared_error(power_real, power_pred))\n", + "MAE = mean_absolute_error(power_real, power_pred)\n", + "MAPE = mean_absolute_percentage_error(power_real, power_pred)\n", + "R_2 = r2_score(power_real, power_pred)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 3))\n", + "print('MAE:', round(MAE, 3))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差a" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data['power_co2_factor'] = gb_model_power.predict(dtest)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factor
0安徽省凝气式亚临界水冷5.7714413.4515834.7720942.3978950.513529
1安徽省凝气式亚临界水冷5.7714413.4515834.7720942.3978950.513529
2安徽省凝气式超超临界水冷6.9087553.4515834.7720942.3978950.478943
3安徽省凝气式超超临界水冷6.9087553.4515834.7720942.3978950.478943
4安徽省抽凝式高压水冷3.7135723.4515834.7720942.3978950.510681
..............................
3219重庆市抽背式高压其他3.9318263.4274894.6823535.6454470.510508
3220重庆市抽背式高压其他3.9318263.4274894.6823535.6454470.510508
3221重庆市抽凝式高压水冷3.9120233.4274894.6823535.6454470.512501
3222重庆市背压式高压其他3.4339873.4287154.6822085.6903590.509951
3223重庆市抽凝式高压水冷4.8362823.4287154.6822085.6903590.511886
\n", + "

3224 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895 \n", + "1 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895 \n", + "2 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895 \n", + "3 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895 \n", + "4 安徽省 抽凝式 高压 水冷 3.713572 3.451583 4.772094 2.397895 \n", + "... ... ... ... ... ... ... ... ... \n", + "3219 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n", + "3220 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n", + "3221 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447 \n", + "3222 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359 \n", + "3223 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359 \n", + "\n", + " power_co2_factor \n", + "0 0.513529 \n", + "1 0.513529 \n", + "2 0.478943 \n", + "3 0.478943 \n", + "4 0.510681 \n", + "... ... \n", + "3219 0.510508 \n", + "3220 0.510508 \n", + "3221 0.512501 \n", + "3222 0.509951 \n", + "3223 0.511886 \n", + "\n", + "[3224 rows x 9 columns]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.01,\n", + " 'max_depth': 15,\n", + " 'subsample': 0.7,\n", + " 'colsample_bytree': 0.9,\n", + " 'min_child_weight': 10,\n", + " 'seed': 666}\n", + "\n", + "num_boost_round = 1200" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.2E-05, RMSE: 0.0034, MAE: 0.002, MAPE: 2.93 %, R_2: 0.7571\n", + "MSE: 3.9E-06, RMSE: 0.002, MAE: 0.0014, MAPE: 2.01 %, R_2: 0.9072\n", + "MSE: 2.1E-05, RMSE: 0.0045, MAE: 0.0024, MAPE: 3.67 %, R_2: 0.4898\n", + "MSE: 1.3E-05, RMSE: 0.0036, MAE: 
0.002, MAPE: 3.01 %, R_2: 0.6941\n", + "MSE: 1.2E-05, RMSE: 0.0034, MAE: 0.002, MAPE: 2.92 %, R_2: 0.7163\n", + "MSE: 1.5E-05, RMSE: 0.0039, MAE: 0.0022, MAPE: 3.29 %, R_2: 0.6265\n", + "MSE: 5.8E-06, RMSE: 0.0024, MAE: 0.0014, MAPE: 2.06 %, R_2: 0.8744\n", + "MSE: 1.7E-05, RMSE: 0.0041, MAE: 0.0024, MAPE: 3.64 %, R_2: 0.6661\n", + "MSE: 8.4E-06, RMSE: 0.0029, MAE: 0.0018, MAPE: 2.61 %, R_2: 0.8057\n", + "MSE: 7.0E-06, RMSE: 0.0026, MAE: 0.0016, MAPE: 2.29 %, R_2: 0.8514\n" + ] + } + ], + "source": [ + "# 10-fold cross-validation of the heat_co2_factor model; one metrics row per fold goes into eva_list\n", + "kf = KFold(n_splits=10, shuffle=True, random_state=666)\n", + "eva_list = list()\n", + "for (train_index, test_index) in kf.split(train_data):\n", + "    # KFold.split yields positional indices, so select rows with iloc rather than label-based loc\n", + "    train = train_data.iloc[train_index]\n", + "    test = train_data.iloc[test_index]\n", + "    # the early-stopping validation split is carved out of the training fold only\n", + "    train, valid = train_test_split(train, test_size=0.1, random_state=666)\n", + "    X_train, Y_train = train[feature_cols], train['heat_co2_factor']\n", + "    X_valid, Y_valid = valid[feature_cols], valid['heat_co2_factor']\n", + "    # score on the held-out fold, not on the split that already drives early stopping\n", + "    X_test, Y_test = test[feature_cols], test['heat_co2_factor']\n", + "    dtrain = xgb.DMatrix(X_train, Y_train)\n", + "    dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + "    watchlist = [(dvalid, 'eval')]\n", + "    gb_model = xgb.train(params_xgb, dtrain, 2000, evals=watchlist,\n", + "                         early_stopping_rounds=100, verbose_eval=False)\n", + "    y_pred = gb_model.predict(xgb.DMatrix(X_test))\n", + "    y_true = Y_test.values\n", + "    MSE = mean_squared_error(y_true, y_pred)\n", + "    RMSE = np.sqrt(MSE)\n", + "    MAE = mean_absolute_error(y_true, y_pred)\n", + "    MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + "    R_2 = r2_score(y_true, y_pred)\n", + "    print('MSE:', format(MSE, '.1E'), end=', ')\n", + "    print('RMSE:', round(RMSE, 4), end=', ')\n", + "    print('MAE:', round(MAE, 4), end=', ')\n", + "    print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", + "    print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean\n", + "    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n" + ], + "metadata": { + "collapsed": false, 
+ "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 86, + "outputs": [ + { + "data": { + "text/plain": "MSE 0.000010\nRMSE 0.003161\nMAE 0.001866\nMAPE 0.027510\nR_2 0.766523\ndtype: float64" + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']).drop(index=[2]).mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "\n", + "dtrain = xgb.DMatrix(train[feature_cols], train['heat_co2_factor'].values)\n", + "dvalid = xgb.DMatrix(valid[feature_cols], valid['heat_co2_factor'].values)\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "new_test_data['heat_co2_factor'] = gb_model_heat.predict(dtest)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " new_test_data[col] = np.expm1(new_test_data[col])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0安徽省凝气式亚临界水冷320.030.550295117.16639110.00.5135290.073187
1安徽省凝气式亚临界水冷320.030.550295117.16639110.00.5135290.073187
2安徽省凝气式超超临界水冷1000.030.550295117.16639110.00.4789430.071981
3安徽省凝气式超超临界水冷1000.030.550295117.16639110.00.4789430.071981
4安徽省抽凝式高压水冷40.030.550295117.16639110.00.5106810.072166
.................................
3219重庆市抽背式高压其他50.029.799200107.023948282.00.5105080.071945
3220重庆市抽背式高压其他50.029.799200107.023948282.00.5105080.071945
3221重庆市抽凝式高压水冷49.029.799200107.023948282.00.5125010.072097
3222重庆市背压式高压其他30.029.836998107.008326295.00.5099510.071945
3223重庆市抽凝式高压水冷125.029.836998107.008326295.00.5118860.072097
\n", + "

3224 rows × 10 columns

\n", + "
" + ], + "text/plain": [ + " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", + "0 安徽省 凝气式 亚临界 水冷 320.0 30.550295 117.166391 10.0 \n", + "1 安徽省 凝气式 亚临界 水冷 320.0 30.550295 117.166391 10.0 \n", + "2 安徽省 凝气式 超超临界 水冷 1000.0 30.550295 117.166391 10.0 \n", + "3 安徽省 凝气式 超超临界 水冷 1000.0 30.550295 117.166391 10.0 \n", + "4 安徽省 抽凝式 高压 水冷 40.0 30.550295 117.166391 10.0 \n", + "... ... ... ... ... ... ... ... ... \n", + "3219 重庆市 抽背式 高压 其他 50.0 29.799200 107.023948 282.0 \n", + "3220 重庆市 抽背式 高压 其他 50.0 29.799200 107.023948 282.0 \n", + "3221 重庆市 抽凝式 高压 水冷 49.0 29.799200 107.023948 282.0 \n", + "3222 重庆市 背压式 高压 其他 30.0 29.836998 107.008326 295.0 \n", + "3223 重庆市 抽凝式 高压 水冷 125.0 29.836998 107.008326 295.0 \n", + "\n", + " power_co2_factor heat_co2_factor \n", + "0 0.513529 0.073187 \n", + "1 0.513529 0.073187 \n", + "2 0.478943 0.071981 \n", + "3 0.478943 0.071981 \n", + "4 0.510681 0.072166 \n", + "... ... ... \n", + "3219 0.510508 0.071945 \n", + "3220 0.510508 0.071945 \n", + "3221 0.512501 0.072097 \n", + "3222 0.509951 0.071945 \n", + "3223 0.511886 0.072097 \n", + "\n", + "[3224 rows x 10 columns]" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "rst = new_test_data.copy()" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "collapsed": false, + "jupyter": { + "outputs_hidden": false + }, + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "rst = pd.read_excel('./results/全国机组预测数据.xlsx')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + 
"outputs": [], + "source": [ + "rst.drop(columns=rst.columns[0], inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "def change_cap(x):\n", + " if x <= 300:\n", + " return '300MW以下'\n", + " elif x<=600:\n", + " return '300-600MW'\n", + " elif x<=1000:\n", + " return '600-1000MW'\n", + " else:\n", + " return \"1000MW以上\"" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "rst['容量类型'] = rst['铭牌容量 (MW)'].apply(change_cap)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "rst.to_excel('./results/全国机组预测数据.xlsx', index=False)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} \ No newline at end of file diff --git a/特征分组建模_lightgbm.ipynb b/特征分组建模_lightgbm.ipynb new file mode 100644 index 0000000..c59fe3e --- /dev/null +++ b/特征分组建模_lightgbm.ipynb @@ -0,0 +1,3155 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "warnings.filterwarnings(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + 
"source": [ + "import pandas as pd\n", + "import lightgbm as lgb\n", + "import numpy as np\n", + "import xgboost as xgb\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.model_selection import KFold\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": " 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) longitude latitude \\\n0 5.70711 9.818311 3.297687 2.815409 4.807875 3.467769 \n1 5.70711 9.821572 3.297687 2.815409 4.807875 3.467769 \n2 5.70711 9.878580 3.310543 2.769459 4.807875 3.467769 \n3 5.70711 9.883285 3.324316 2.532108 4.807875 3.467769 \n4 5.70711 9.909768 3.255015 2.766319 4.807875 3.467769 \n\n altitude 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 所处地区_上海市 ... 机组类型_供热式 \\\n0 1.386294 0.537574 0.070992 1.0 ... 1.0 \n1 1.386294 0.545516 0.072476 1.0 ... 1.0 \n2 1.386294 0.595849 0.064745 1.0 ... 1.0 \n3 1.386294 0.584432 0.068390 1.0 ... 1.0 \n4 1.386294 0.605369 0.066996 1.0 ... 1.0 \n\n 机组类型_纯凝式 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 参数分类_超高压 参数分类_高压 冷凝器型式_水冷 \\\n0 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n1 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n2 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n3 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n4 0.0 1.0 0.0 0.0 0.0 0.0 1.0 \n\n 冷凝器型式_直接空冷 冷凝器型式_间接空冷 \n0 0.0 0.0 \n1 0.0 0.0 \n2 0.0 0.0 \n3 0.0 0.0 \n4 0.0 0.0 \n\n[5 rows x 60 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)所处地区_上海市...机组类型_供热式机组类型_纯凝式参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压参数分类_高压冷凝器型式_水冷冷凝器型式_直接空冷冷凝器型式_间接空冷
05.707119.8183113.2976872.8154094.8078753.4677691.3862940.5375740.0709921.0...1.00.01.00.00.00.00.01.00.00.0
15.707119.8215723.2976872.8154094.8078753.4677691.3862940.5455160.0724761.0...1.00.01.00.00.00.00.01.00.00.0
25.707119.8785803.3105432.7694594.8078753.4677691.3862940.5958490.0647451.0...1.00.01.00.00.00.00.01.00.00.0
35.707119.8832853.3243162.5321084.8078753.4677691.3862940.5844320.0683901.0...1.00.01.00.00.00.00.01.00.00.0
45.707119.9097683.2550152.7663194.8078753.4677691.3862940.6053690.0669961.0...1.00.01.00.00.00.00.01.00.00.0
\n

5 rows × 60 columns

\n
" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data = pd.read_csv('./train_data_processed.csv')\n", + "total_data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/plain": "(3080, 60)" + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [ + { + "data": { + "text/plain": "Index(['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude',\n 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)', '所处地区_上海市',\n '所处地区_云南省', '所处地区_内蒙古', '所处地区_内蒙古自治区', '所处地区_北京市', '所处地区_吉林省',\n '所处地区_四川省', '所处地区_天津市', '所处地区_宁夏', '所处地区_宁夏回族自治区', '所处地区_安徽省',\n '所处地区_山东省', '所处地区_山西', '所处地区_山西省', '所处地区_广东省', '所处地区_广西', '所处地区_广西省',\n '所处地区_新疆', '所处地区_新疆维吾尔自治区', '所处地区_江苏省', '所处地区_江西省', '所处地区_河北',\n '所处地区_河北省', '所处地区_河南', '所处地区_河南省', '所处地区_浙江省', '所处地区_海南省', '所处地区_湖北',\n '所处地区_湖北省', '所处地区_湖南', '所处地区_湖南省', '所处地区_甘肃省', '所处地区_福建省', '所处地区_贵州省',\n '所处地区_辽宁省', '所处地区_重庆市', '所处地区_陕西省', '所处地区_青海省', '所处地区_黑龙江', '所处地区_黑龙江省',\n '机组类型_供热', '机组类型_供热式', '机组类型_纯凝式', '参数分类_亚临界', '参数分类_超临界', '参数分类_超超临界',\n '参数分类_超高压', '参数分类_高压', '冷凝器型式_水冷', '冷凝器型式_直接空冷', '冷凝器型式_间接空冷'],\n dtype='object')" + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data.columns" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "feature_cols = [x for x in total_data.columns if '因子' not in x]\n", + "target_cols = [x for x in total_data.columns if x not in feature_cols]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": 
[ + { + "data": { + "text/plain": " 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) longitude latitude \\\n0 4.615121 9.527411 3.823629 3.007661 4.834910 3.862442 \n1 4.836282 9.920745 3.625673 3.201526 4.700990 3.563714 \n2 4.836282 9.923023 3.623807 3.231200 4.700990 3.563714 \n3 4.836282 9.932727 3.272227 3.236716 4.700990 3.563714 \n4 4.836282 9.936819 3.278653 3.173460 4.700990 3.563714 \n... ... ... ... ... ... ... \n3075 6.966967 9.754581 3.100543 3.378270 4.676091 3.667429 \n3076 6.966967 9.755162 3.082827 3.361070 4.676091 3.667429 \n3077 6.966967 9.762903 3.095125 3.288775 4.676091 3.667429 \n3078 6.966967 9.776506 3.096934 3.328268 4.676091 3.667429 \n3079 6.966967 9.792277 3.073156 3.384051 4.676091 3.667429 \n\n altitude 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 ... 参数分类_亚临界 参数分类_超临界 \\\n0 4.983607 0.0 0.0 0.0 ... 0.0 0.0 \n1 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n2 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n3 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n4 5.981414 0.0 0.0 0.0 ... 0.0 0.0 \n... ... ... ... ... ... ... ... \n3075 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3076 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3077 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3078 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n3079 7.020191 0.0 0.0 0.0 ... 0.0 0.0 \n\n 参数分类_超超临界 参数分类_超高压 参数分类_高压 冷凝器型式_水冷 冷凝器型式_直接空冷 冷凝器型式_间接空冷 \\\n0 0.0 0.0 1.0 1.0 0.0 0.0 \n1 0.0 1.0 0.0 1.0 0.0 0.0 \n2 0.0 1.0 0.0 1.0 0.0 0.0 \n3 0.0 1.0 0.0 1.0 0.0 0.0 \n4 0.0 1.0 0.0 1.0 0.0 0.0 \n... ... ... ... ... ... ... \n3075 1.0 0.0 0.0 0.0 1.0 0.0 \n3076 1.0 0.0 0.0 0.0 1.0 0.0 \n3077 1.0 0.0 0.0 0.0 1.0 0.0 \n3078 1.0 0.0 0.0 0.0 1.0 0.0 \n3079 1.0 0.0 0.0 0.0 1.0 0.0 \n\n 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n0 0.483547 0.058613 \n1 0.575553 0.085880 \n2 0.607741 0.084890 \n3 0.595382 0.082342 \n4 0.578838 0.082685 \n... ... ... \n3075 0.426880 0.061722 \n3076 0.456768 0.060739 \n3077 0.455534 0.061277 \n3078 0.450064 0.062032 \n3079 0.468720 0.063016 \n\n[3080 rows x 60 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude所处地区_上海市所处地区_云南省所处地区_内蒙古...参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压参数分类_高压冷凝器型式_水冷冷凝器型式_直接空冷冷凝器型式_间接空冷发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
04.6151219.5274113.8236293.0076614.8349103.8624424.9836070.00.00.0...0.00.00.00.01.01.00.00.00.4835470.058613
14.8362829.9207453.6256733.2015264.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.5755530.085880
24.8362829.9230233.6238073.2312004.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.6077410.084890
34.8362829.9327273.2722273.2367164.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.5953820.082342
44.8362829.9368193.2786533.1734604.7009903.5637145.9814140.00.00.0...0.00.00.01.00.01.00.00.00.5788380.082685
..................................................................
30756.9669679.7545813.1005433.3782704.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4268800.061722
30766.9669679.7551623.0828273.3610704.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4567680.060739
30776.9669679.7629033.0951253.2887754.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4555340.061277
30786.9669679.7765063.0969343.3282684.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4500640.062032
30796.9669679.7922773.0731563.3840514.6760913.6674297.0201910.00.00.0...0.00.01.00.00.00.01.00.00.4687200.063016
\n

3080 rows × 60 columns

\n
" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "use_data = total_data.groupby(feature_cols)[target_cols].mean().reset_index()\n", + "use_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ + "for col in use_data.columns:\n", + " use_data[col] = use_data[col].astype(float)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "train_data, test_data = train_test_split(use_data.dropna(), test_size=0.1, shuffle=True, random_state=666)\n", + "train_data, valid_data = train_test_split(train_data.dropna(), test_size=0.2, shuffle=True, random_state=666)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "X_train, Y_train = train_data[feature_cols], train_data[target_cols[0]]\n", + "X_valid, Y_valid = valid_data[feature_cols], valid_data[target_cols[0]]\n", + "X_test, Y_test = test_data[feature_cols], test_data[target_cols[0]]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "lgb_train = lgb.Dataset(X_train, Y_train)\n", + "lgb_eval = lgb.Dataset(X_valid, Y_valid)\n", + "lgb_test = lgb.Dataset(X_test, Y_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "params_gbm = {\n", + " 'task': 'train',\n", + " 'boosting_type': 'gbdt', # 设置提升类型\n", + " 'objective': 'l1', # 目标函数\n", + " 'metric': {'rmse'}, # 评估函数\n", + " 'max_depth': 12,\n", + " 'num_leaves': 20, # 叶子节点数\n", + " 'learning_rate': 0.05, # 学习速率\n", + " 
'feature_fraction': 0.9, # 建树的特征选择比例\n", + " 'bagging_fraction': 0.9, # 建树的样本采样比例\n", + " 'bagging_freq': 10, # k 意味着每 k 次迭代执行bagging\n", + " 'verbose': -1 # <0 显示致命的, =0 显示错误 (警告), >0 显示信息\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1]\tvalid_0's rmse: 0.0692875\n", + "Training until validation scores don't improve for 100 rounds\n", + "[2]\tvalid_0's rmse: 0.06714\n", + "[3]\tvalid_0's rmse: 0.0646839\n", + "[4]\tvalid_0's rmse: 0.0623338\n", + "[5]\tvalid_0's rmse: 0.0600964\n", + "[6]\tvalid_0's rmse: 0.0580108\n", + "[7]\tvalid_0's rmse: 0.056067\n", + "[8]\tvalid_0's rmse: 0.0544344\n", + "[9]\tvalid_0's rmse: 0.0529408\n", + "[10]\tvalid_0's rmse: 0.051276\n", + "[11]\tvalid_0's rmse: 0.0497692\n", + "[12]\tvalid_0's rmse: 0.0483588\n", + "[13]\tvalid_0's rmse: 0.0470211\n", + "[14]\tvalid_0's rmse: 0.0460061\n", + "[15]\tvalid_0's rmse: 0.0448745\n", + "[16]\tvalid_0's rmse: 0.043796\n", + "[17]\tvalid_0's rmse: 0.0428645\n", + "[18]\tvalid_0's rmse: 0.0419008\n", + "[19]\tvalid_0's rmse: 0.0409544\n", + "[20]\tvalid_0's rmse: 0.0400698\n", + "[21]\tvalid_0's rmse: 0.0392848\n", + "[22]\tvalid_0's rmse: 0.038578\n", + "[23]\tvalid_0's rmse: 0.0378727\n", + "[24]\tvalid_0's rmse: 0.0371929\n", + "[25]\tvalid_0's rmse: 0.0366533\n", + "[26]\tvalid_0's rmse: 0.0360842\n", + "[27]\tvalid_0's rmse: 0.0355757\n", + "[28]\tvalid_0's rmse: 0.0350562\n", + "[29]\tvalid_0's rmse: 0.0345382\n", + "[30]\tvalid_0's rmse: 0.0340975\n", + "[31]\tvalid_0's rmse: 0.0337632\n", + "[32]\tvalid_0's rmse: 0.0334232\n", + "[33]\tvalid_0's rmse: 0.0330998\n", + "[34]\tvalid_0's rmse: 0.0328678\n", + "[35]\tvalid_0's rmse: 0.0325827\n", + "[36]\tvalid_0's rmse: 0.0323483\n", + "[37]\tvalid_0's rmse: 0.0321363\n", + "[38]\tvalid_0's rmse: 0.0318823\n", + "[39]\tvalid_0's rmse: 0.0316983\n", + "[40]\tvalid_0's rmse: 
0.0315094\n", + "[41]\tvalid_0's rmse: 0.0313339\n", + "[42]\tvalid_0's rmse: 0.0311663\n", + "[43]\tvalid_0's rmse: 0.031002\n", + "[44]\tvalid_0's rmse: 0.0308446\n", + "[45]\tvalid_0's rmse: 0.0307193\n", + "[46]\tvalid_0's rmse: 0.03058\n", + "[47]\tvalid_0's rmse: 0.0304975\n", + "[48]\tvalid_0's rmse: 0.0303807\n", + "[49]\tvalid_0's rmse: 0.0302476\n", + "[50]\tvalid_0's rmse: 0.0301379\n", + "[51]\tvalid_0's rmse: 0.03\n", + "[52]\tvalid_0's rmse: 0.0299129\n", + "[53]\tvalid_0's rmse: 0.0298092\n", + "[54]\tvalid_0's rmse: 0.0297318\n", + "[55]\tvalid_0's rmse: 0.0296587\n", + "[56]\tvalid_0's rmse: 0.0295906\n", + "[57]\tvalid_0's rmse: 0.0295262\n", + "[58]\tvalid_0's rmse: 0.0294317\n", + "[59]\tvalid_0's rmse: 0.0293666\n", + "[60]\tvalid_0's rmse: 0.029295\n", + "[61]\tvalid_0's rmse: 0.0292621\n", + "[62]\tvalid_0's rmse: 0.0291822\n", + "[63]\tvalid_0's rmse: 0.0291453\n", + "[64]\tvalid_0's rmse: 0.029071\n", + "[65]\tvalid_0's rmse: 0.0289955\n", + "[66]\tvalid_0's rmse: 0.0289425\n", + "[67]\tvalid_0's rmse: 0.0288803\n", + "[68]\tvalid_0's rmse: 0.0288438\n", + "[69]\tvalid_0's rmse: 0.0288004\n", + "[70]\tvalid_0's rmse: 0.0287685\n", + "[71]\tvalid_0's rmse: 0.0287379\n", + "[72]\tvalid_0's rmse: 0.0286942\n", + "[73]\tvalid_0's rmse: 0.028654\n", + "[74]\tvalid_0's rmse: 0.0286255\n", + "[75]\tvalid_0's rmse: 0.0285826\n", + "[76]\tvalid_0's rmse: 0.0285438\n", + "[77]\tvalid_0's rmse: 0.0284903\n", + "[78]\tvalid_0's rmse: 0.0284767\n", + "[79]\tvalid_0's rmse: 0.0284401\n", + "[80]\tvalid_0's rmse: 0.0284152\n", + "[81]\tvalid_0's rmse: 0.0283845\n", + "[82]\tvalid_0's rmse: 0.028375\n", + "[83]\tvalid_0's rmse: 0.0283271\n", + "[84]\tvalid_0's rmse: 0.0283098\n", + "[85]\tvalid_0's rmse: 0.0282848\n", + "[86]\tvalid_0's rmse: 0.0282564\n", + "[87]\tvalid_0's rmse: 0.0282311\n", + "[88]\tvalid_0's rmse: 0.0281999\n", + "[89]\tvalid_0's rmse: 0.0281744\n", + "[90]\tvalid_0's rmse: 0.0281694\n", + "[91]\tvalid_0's rmse: 0.0281849\n", + 
"[92]\tvalid_0's rmse: 0.0281936\n", + "[93]\tvalid_0's rmse: 0.0281859\n", + "[94]\tvalid_0's rmse: 0.028193\n", + "[95]\tvalid_0's rmse: 0.0281768\n", + "[96]\tvalid_0's rmse: 0.0281729\n", + "[97]\tvalid_0's rmse: 0.0281829\n", + "[98]\tvalid_0's rmse: 0.0281698\n", + "[99]\tvalid_0's rmse: 0.0281678\n", + "[100]\tvalid_0's rmse: 0.0281451\n", + "[101]\tvalid_0's rmse: 0.0281243\n", + "[102]\tvalid_0's rmse: 0.028098\n", + "[103]\tvalid_0's rmse: 0.028089\n", + "[104]\tvalid_0's rmse: 0.0280947\n", + "[105]\tvalid_0's rmse: 0.0280915\n", + "[106]\tvalid_0's rmse: 0.0280942\n", + "[107]\tvalid_0's rmse: 0.0280905\n", + "[108]\tvalid_0's rmse: 0.0280888\n", + "[109]\tvalid_0's rmse: 0.0280827\n", + "[110]\tvalid_0's rmse: 0.028075\n", + "[111]\tvalid_0's rmse: 0.0280506\n", + "[112]\tvalid_0's rmse: 0.0280414\n", + "[113]\tvalid_0's rmse: 0.0280254\n", + "[114]\tvalid_0's rmse: 0.0280016\n", + "[115]\tvalid_0's rmse: 0.0279858\n", + "[116]\tvalid_0's rmse: 0.027973\n", + "[117]\tvalid_0's rmse: 0.027962\n", + "[118]\tvalid_0's rmse: 0.0279404\n", + "[119]\tvalid_0's rmse: 0.0279082\n", + "[120]\tvalid_0's rmse: 0.0279064\n", + "[121]\tvalid_0's rmse: 0.0279041\n", + "[122]\tvalid_0's rmse: 0.0278874\n", + "[123]\tvalid_0's rmse: 0.0278608\n", + "[124]\tvalid_0's rmse: 0.0278517\n", + "[125]\tvalid_0's rmse: 0.0278507\n", + "[126]\tvalid_0's rmse: 0.0278408\n", + "[127]\tvalid_0's rmse: 0.0278322\n", + "[128]\tvalid_0's rmse: 0.0278089\n", + "[129]\tvalid_0's rmse: 0.0278084\n", + "[130]\tvalid_0's rmse: 0.0277843\n", + "[131]\tvalid_0's rmse: 0.0277892\n", + "[132]\tvalid_0's rmse: 0.0277827\n", + "[133]\tvalid_0's rmse: 0.0277758\n", + "[134]\tvalid_0's rmse: 0.0277766\n", + "[135]\tvalid_0's rmse: 0.0277853\n", + "[136]\tvalid_0's rmse: 0.0277744\n", + "[137]\tvalid_0's rmse: 0.0277624\n", + "[138]\tvalid_0's rmse: 0.0277481\n", + "[139]\tvalid_0's rmse: 0.027733\n", + "[140]\tvalid_0's rmse: 0.0277201\n", + "[141]\tvalid_0's rmse: 0.0277112\n", + 
"[142]\tvalid_0's rmse: 0.0277081\n", + "[143]\tvalid_0's rmse: 0.0276965\n", + "[144]\tvalid_0's rmse: 0.0276911\n", + "[145]\tvalid_0's rmse: 0.0276786\n", + "[146]\tvalid_0's rmse: 0.0276798\n", + "[147]\tvalid_0's rmse: 0.0276724\n", + "[148]\tvalid_0's rmse: 0.0276479\n", + "[149]\tvalid_0's rmse: 0.0276436\n", + "[150]\tvalid_0's rmse: 0.0276115\n", + "[151]\tvalid_0's rmse: 0.0275966\n", + "[152]\tvalid_0's rmse: 0.0275874\n", + "[153]\tvalid_0's rmse: 0.0275693\n", + "[154]\tvalid_0's rmse: 0.0275769\n", + "[155]\tvalid_0's rmse: 0.0275677\n", + "[156]\tvalid_0's rmse: 0.0275517\n", + "[157]\tvalid_0's rmse: 0.0275422\n", + "[158]\tvalid_0's rmse: 0.0275326\n", + "[159]\tvalid_0's rmse: 0.0275205\n", + "[160]\tvalid_0's rmse: 0.0275234\n", + "[161]\tvalid_0's rmse: 0.0275164\n", + "[162]\tvalid_0's rmse: 0.0275097\n", + "[163]\tvalid_0's rmse: 0.0275092\n", + "[164]\tvalid_0's rmse: 0.0274879\n", + "[165]\tvalid_0's rmse: 0.0274696\n", + "[166]\tvalid_0's rmse: 0.0274685\n", + "[167]\tvalid_0's rmse: 0.0274698\n", + "[168]\tvalid_0's rmse: 0.0274655\n", + "[169]\tvalid_0's rmse: 0.0274796\n", + "[170]\tvalid_0's rmse: 0.0274609\n", + "[171]\tvalid_0's rmse: 0.0274455\n", + "[172]\tvalid_0's rmse: 0.0274493\n", + "[173]\tvalid_0's rmse: 0.0274369\n", + "[174]\tvalid_0's rmse: 0.0274299\n", + "[175]\tvalid_0's rmse: 0.0274234\n", + "[176]\tvalid_0's rmse: 0.0274104\n", + "[177]\tvalid_0's rmse: 0.0273984\n", + "[178]\tvalid_0's rmse: 0.0273957\n", + "[179]\tvalid_0's rmse: 0.0273894\n", + "[180]\tvalid_0's rmse: 0.0273696\n", + "[181]\tvalid_0's rmse: 0.0273432\n", + "[182]\tvalid_0's rmse: 0.027342\n", + "[183]\tvalid_0's rmse: 0.0273113\n", + "[184]\tvalid_0's rmse: 0.0273034\n", + "[185]\tvalid_0's rmse: 0.0272787\n", + "[186]\tvalid_0's rmse: 0.027264\n", + "[187]\tvalid_0's rmse: 0.0272687\n", + "[188]\tvalid_0's rmse: 0.0272646\n", + "[189]\tvalid_0's rmse: 0.027269\n", + "[190]\tvalid_0's rmse: 0.0272657\n", + "[191]\tvalid_0's rmse: 0.0272644\n", + 
"[192]\tvalid_0's rmse: 0.027266\n", + "[193]\tvalid_0's rmse: 0.0272565\n", + "[194]\tvalid_0's rmse: 0.0272468\n", + "[195]\tvalid_0's rmse: 0.0272463\n", + "[196]\tvalid_0's rmse: 0.027222\n", + "[197]\tvalid_0's rmse: 0.0271824\n", + "[198]\tvalid_0's rmse: 0.02718\n", + "[199]\tvalid_0's rmse: 0.0271605\n", + "[200]\tvalid_0's rmse: 0.0271487\n", + "[201]\tvalid_0's rmse: 0.0271442\n", + "[202]\tvalid_0's rmse: 0.0271446\n", + "[203]\tvalid_0's rmse: 0.0271367\n", + "[204]\tvalid_0's rmse: 0.0271474\n", + "[205]\tvalid_0's rmse: 0.0271404\n", + "[206]\tvalid_0's rmse: 0.0271376\n", + "[207]\tvalid_0's rmse: 0.0271251\n", + "[208]\tvalid_0's rmse: 0.0271296\n", + "[209]\tvalid_0's rmse: 0.0271322\n", + "[210]\tvalid_0's rmse: 0.0271364\n", + "[211]\tvalid_0's rmse: 0.027128\n", + "[212]\tvalid_0's rmse: 0.0271156\n", + "[213]\tvalid_0's rmse: 0.0271112\n", + "[214]\tvalid_0's rmse: 0.0271093\n", + "[215]\tvalid_0's rmse: 0.0271047\n", + "[216]\tvalid_0's rmse: 0.0270906\n", + "[217]\tvalid_0's rmse: 0.0270941\n", + "[218]\tvalid_0's rmse: 0.0270903\n", + "[219]\tvalid_0's rmse: 0.0270865\n", + "[220]\tvalid_0's rmse: 0.0270923\n", + "[221]\tvalid_0's rmse: 0.0270943\n", + "[222]\tvalid_0's rmse: 0.0270857\n", + "[223]\tvalid_0's rmse: 0.0270803\n", + "[224]\tvalid_0's rmse: 0.0270701\n", + "[225]\tvalid_0's rmse: 0.0270644\n", + "[226]\tvalid_0's rmse: 0.0270723\n", + "[227]\tvalid_0's rmse: 0.0270654\n", + "[228]\tvalid_0's rmse: 0.027069\n", + "[229]\tvalid_0's rmse: 0.0270634\n", + "[230]\tvalid_0's rmse: 0.027059\n", + "[231]\tvalid_0's rmse: 0.0270559\n", + "[232]\tvalid_0's rmse: 0.0270541\n", + "[233]\tvalid_0's rmse: 0.0270546\n", + "[234]\tvalid_0's rmse: 0.0270555\n", + "[235]\tvalid_0's rmse: 0.0270554\n", + "[236]\tvalid_0's rmse: 0.0270527\n", + "[237]\tvalid_0's rmse: 0.027045\n", + "[238]\tvalid_0's rmse: 0.0270457\n", + "[239]\tvalid_0's rmse: 0.0270406\n", + "[240]\tvalid_0's rmse: 0.0270462\n", + "[241]\tvalid_0's rmse: 0.0270405\n", + 
"[242]\tvalid_0's rmse: 0.0270448\n", + "[243]\tvalid_0's rmse: 0.0270406\n", + "[244]\tvalid_0's rmse: 0.0270415\n", + "[245]\tvalid_0's rmse: 0.0270421\n", + "[246]\tvalid_0's rmse: 0.0270327\n", + "[247]\tvalid_0's rmse: 0.0270246\n", + "[248]\tvalid_0's rmse: 0.0270194\n", + "[249]\tvalid_0's rmse: 0.0270177\n", + "[250]\tvalid_0's rmse: 0.0270092\n", + "[251]\tvalid_0's rmse: 0.0270089\n", + "[252]\tvalid_0's rmse: 0.0270085\n", + "[253]\tvalid_0's rmse: 0.0269901\n", + "[254]\tvalid_0's rmse: 0.0269891\n", + "[255]\tvalid_0's rmse: 0.0269845\n", + "[256]\tvalid_0's rmse: 0.0269845\n", + "[257]\tvalid_0's rmse: 0.0269555\n", + "[258]\tvalid_0's rmse: 0.026949\n", + "[259]\tvalid_0's rmse: 0.0269442\n", + "[260]\tvalid_0's rmse: 0.0269473\n", + "[261]\tvalid_0's rmse: 0.026946\n", + "[262]\tvalid_0's rmse: 0.0269368\n", + "[263]\tvalid_0's rmse: 0.0269311\n", + "[264]\tvalid_0's rmse: 0.0269294\n", + "[265]\tvalid_0's rmse: 0.0269236\n", + "[266]\tvalid_0's rmse: 0.0269203\n", + "[267]\tvalid_0's rmse: 0.0269202\n", + "[268]\tvalid_0's rmse: 0.0269171\n", + "[269]\tvalid_0's rmse: 0.0269116\n", + "[270]\tvalid_0's rmse: 0.026909\n", + "[271]\tvalid_0's rmse: 0.0269102\n", + "[272]\tvalid_0's rmse: 0.0269057\n", + "[273]\tvalid_0's rmse: 0.0269039\n", + "[274]\tvalid_0's rmse: 0.0269003\n", + "[275]\tvalid_0's rmse: 0.0268963\n", + "[276]\tvalid_0's rmse: 0.0268905\n", + "[277]\tvalid_0's rmse: 0.0268955\n", + "[278]\tvalid_0's rmse: 0.0268977\n", + "[279]\tvalid_0's rmse: 0.0269015\n", + "[280]\tvalid_0's rmse: 0.0269013\n", + "[281]\tvalid_0's rmse: 0.0268988\n", + "[282]\tvalid_0's rmse: 0.0268985\n", + "[283]\tvalid_0's rmse: 0.0268988\n", + "[284]\tvalid_0's rmse: 0.0268935\n", + "[285]\tvalid_0's rmse: 0.0268928\n", + "[286]\tvalid_0's rmse: 0.0268898\n", + "[287]\tvalid_0's rmse: 0.0268862\n", + "[288]\tvalid_0's rmse: 0.0268827\n", + "[289]\tvalid_0's rmse: 0.0268775\n", + "[290]\tvalid_0's rmse: 0.0268797\n", + "[291]\tvalid_0's rmse: 0.0268748\n", + 
"[292]\tvalid_0's rmse: 0.0268375\n", + "[293]\tvalid_0's rmse: 0.026812\n", + "[294]\tvalid_0's rmse: 0.0268085\n", + "[295]\tvalid_0's rmse: 0.0268076\n", + "[296]\tvalid_0's rmse: 0.026803\n", + "[297]\tvalid_0's rmse: 0.0267955\n", + "[298]\tvalid_0's rmse: 0.0267948\n", + "[299]\tvalid_0's rmse: 0.0267962\n", + "[300]\tvalid_0's rmse: 0.0267929\n", + "[301]\tvalid_0's rmse: 0.026792\n", + "[302]\tvalid_0's rmse: 0.026785\n", + "[303]\tvalid_0's rmse: 0.0267811\n", + "[304]\tvalid_0's rmse: 0.0267687\n", + "[305]\tvalid_0's rmse: 0.0267677\n", + "[306]\tvalid_0's rmse: 0.0267618\n", + "[307]\tvalid_0's rmse: 0.0267611\n", + "[308]\tvalid_0's rmse: 0.0267278\n", + "[309]\tvalid_0's rmse: 0.026727\n", + "[310]\tvalid_0's rmse: 0.0267222\n", + "[311]\tvalid_0's rmse: 0.0267172\n", + "[312]\tvalid_0's rmse: 0.0267138\n", + "[313]\tvalid_0's rmse: 0.0267119\n", + "[314]\tvalid_0's rmse: 0.0267091\n", + "[315]\tvalid_0's rmse: 0.0267093\n", + "[316]\tvalid_0's rmse: 0.0267089\n", + "[317]\tvalid_0's rmse: 0.0267078\n", + "[318]\tvalid_0's rmse: 0.0267068\n", + "[319]\tvalid_0's rmse: 0.0267062\n", + "[320]\tvalid_0's rmse: 0.0267035\n", + "[321]\tvalid_0's rmse: 0.0267021\n", + "[322]\tvalid_0's rmse: 0.0266997\n", + "[323]\tvalid_0's rmse: 0.026701\n", + "[324]\tvalid_0's rmse: 0.0266997\n", + "[325]\tvalid_0's rmse: 0.0266999\n", + "[326]\tvalid_0's rmse: 0.0267043\n", + "[327]\tvalid_0's rmse: 0.0267048\n", + "[328]\tvalid_0's rmse: 0.0266922\n", + "[329]\tvalid_0's rmse: 0.0266828\n", + "[330]\tvalid_0's rmse: 0.0266837\n", + "[331]\tvalid_0's rmse: 0.0266863\n", + "[332]\tvalid_0's rmse: 0.0266764\n", + "[333]\tvalid_0's rmse: 0.0266769\n", + "[334]\tvalid_0's rmse: 0.0266686\n", + "[335]\tvalid_0's rmse: 0.0266701\n", + "[336]\tvalid_0's rmse: 0.0266739\n", + "[337]\tvalid_0's rmse: 0.0266749\n", + "[338]\tvalid_0's rmse: 0.0266749\n", + "[339]\tvalid_0's rmse: 0.0266745\n", + "[340]\tvalid_0's rmse: 0.0266731\n", + "[341]\tvalid_0's rmse: 0.0266707\n", + 
"[342]\tvalid_0's rmse: 0.0266627\n", + "[343]\tvalid_0's rmse: 0.0266618\n", + "[344]\tvalid_0's rmse: 0.0266607\n", + "[345]\tvalid_0's rmse: 0.0266595\n", + "[346]\tvalid_0's rmse: 0.0266483\n", + "[347]\tvalid_0's rmse: 0.0266501\n", + "[348]\tvalid_0's rmse: 0.0266484\n", + "[349]\tvalid_0's rmse: 0.0266469\n", + "[350]\tvalid_0's rmse: 0.0266446\n", + "[351]\tvalid_0's rmse: 0.0266422\n", + "[352]\tvalid_0's rmse: 0.0266445\n", + "[353]\tvalid_0's rmse: 0.026642\n", + "[354]\tvalid_0's rmse: 0.0266332\n", + "[355]\tvalid_0's rmse: 0.0266333\n", + "[356]\tvalid_0's rmse: 0.0266291\n", + "[357]\tvalid_0's rmse: 0.0266298\n", + "[358]\tvalid_0's rmse: 0.0266302\n", + "[359]\tvalid_0's rmse: 0.026626\n", + "[360]\tvalid_0's rmse: 0.0266191\n", + "[361]\tvalid_0's rmse: 0.0266188\n", + "[362]\tvalid_0's rmse: 0.0266132\n", + "[363]\tvalid_0's rmse: 0.0266094\n", + "[364]\tvalid_0's rmse: 0.0266022\n", + "[365]\tvalid_0's rmse: 0.0266027\n", + "[366]\tvalid_0's rmse: 0.0266001\n", + "[367]\tvalid_0's rmse: 0.0266011\n", + "[368]\tvalid_0's rmse: 0.0265957\n", + "[369]\tvalid_0's rmse: 0.026593\n", + "[370]\tvalid_0's rmse: 0.0265889\n", + "[371]\tvalid_0's rmse: 0.0265887\n", + "[372]\tvalid_0's rmse: 0.0265821\n", + "[373]\tvalid_0's rmse: 0.026579\n", + "[374]\tvalid_0's rmse: 0.0265765\n", + "[375]\tvalid_0's rmse: 0.0265742\n", + "[376]\tvalid_0's rmse: 0.0265724\n", + "[377]\tvalid_0's rmse: 0.0265683\n", + "[378]\tvalid_0's rmse: 0.0265671\n", + "[379]\tvalid_0's rmse: 0.0265605\n", + "[380]\tvalid_0's rmse: 0.026561\n", + "[381]\tvalid_0's rmse: 0.0265544\n", + "[382]\tvalid_0's rmse: 0.026555\n", + "[383]\tvalid_0's rmse: 0.0265526\n", + "[384]\tvalid_0's rmse: 0.0265483\n", + "[385]\tvalid_0's rmse: 0.0265519\n", + "[386]\tvalid_0's rmse: 0.0265494\n", + "[387]\tvalid_0's rmse: 0.0265502\n", + "[388]\tvalid_0's rmse: 0.0265525\n", + "[389]\tvalid_0's rmse: 0.0265567\n", + "[390]\tvalid_0's rmse: 0.0265403\n", + "[391]\tvalid_0's rmse: 0.0265361\n", + 
"[392]\tvalid_0's rmse: 0.0265342\n", + "[393]\tvalid_0's rmse: 0.026529\n", + "[394]\tvalid_0's rmse: 0.0265267\n", + "[395]\tvalid_0's rmse: 0.0265303\n", + "[396]\tvalid_0's rmse: 0.0265306\n", + "[397]\tvalid_0's rmse: 0.0265338\n", + "[398]\tvalid_0's rmse: 0.0265294\n", + "[399]\tvalid_0's rmse: 0.0265253\n", + "[400]\tvalid_0's rmse: 0.0265248\n", + "[401]\tvalid_0's rmse: 0.0265266\n", + "[402]\tvalid_0's rmse: 0.0265279\n", + "[403]\tvalid_0's rmse: 0.0265289\n", + "[404]\tvalid_0's rmse: 0.0265279\n", + "[405]\tvalid_0's rmse: 0.0265228\n", + "[406]\tvalid_0's rmse: 0.0265323\n", + "[407]\tvalid_0's rmse: 0.0265335\n", + "[408]\tvalid_0's rmse: 0.0265318\n", + "[409]\tvalid_0's rmse: 0.0265298\n", + "[410]\tvalid_0's rmse: 0.0265275\n", + "[411]\tvalid_0's rmse: 0.0265259\n", + "[412]\tvalid_0's rmse: 0.0265261\n", + "[413]\tvalid_0's rmse: 0.0265267\n", + "[414]\tvalid_0's rmse: 0.0265261\n", + "[415]\tvalid_0's rmse: 0.0265255\n", + "[416]\tvalid_0's rmse: 0.0265275\n", + "[417]\tvalid_0's rmse: 0.0265225\n", + "[418]\tvalid_0's rmse: 0.0265226\n", + "[419]\tvalid_0's rmse: 0.0265222\n", + "[420]\tvalid_0's rmse: 0.026521\n", + "[421]\tvalid_0's rmse: 0.0265169\n", + "[422]\tvalid_0's rmse: 0.0265139\n", + "[423]\tvalid_0's rmse: 0.0265126\n", + "[424]\tvalid_0's rmse: 0.0265136\n", + "[425]\tvalid_0's rmse: 0.0265079\n", + "[426]\tvalid_0's rmse: 0.0265017\n", + "[427]\tvalid_0's rmse: 0.0264914\n", + "[428]\tvalid_0's rmse: 0.026489\n", + "[429]\tvalid_0's rmse: 0.0264918\n", + "[430]\tvalid_0's rmse: 0.0264906\n", + "[431]\tvalid_0's rmse: 0.0264809\n", + "[432]\tvalid_0's rmse: 0.0264809\n", + "[433]\tvalid_0's rmse: 0.0264819\n", + "[434]\tvalid_0's rmse: 0.0264775\n", + "[435]\tvalid_0's rmse: 0.0264744\n", + "[436]\tvalid_0's rmse: 0.026474\n", + "[437]\tvalid_0's rmse: 0.0264713\n", + "[438]\tvalid_0's rmse: 0.0264702\n", + "[439]\tvalid_0's rmse: 0.0264686\n", + "[440]\tvalid_0's rmse: 0.0264654\n", + "[441]\tvalid_0's rmse: 0.0264663\n", + 
"[442]\tvalid_0's rmse: 0.0264543\n", + "[443]\tvalid_0's rmse: 0.0264538\n", + "[444]\tvalid_0's rmse: 0.0264507\n", + "[445]\tvalid_0's rmse: 0.0264509\n", + "[446]\tvalid_0's rmse: 0.0264456\n", + "[447]\tvalid_0's rmse: 0.0264483\n", + "[448]\tvalid_0's rmse: 0.0264169\n", + "[449]\tvalid_0's rmse: 0.0264151\n", + "[450]\tvalid_0's rmse: 0.0264172\n", + "[451]\tvalid_0's rmse: 0.0264171\n", + "[452]\tvalid_0's rmse: 0.0264175\n", + "[453]\tvalid_0's rmse: 0.0264149\n", + "[454]\tvalid_0's rmse: 0.0264144\n", + "[455]\tvalid_0's rmse: 0.0264154\n", + "[456]\tvalid_0's rmse: 0.0264147\n", + "[457]\tvalid_0's rmse: 0.0264118\n", + "[458]\tvalid_0's rmse: 0.0264138\n", + "[459]\tvalid_0's rmse: 0.0264151\n", + "[460]\tvalid_0's rmse: 0.026415\n", + "[461]\tvalid_0's rmse: 0.0264159\n", + "[462]\tvalid_0's rmse: 0.0264121\n", + "[463]\tvalid_0's rmse: 0.026414\n", + "[464]\tvalid_0's rmse: 0.0264093\n", + "[465]\tvalid_0's rmse: 0.0264118\n", + "[466]\tvalid_0's rmse: 0.0264118\n", + "[467]\tvalid_0's rmse: 0.0264099\n", + "[468]\tvalid_0's rmse: 0.0264113\n", + "[469]\tvalid_0's rmse: 0.0264101\n", + "[470]\tvalid_0's rmse: 0.0264118\n", + "[471]\tvalid_0's rmse: 0.0264092\n", + "[472]\tvalid_0's rmse: 0.0264044\n", + "[473]\tvalid_0's rmse: 0.0263975\n", + "[474]\tvalid_0's rmse: 0.0263909\n", + "[475]\tvalid_0's rmse: 0.0263866\n", + "[476]\tvalid_0's rmse: 0.0263848\n", + "[477]\tvalid_0's rmse: 0.0263839\n", + "[478]\tvalid_0's rmse: 0.0263787\n", + "[479]\tvalid_0's rmse: 0.0263797\n", + "[480]\tvalid_0's rmse: 0.0263769\n", + "[481]\tvalid_0's rmse: 0.0263744\n", + "[482]\tvalid_0's rmse: 0.0263693\n", + "[483]\tvalid_0's rmse: 0.0263673\n", + "[484]\tvalid_0's rmse: 0.0263626\n", + "[485]\tvalid_0's rmse: 0.0263591\n", + "[486]\tvalid_0's rmse: 0.0263569\n", + "[487]\tvalid_0's rmse: 0.0263557\n", + "[488]\tvalid_0's rmse: 0.0263559\n", + "[489]\tvalid_0's rmse: 0.026358\n", + "[490]\tvalid_0's rmse: 0.0263566\n", + "[491]\tvalid_0's rmse: 0.0263564\n", + 
"[492]\tvalid_0's rmse: 0.0263568\n", + "[493]\tvalid_0's rmse: 0.0263562\n", + "[494]\tvalid_0's rmse: 0.0263561\n", + "[495]\tvalid_0's rmse: 0.0263508\n", + "[496]\tvalid_0's rmse: 0.0263498\n", + "[497]\tvalid_0's rmse: 0.026346\n", + "[498]\tvalid_0's rmse: 0.0263474\n", + "[499]\tvalid_0's rmse: 0.026346\n", + "[500]\tvalid_0's rmse: 0.026342\n", + "[501]\tvalid_0's rmse: 0.0263415\n", + "[502]\tvalid_0's rmse: 0.0263404\n", + "[503]\tvalid_0's rmse: 0.0263355\n", + "[504]\tvalid_0's rmse: 0.0263363\n", + "[505]\tvalid_0's rmse: 0.0263362\n", + "[506]\tvalid_0's rmse: 0.0263356\n", + "[507]\tvalid_0's rmse: 0.0263345\n", + "[508]\tvalid_0's rmse: 0.0263343\n", + "[509]\tvalid_0's rmse: 0.0263294\n", + "[510]\tvalid_0's rmse: 0.0263279\n", + "[511]\tvalid_0's rmse: 0.0263274\n", + "[512]\tvalid_0's rmse: 0.0263227\n", + "[513]\tvalid_0's rmse: 0.0263228\n", + "[514]\tvalid_0's rmse: 0.0263178\n", + "[515]\tvalid_0's rmse: 0.0263175\n", + "[516]\tvalid_0's rmse: 0.0263152\n", + "[517]\tvalid_0's rmse: 0.0263062\n", + "[518]\tvalid_0's rmse: 0.0263098\n", + "[519]\tvalid_0's rmse: 0.0263065\n", + "[520]\tvalid_0's rmse: 0.0263043\n", + "[521]\tvalid_0's rmse: 0.0263029\n", + "[522]\tvalid_0's rmse: 0.0263005\n", + "[523]\tvalid_0's rmse: 0.0263013\n", + "[524]\tvalid_0's rmse: 0.0263\n", + "[525]\tvalid_0's rmse: 0.0262944\n", + "[526]\tvalid_0's rmse: 0.0262956\n", + "[527]\tvalid_0's rmse: 0.0262945\n", + "[528]\tvalid_0's rmse: 0.0262948\n", + "[529]\tvalid_0's rmse: 0.0262927\n", + "[530]\tvalid_0's rmse: 0.0262942\n", + "[531]\tvalid_0's rmse: 0.0262821\n", + "[532]\tvalid_0's rmse: 0.0262828\n", + "[533]\tvalid_0's rmse: 0.0262794\n", + "[534]\tvalid_0's rmse: 0.0262778\n", + "[535]\tvalid_0's rmse: 0.0262769\n", + "[536]\tvalid_0's rmse: 0.0262763\n", + "[537]\tvalid_0's rmse: 0.0262754\n", + "[538]\tvalid_0's rmse: 0.026275\n", + "[539]\tvalid_0's rmse: 0.0262742\n", + "[540]\tvalid_0's rmse: 0.02625\n", + "[541]\tvalid_0's rmse: 0.0262449\n", + 
"[542]\tvalid_0's rmse: 0.0262456\n", + "[543]\tvalid_0's rmse: 0.0262468\n", + "[544]\tvalid_0's rmse: 0.0262448\n", + "[545]\tvalid_0's rmse: 0.0262438\n", + "[546]\tvalid_0's rmse: 0.0262417\n", + "[547]\tvalid_0's rmse: 0.026231\n", + "[548]\tvalid_0's rmse: 0.0262339\n", + "[549]\tvalid_0's rmse: 0.0262327\n", + "[550]\tvalid_0's rmse: 0.0262289\n", + "[551]\tvalid_0's rmse: 0.0262244\n", + "[552]\tvalid_0's rmse: 0.0262075\n", + "[553]\tvalid_0's rmse: 0.0262031\n", + "[554]\tvalid_0's rmse: 0.0262028\n", + "[555]\tvalid_0's rmse: 0.0261984\n", + "[556]\tvalid_0's rmse: 0.0261981\n", + "[557]\tvalid_0's rmse: 0.0261977\n", + "[558]\tvalid_0's rmse: 0.0262004\n", + "[559]\tvalid_0's rmse: 0.0261955\n", + "[560]\tvalid_0's rmse: 0.0261955\n", + "[561]\tvalid_0's rmse: 0.0261947\n", + "[562]\tvalid_0's rmse: 0.0261983\n", + "[563]\tvalid_0's rmse: 0.0261981\n", + "[564]\tvalid_0's rmse: 0.0261992\n", + "[565]\tvalid_0's rmse: 0.0261974\n", + "[566]\tvalid_0's rmse: 0.0261936\n", + "[567]\tvalid_0's rmse: 0.0261954\n", + "[568]\tvalid_0's rmse: 0.0261987\n", + "[569]\tvalid_0's rmse: 0.0261837\n", + "[570]\tvalid_0's rmse: 0.0261839\n", + "[571]\tvalid_0's rmse: 0.026185\n", + "[572]\tvalid_0's rmse: 0.0261849\n", + "[573]\tvalid_0's rmse: 0.0261842\n", + "[574]\tvalid_0's rmse: 0.0261826\n", + "[575]\tvalid_0's rmse: 0.0261834\n", + "[576]\tvalid_0's rmse: 0.0261825\n", + "[577]\tvalid_0's rmse: 0.0261717\n", + "[578]\tvalid_0's rmse: 0.026171\n", + "[579]\tvalid_0's rmse: 0.0261609\n", + "[580]\tvalid_0's rmse: 0.02616\n", + "[581]\tvalid_0's rmse: 0.0261573\n", + "[582]\tvalid_0's rmse: 0.026159\n", + "[583]\tvalid_0's rmse: 0.0261576\n", + "[584]\tvalid_0's rmse: 0.0261557\n", + "[585]\tvalid_0's rmse: 0.0261582\n", + "[586]\tvalid_0's rmse: 0.026158\n", + "[587]\tvalid_0's rmse: 0.0261573\n", + "[588]\tvalid_0's rmse: 0.0261571\n", + "[589]\tvalid_0's rmse: 0.0261535\n", + "[590]\tvalid_0's rmse: 0.0261534\n", + "[591]\tvalid_0's rmse: 0.0261534\n", + 
"[592]\tvalid_0's rmse: 0.0261436\n", + "[593]\tvalid_0's rmse: 0.0261423\n", + "[594]\tvalid_0's rmse: 0.0261409\n", + "[595]\tvalid_0's rmse: 0.0261377\n", + "[596]\tvalid_0's rmse: 0.0261358\n", + "[597]\tvalid_0's rmse: 0.0261367\n", + "[598]\tvalid_0's rmse: 0.026137\n", + "[599]\tvalid_0's rmse: 0.0261357\n", + "[600]\tvalid_0's rmse: 0.0261344\n", + "[601]\tvalid_0's rmse: 0.0261345\n", + "[602]\tvalid_0's rmse: 0.026133\n", + "[603]\tvalid_0's rmse: 0.0261313\n", + "[604]\tvalid_0's rmse: 0.0261344\n", + "[605]\tvalid_0's rmse: 0.0261339\n", + "[606]\tvalid_0's rmse: 0.0261321\n", + "[607]\tvalid_0's rmse: 0.0261288\n", + "[608]\tvalid_0's rmse: 0.0261285\n", + "[609]\tvalid_0's rmse: 0.0261298\n", + "[610]\tvalid_0's rmse: 0.026131\n", + "[611]\tvalid_0's rmse: 0.0261265\n", + "[612]\tvalid_0's rmse: 0.0261043\n", + "[613]\tvalid_0's rmse: 0.0261023\n", + "[614]\tvalid_0's rmse: 0.0261013\n", + "[615]\tvalid_0's rmse: 0.0260971\n", + "[616]\tvalid_0's rmse: 0.0260979\n", + "[617]\tvalid_0's rmse: 0.0260987\n", + "[618]\tvalid_0's rmse: 0.0260728\n", + "[619]\tvalid_0's rmse: 0.026069\n", + "[620]\tvalid_0's rmse: 0.0260678\n", + "[621]\tvalid_0's rmse: 0.0260587\n", + "[622]\tvalid_0's rmse: 0.0260571\n", + "[623]\tvalid_0's rmse: 0.0260564\n", + "[624]\tvalid_0's rmse: 0.026054\n", + "[625]\tvalid_0's rmse: 0.0260544\n", + "[626]\tvalid_0's rmse: 0.0260502\n", + "[627]\tvalid_0's rmse: 0.0260444\n", + "[628]\tvalid_0's rmse: 0.026044\n", + "[629]\tvalid_0's rmse: 0.02604\n", + "[630]\tvalid_0's rmse: 0.0260386\n", + "[631]\tvalid_0's rmse: 0.0260394\n", + "[632]\tvalid_0's rmse: 0.0260378\n", + "[633]\tvalid_0's rmse: 0.0260397\n", + "[634]\tvalid_0's rmse: 0.0260395\n", + "[635]\tvalid_0's rmse: 0.0260398\n", + "[636]\tvalid_0's rmse: 0.0260376\n", + "[637]\tvalid_0's rmse: 0.026039\n", + "[638]\tvalid_0's rmse: 0.0260362\n", + "[639]\tvalid_0's rmse: 0.0260345\n", + "[640]\tvalid_0's rmse: 0.0260342\n", + "[641]\tvalid_0's rmse: 0.0260336\n", + 
"[642]\tvalid_0's rmse: 0.0260337\n", + "[643]\tvalid_0's rmse: 0.0260325\n", + "[644]\tvalid_0's rmse: 0.0260305\n", + "[645]\tvalid_0's rmse: 0.0260308\n", + "[646]\tvalid_0's rmse: 0.0260319\n", + "[647]\tvalid_0's rmse: 0.0260334\n", + "[648]\tvalid_0's rmse: 0.0260338\n", + "[649]\tvalid_0's rmse: 0.0260325\n", + "[650]\tvalid_0's rmse: 0.0260265\n", + "[651]\tvalid_0's rmse: 0.0260269\n", + "[652]\tvalid_0's rmse: 0.0260251\n", + "[653]\tvalid_0's rmse: 0.0260252\n", + "[654]\tvalid_0's rmse: 0.0260251\n", + "[655]\tvalid_0's rmse: 0.0260257\n", + "[656]\tvalid_0's rmse: 0.0260234\n", + "[657]\tvalid_0's rmse: 0.0260219\n", + "[658]\tvalid_0's rmse: 0.0260211\n", + "[659]\tvalid_0's rmse: 0.0260209\n", + "[660]\tvalid_0's rmse: 0.0260217\n", + "[661]\tvalid_0's rmse: 0.0260234\n", + "[662]\tvalid_0's rmse: 0.0260244\n", + "[663]\tvalid_0's rmse: 0.0260219\n", + "[664]\tvalid_0's rmse: 0.0260216\n", + "[665]\tvalid_0's rmse: 0.026023\n", + "[666]\tvalid_0's rmse: 0.026025\n", + "[667]\tvalid_0's rmse: 0.0260245\n", + "[668]\tvalid_0's rmse: 0.026022\n", + "[669]\tvalid_0's rmse: 0.0260216\n", + "[670]\tvalid_0's rmse: 0.0260231\n", + "[671]\tvalid_0's rmse: 0.0260226\n", + "[672]\tvalid_0's rmse: 0.0260197\n", + "[673]\tvalid_0's rmse: 0.0260191\n", + "[674]\tvalid_0's rmse: 0.0260193\n", + "[675]\tvalid_0's rmse: 0.0260178\n", + "[676]\tvalid_0's rmse: 0.0260171\n", + "[677]\tvalid_0's rmse: 0.0260153\n", + "[678]\tvalid_0's rmse: 0.0260153\n", + "[679]\tvalid_0's rmse: 0.026013\n", + "[680]\tvalid_0's rmse: 0.0260116\n", + "[681]\tvalid_0's rmse: 0.0260089\n", + "[682]\tvalid_0's rmse: 0.0260046\n", + "[683]\tvalid_0's rmse: 0.0260029\n", + "[684]\tvalid_0's rmse: 0.0260038\n", + "[685]\tvalid_0's rmse: 0.0260018\n", + "[686]\tvalid_0's rmse: 0.0260058\n", + "[687]\tvalid_0's rmse: 0.0260083\n", + "[688]\tvalid_0's rmse: 0.0260081\n", + "[689]\tvalid_0's rmse: 0.0260076\n", + "[690]\tvalid_0's rmse: 0.0260032\n", + "[691]\tvalid_0's rmse: 0.0260018\n", + 
"[692]\tvalid_0's rmse: 0.0260013\n", + "[693]\tvalid_0's rmse: 0.0260024\n", + "[694]\tvalid_0's rmse: 0.026003\n", + "[695]\tvalid_0's rmse: 0.0260023\n", + "[696]\tvalid_0's rmse: 0.0260022\n", + "[697]\tvalid_0's rmse: 0.0260018\n", + "[698]\tvalid_0's rmse: 0.0260004\n", + "[699]\tvalid_0's rmse: 0.0259998\n", + "[700]\tvalid_0's rmse: 0.0259961\n", + "[701]\tvalid_0's rmse: 0.0259964\n", + "[702]\tvalid_0's rmse: 0.0259942\n", + "[703]\tvalid_0's rmse: 0.0259951\n", + "[704]\tvalid_0's rmse: 0.0259918\n", + "[705]\tvalid_0's rmse: 0.0259913\n", + "[706]\tvalid_0's rmse: 0.0259895\n", + "[707]\tvalid_0's rmse: 0.0259881\n", + "[708]\tvalid_0's rmse: 0.0259869\n", + "[709]\tvalid_0's rmse: 0.0259796\n", + "[710]\tvalid_0's rmse: 0.0259789\n", + "[711]\tvalid_0's rmse: 0.0259766\n", + "[712]\tvalid_0's rmse: 0.0259758\n", + "[713]\tvalid_0's rmse: 0.0259746\n", + "[714]\tvalid_0's rmse: 0.0259744\n", + "[715]\tvalid_0's rmse: 0.0259761\n", + "[716]\tvalid_0's rmse: 0.0259832\n", + "[717]\tvalid_0's rmse: 0.0259813\n", + "[718]\tvalid_0's rmse: 0.0259823\n", + "[719]\tvalid_0's rmse: 0.0259815\n", + "[720]\tvalid_0's rmse: 0.0259701\n", + "[721]\tvalid_0's rmse: 0.0259693\n", + "[722]\tvalid_0's rmse: 0.0259679\n", + "[723]\tvalid_0's rmse: 0.0259668\n", + "[724]\tvalid_0's rmse: 0.0259646\n", + "[725]\tvalid_0's rmse: 0.0259639\n", + "[726]\tvalid_0's rmse: 0.0259672\n", + "[727]\tvalid_0's rmse: 0.025969\n", + "[728]\tvalid_0's rmse: 0.0259709\n", + "[729]\tvalid_0's rmse: 0.0259705\n", + "[730]\tvalid_0's rmse: 0.0259611\n", + "[731]\tvalid_0's rmse: 0.0259601\n", + "[732]\tvalid_0's rmse: 0.0259605\n", + "[733]\tvalid_0's rmse: 0.02596\n", + "[734]\tvalid_0's rmse: 0.0259589\n", + "[735]\tvalid_0's rmse: 0.0259593\n", + "[736]\tvalid_0's rmse: 0.0259612\n", + "[737]\tvalid_0's rmse: 0.0259617\n", + "[738]\tvalid_0's rmse: 0.0259604\n", + "[739]\tvalid_0's rmse: 0.0259609\n", + "[740]\tvalid_0's rmse: 0.0259575\n", + "[741]\tvalid_0's rmse: 0.0259552\n", + 
"[742]\tvalid_0's rmse: 0.025958\n", + "[743]\tvalid_0's rmse: 0.0259575\n", + "[744]\tvalid_0's rmse: 0.0259551\n", + "[745]\tvalid_0's rmse: 0.0259555\n", + "[746]\tvalid_0's rmse: 0.0259564\n", + "[747]\tvalid_0's rmse: 0.0259554\n", + "[748]\tvalid_0's rmse: 0.0259536\n", + "[749]\tvalid_0's rmse: 0.0259524\n", + "[750]\tvalid_0's rmse: 0.0259526\n", + "[751]\tvalid_0's rmse: 0.0259521\n", + "[752]\tvalid_0's rmse: 0.0259515\n", + "[753]\tvalid_0's rmse: 0.0259512\n", + "[754]\tvalid_0's rmse: 0.0259504\n", + "[755]\tvalid_0's rmse: 0.0259508\n", + "[756]\tvalid_0's rmse: 0.0259495\n", + "[757]\tvalid_0's rmse: 0.0259432\n", + "[758]\tvalid_0's rmse: 0.0259428\n", + "[759]\tvalid_0's rmse: 0.0259422\n", + "[760]\tvalid_0's rmse: 0.0259443\n", + "[761]\tvalid_0's rmse: 0.0259459\n", + "[762]\tvalid_0's rmse: 0.0259443\n", + "[763]\tvalid_0's rmse: 0.0259442\n", + "[764]\tvalid_0's rmse: 0.0259432\n", + "[765]\tvalid_0's rmse: 0.025944\n", + "[766]\tvalid_0's rmse: 0.0259433\n", + "[767]\tvalid_0's rmse: 0.0259438\n", + "[768]\tvalid_0's rmse: 0.0259408\n", + "[769]\tvalid_0's rmse: 0.0259404\n", + "[770]\tvalid_0's rmse: 0.0259398\n", + "[771]\tvalid_0's rmse: 0.0259375\n", + "[772]\tvalid_0's rmse: 0.025935\n", + "[773]\tvalid_0's rmse: 0.0259347\n", + "[774]\tvalid_0's rmse: 0.0259332\n", + "[775]\tvalid_0's rmse: 0.0259335\n", + "[776]\tvalid_0's rmse: 0.0259349\n", + "[777]\tvalid_0's rmse: 0.0259345\n", + "[778]\tvalid_0's rmse: 0.0259353\n", + "[779]\tvalid_0's rmse: 0.0259353\n", + "[780]\tvalid_0's rmse: 0.0259354\n", + "[781]\tvalid_0's rmse: 0.025935\n", + "[782]\tvalid_0's rmse: 0.0259362\n", + "[783]\tvalid_0's rmse: 0.0259348\n", + "[784]\tvalid_0's rmse: 0.0259347\n", + "[785]\tvalid_0's rmse: 0.0259361\n", + "[786]\tvalid_0's rmse: 0.0259417\n", + "[787]\tvalid_0's rmse: 0.0259418\n", + "[788]\tvalid_0's rmse: 0.0259422\n", + "[789]\tvalid_0's rmse: 0.0259422\n", + "[790]\tvalid_0's rmse: 0.0259419\n", + "[791]\tvalid_0's rmse: 0.0259409\n", + 
"[792]\tvalid_0's rmse: 0.0259409\n", + "[793]\tvalid_0's rmse: 0.0259433\n", + "[794]\tvalid_0's rmse: 0.0259438\n", + "[795]\tvalid_0's rmse: 0.0259415\n", + "[796]\tvalid_0's rmse: 0.0259423\n", + "[797]\tvalid_0's rmse: 0.0259435\n", + "[798]\tvalid_0's rmse: 0.0259416\n", + "[799]\tvalid_0's rmse: 0.0259469\n", + "[800]\tvalid_0's rmse: 0.0259488\n", + "[801]\tvalid_0's rmse: 0.0259505\n", + "[802]\tvalid_0's rmse: 0.025947\n", + "[803]\tvalid_0's rmse: 0.0259453\n", + "[804]\tvalid_0's rmse: 0.0259434\n", + "[805]\tvalid_0's rmse: 0.0259429\n", + "[806]\tvalid_0's rmse: 0.0259445\n", + "[807]\tvalid_0's rmse: 0.0259469\n", + "[808]\tvalid_0's rmse: 0.0259436\n", + "[809]\tvalid_0's rmse: 0.0259414\n", + "[810]\tvalid_0's rmse: 0.0259419\n", + "[811]\tvalid_0's rmse: 0.0259498\n", + "[812]\tvalid_0's rmse: 0.0259524\n", + "[813]\tvalid_0's rmse: 0.025951\n", + "[814]\tvalid_0's rmse: 0.0259468\n", + "[815]\tvalid_0's rmse: 0.0259462\n", + "[816]\tvalid_0's rmse: 0.0259387\n", + "[817]\tvalid_0's rmse: 0.0259382\n", + "[818]\tvalid_0's rmse: 0.0259381\n", + "[819]\tvalid_0's rmse: 0.0259391\n", + "[820]\tvalid_0's rmse: 0.0259437\n", + "[821]\tvalid_0's rmse: 0.0259455\n", + "[822]\tvalid_0's rmse: 0.0259458\n", + "[823]\tvalid_0's rmse: 0.0259459\n", + "[824]\tvalid_0's rmse: 0.0259441\n", + "[825]\tvalid_0's rmse: 0.0259408\n", + "[826]\tvalid_0's rmse: 0.0259412\n", + "[827]\tvalid_0's rmse: 0.0259419\n", + "[828]\tvalid_0's rmse: 0.0259434\n", + "[829]\tvalid_0's rmse: 0.0259429\n", + "[830]\tvalid_0's rmse: 0.0259448\n", + "[831]\tvalid_0's rmse: 0.0259442\n", + "[832]\tvalid_0's rmse: 0.0259424\n", + "[833]\tvalid_0's rmse: 0.0259416\n", + "[834]\tvalid_0's rmse: 0.0259425\n", + "[835]\tvalid_0's rmse: 0.025941\n", + "[836]\tvalid_0's rmse: 0.02594\n", + "[837]\tvalid_0's rmse: 0.0259396\n", + "[838]\tvalid_0's rmse: 0.0259382\n", + "[839]\tvalid_0's rmse: 0.0259367\n", + "[840]\tvalid_0's rmse: 0.0259381\n", + "[841]\tvalid_0's rmse: 0.0259379\n", + 
"[842]\tvalid_0's rmse: 0.0259268\n", + "[843]\tvalid_0's rmse: 0.0259259\n", + "[844]\tvalid_0's rmse: 0.0259228\n", + "[845]\tvalid_0's rmse: 0.0259228\n", + "[846]\tvalid_0's rmse: 0.0259187\n", + "[847]\tvalid_0's rmse: 0.0259171\n", + "[848]\tvalid_0's rmse: 0.0259177\n", + "[849]\tvalid_0's rmse: 0.0259164\n", + "[850]\tvalid_0's rmse: 0.0259161\n", + "[851]\tvalid_0's rmse: 0.0259161\n", + "[852]\tvalid_0's rmse: 0.0259147\n", + "[853]\tvalid_0's rmse: 0.0259145\n", + "[854]\tvalid_0's rmse: 0.0259144\n", + "[855]\tvalid_0's rmse: 0.0259125\n", + "[856]\tvalid_0's rmse: 0.0259127\n", + "[857]\tvalid_0's rmse: 0.0259115\n", + "[858]\tvalid_0's rmse: 0.0259104\n", + "[859]\tvalid_0's rmse: 0.0259119\n", + "[860]\tvalid_0's rmse: 0.0259109\n", + "[861]\tvalid_0's rmse: 0.02591\n", + "[862]\tvalid_0's rmse: 0.0259099\n", + "[863]\tvalid_0's rmse: 0.0259097\n", + "[864]\tvalid_0's rmse: 0.0259133\n", + "[865]\tvalid_0's rmse: 0.0259116\n", + "[866]\tvalid_0's rmse: 0.0259111\n", + "[867]\tvalid_0's rmse: 0.0259095\n", + "[868]\tvalid_0's rmse: 0.0258982\n", + "[869]\tvalid_0's rmse: 0.0258979\n", + "[870]\tvalid_0's rmse: 0.0258956\n", + "[871]\tvalid_0's rmse: 0.0258967\n", + "[872]\tvalid_0's rmse: 0.0258972\n", + "[873]\tvalid_0's rmse: 0.0258971\n", + "[874]\tvalid_0's rmse: 0.0259015\n", + "[875]\tvalid_0's rmse: 0.0258999\n", + "[876]\tvalid_0's rmse: 0.0258987\n", + "[877]\tvalid_0's rmse: 0.0258987\n", + "[878]\tvalid_0's rmse: 0.0258985\n", + "[879]\tvalid_0's rmse: 0.0259\n", + "[880]\tvalid_0's rmse: 0.0259008\n", + "[881]\tvalid_0's rmse: 0.0259018\n", + "[882]\tvalid_0's rmse: 0.0259037\n", + "[883]\tvalid_0's rmse: 0.0259048\n", + "[884]\tvalid_0's rmse: 0.0259063\n", + "[885]\tvalid_0's rmse: 0.0259055\n", + "[886]\tvalid_0's rmse: 0.0259052\n", + "[887]\tvalid_0's rmse: 0.0259047\n", + "[888]\tvalid_0's rmse: 0.0259042\n", + "[889]\tvalid_0's rmse: 0.0259046\n", + "[890]\tvalid_0's rmse: 0.0259049\n", + "[891]\tvalid_0's rmse: 0.0259044\n", + 
"[892]\tvalid_0's rmse: 0.0259046\n", + "[893]\tvalid_0's rmse: 0.0259035\n", + "[894]\tvalid_0's rmse: 0.0259016\n", + "[895]\tvalid_0's rmse: 0.0259031\n", + "[896]\tvalid_0's rmse: 0.0259025\n", + "[897]\tvalid_0's rmse: 0.0259047\n", + "[898]\tvalid_0's rmse: 0.0259051\n", + "[899]\tvalid_0's rmse: 0.0259101\n", + "[900]\tvalid_0's rmse: 0.0259099\n", + "[901]\tvalid_0's rmse: 0.0259106\n", + "[902]\tvalid_0's rmse: 0.0259101\n", + "[903]\tvalid_0's rmse: 0.0259044\n", + "[904]\tvalid_0's rmse: 0.0259034\n", + "[905]\tvalid_0's rmse: 0.0259038\n", + "[906]\tvalid_0's rmse: 0.0259047\n", + "[907]\tvalid_0's rmse: 0.0259061\n", + "[908]\tvalid_0's rmse: 0.025906\n", + "[909]\tvalid_0's rmse: 0.025901\n", + "[910]\tvalid_0's rmse: 0.0258971\n", + "[911]\tvalid_0's rmse: 0.0258968\n", + "[912]\tvalid_0's rmse: 0.0258973\n", + "[913]\tvalid_0's rmse: 0.0258965\n", + "[914]\tvalid_0's rmse: 0.025898\n", + "[915]\tvalid_0's rmse: 0.0258982\n", + "[916]\tvalid_0's rmse: 0.0258981\n", + "[917]\tvalid_0's rmse: 0.0258952\n", + "[918]\tvalid_0's rmse: 0.0258949\n", + "[919]\tvalid_0's rmse: 0.0258947\n", + "[920]\tvalid_0's rmse: 0.0258959\n", + "[921]\tvalid_0's rmse: 0.0258954\n", + "[922]\tvalid_0's rmse: 0.0258947\n", + "[923]\tvalid_0's rmse: 0.0258946\n", + "[924]\tvalid_0's rmse: 0.0258931\n", + "[925]\tvalid_0's rmse: 0.0258945\n", + "[926]\tvalid_0's rmse: 0.0258925\n", + "[927]\tvalid_0's rmse: 0.0258899\n", + "[928]\tvalid_0's rmse: 0.0258898\n", + "[929]\tvalid_0's rmse: 0.0258914\n", + "[930]\tvalid_0's rmse: 0.0258912\n", + "[931]\tvalid_0's rmse: 0.025892\n", + "[932]\tvalid_0's rmse: 0.025893\n", + "[933]\tvalid_0's rmse: 0.0258918\n", + "[934]\tvalid_0's rmse: 0.0258882\n", + "[935]\tvalid_0's rmse: 0.0258882\n", + "[936]\tvalid_0's rmse: 0.0258871\n", + "[937]\tvalid_0's rmse: 0.0258879\n", + "[938]\tvalid_0's rmse: 0.0258857\n", + "[939]\tvalid_0's rmse: 0.0258855\n", + "[940]\tvalid_0's rmse: 0.0258856\n", + "[941]\tvalid_0's rmse: 0.0258855\n", + 
"[942]\tvalid_0's rmse: 0.0258857\n", + "[943]\tvalid_0's rmse: 0.0258857\n", + "[944]\tvalid_0's rmse: 0.0258861\n", + "[945]\tvalid_0's rmse: 0.0258858\n", + "[946]\tvalid_0's rmse: 0.0258865\n", + "[947]\tvalid_0's rmse: 0.0258875\n", + "[948]\tvalid_0's rmse: 0.0258872\n", + "[949]\tvalid_0's rmse: 0.0258872\n", + "[950]\tvalid_0's rmse: 0.0258866\n", + "[951]\tvalid_0's rmse: 0.0258888\n", + "[952]\tvalid_0's rmse: 0.0258892\n", + "[953]\tvalid_0's rmse: 0.0258835\n", + "[954]\tvalid_0's rmse: 0.0258817\n", + "[955]\tvalid_0's rmse: 0.0258817\n", + "[956]\tvalid_0's rmse: 0.0258786\n", + "[957]\tvalid_0's rmse: 0.0258788\n", + "[958]\tvalid_0's rmse: 0.0258788\n", + "[959]\tvalid_0's rmse: 0.0258798\n", + "[960]\tvalid_0's rmse: 0.0258797\n", + "[961]\tvalid_0's rmse: 0.0258797\n", + "[962]\tvalid_0's rmse: 0.0258776\n", + "[963]\tvalid_0's rmse: 0.0258773\n", + "[964]\tvalid_0's rmse: 0.025877\n", + "[965]\tvalid_0's rmse: 0.0258773\n", + "[966]\tvalid_0's rmse: 0.025879\n", + "[967]\tvalid_0's rmse: 0.0258802\n", + "[968]\tvalid_0's rmse: 0.0258794\n", + "[969]\tvalid_0's rmse: 0.02588\n", + "[970]\tvalid_0's rmse: 0.0258797\n", + "[971]\tvalid_0's rmse: 0.0258782\n", + "[972]\tvalid_0's rmse: 0.0258827\n", + "[973]\tvalid_0's rmse: 0.0258842\n", + "[974]\tvalid_0's rmse: 0.0258837\n", + "[975]\tvalid_0's rmse: 0.0258827\n", + "[976]\tvalid_0's rmse: 0.0258818\n", + "[977]\tvalid_0's rmse: 0.0258811\n", + "[978]\tvalid_0's rmse: 0.0258813\n", + "[979]\tvalid_0's rmse: 0.0258813\n", + "[980]\tvalid_0's rmse: 0.0258805\n", + "[981]\tvalid_0's rmse: 0.0258805\n", + "[982]\tvalid_0's rmse: 0.0258791\n", + "[983]\tvalid_0's rmse: 0.0258764\n", + "[984]\tvalid_0's rmse: 0.0258765\n", + "[985]\tvalid_0's rmse: 0.0258748\n", + "[986]\tvalid_0's rmse: 0.025877\n", + "[987]\tvalid_0's rmse: 0.025878\n", + "[988]\tvalid_0's rmse: 0.0258776\n", + "[989]\tvalid_0's rmse: 0.0258761\n", + "[990]\tvalid_0's rmse: 0.0258762\n", + "[991]\tvalid_0's rmse: 0.0258591\n", + 
"[992]\tvalid_0's rmse: 0.0258595\n", + "[993]\tvalid_0's rmse: 0.0258594\n", + "[994]\tvalid_0's rmse: 0.0258605\n", + "[995]\tvalid_0's rmse: 0.02586\n", + "[996]\tvalid_0's rmse: 0.0258582\n", + "[997]\tvalid_0's rmse: 0.0258576\n", + "[998]\tvalid_0's rmse: 0.0258556\n", + "[999]\tvalid_0's rmse: 0.0258562\n", + "[1000]\tvalid_0's rmse: 0.0258543\n", + "[1001]\tvalid_0's rmse: 0.0258523\n", + "[1002]\tvalid_0's rmse: 0.0258534\n", + "[1003]\tvalid_0's rmse: 0.0258537\n", + "[1004]\tvalid_0's rmse: 0.0258546\n", + "[1005]\tvalid_0's rmse: 0.0258533\n", + "[1006]\tvalid_0's rmse: 0.0258519\n", + "[1007]\tvalid_0's rmse: 0.0258508\n", + "[1008]\tvalid_0's rmse: 0.0258508\n", + "[1009]\tvalid_0's rmse: 0.0258509\n", + "[1010]\tvalid_0's rmse: 0.0258469\n", + "[1011]\tvalid_0's rmse: 0.025851\n", + "[1012]\tvalid_0's rmse: 0.0258512\n", + "[1013]\tvalid_0's rmse: 0.0258474\n", + "[1014]\tvalid_0's rmse: 0.0258468\n", + "[1015]\tvalid_0's rmse: 0.0258432\n", + "[1016]\tvalid_0's rmse: 0.0258409\n", + "[1017]\tvalid_0's rmse: 0.0258283\n", + "[1018]\tvalid_0's rmse: 0.0258284\n", + "[1019]\tvalid_0's rmse: 0.0258254\n", + "[1020]\tvalid_0's rmse: 0.0258244\n", + "[1021]\tvalid_0's rmse: 0.0258246\n", + "[1022]\tvalid_0's rmse: 0.0258249\n", + "[1023]\tvalid_0's rmse: 0.0258246\n", + "[1024]\tvalid_0's rmse: 0.0258215\n", + "[1025]\tvalid_0's rmse: 0.0258211\n", + "[1026]\tvalid_0's rmse: 0.0258215\n", + "[1027]\tvalid_0's rmse: 0.0258213\n", + "[1028]\tvalid_0's rmse: 0.0258215\n", + "[1029]\tvalid_0's rmse: 0.0258233\n", + "[1030]\tvalid_0's rmse: 0.0258232\n", + "[1031]\tvalid_0's rmse: 0.0258233\n", + "[1032]\tvalid_0's rmse: 0.0258191\n", + "[1033]\tvalid_0's rmse: 0.0258196\n", + "[1034]\tvalid_0's rmse: 0.0258169\n", + "[1035]\tvalid_0's rmse: 0.025816\n", + "[1036]\tvalid_0's rmse: 0.0258137\n", + "[1037]\tvalid_0's rmse: 0.0258143\n", + "[1038]\tvalid_0's rmse: 0.0258121\n", + "[1039]\tvalid_0's rmse: 0.0258055\n", + "[1040]\tvalid_0's rmse: 0.0258055\n", + 
"[1041]\tvalid_0's rmse: 0.0258079\n", + "[1042]\tvalid_0's rmse: 0.0258097\n", + "[1043]\tvalid_0's rmse: 0.0258097\n", + "[1044]\tvalid_0's rmse: 0.0258109\n", + "[1045]\tvalid_0's rmse: 0.0258118\n", + "[1046]\tvalid_0's rmse: 0.0258121\n", + "[1047]\tvalid_0's rmse: 0.0258112\n", + "[1048]\tvalid_0's rmse: 0.0258103\n", + "[1049]\tvalid_0's rmse: 0.0258102\n", + "[1050]\tvalid_0's rmse: 0.0258113\n", + "[1051]\tvalid_0's rmse: 0.0258119\n", + "[1052]\tvalid_0's rmse: 0.0258115\n", + "[1053]\tvalid_0's rmse: 0.0258116\n", + "[1054]\tvalid_0's rmse: 0.0258114\n", + "[1055]\tvalid_0's rmse: 0.0258098\n", + "[1056]\tvalid_0's rmse: 0.0258097\n", + "[1057]\tvalid_0's rmse: 0.0258085\n", + "[1058]\tvalid_0's rmse: 0.0258088\n", + "[1059]\tvalid_0's rmse: 0.0258058\n", + "[1060]\tvalid_0's rmse: 0.0258033\n", + "[1061]\tvalid_0's rmse: 0.0257999\n", + "[1062]\tvalid_0's rmse: 0.025795\n", + "[1063]\tvalid_0's rmse: 0.0257936\n", + "[1064]\tvalid_0's rmse: 0.0257928\n", + "[1065]\tvalid_0's rmse: 0.025793\n", + "[1066]\tvalid_0's rmse: 0.0257934\n", + "[1067]\tvalid_0's rmse: 0.0257928\n", + "[1068]\tvalid_0's rmse: 0.0257786\n", + "[1069]\tvalid_0's rmse: 0.0257783\n", + "[1070]\tvalid_0's rmse: 0.0257778\n", + "[1071]\tvalid_0's rmse: 0.025777\n", + "[1072]\tvalid_0's rmse: 0.0257782\n", + "[1073]\tvalid_0's rmse: 0.0257767\n", + "[1074]\tvalid_0's rmse: 0.0257763\n", + "[1075]\tvalid_0's rmse: 0.0257764\n", + "[1076]\tvalid_0's rmse: 0.025776\n", + "[1077]\tvalid_0's rmse: 0.0257776\n", + "[1078]\tvalid_0's rmse: 0.0257782\n", + "[1079]\tvalid_0's rmse: 0.0257782\n", + "[1080]\tvalid_0's rmse: 0.0257781\n", + "[1081]\tvalid_0's rmse: 0.025776\n", + "[1082]\tvalid_0's rmse: 0.0257761\n", + "[1083]\tvalid_0's rmse: 0.0257762\n", + "[1084]\tvalid_0's rmse: 0.0257773\n", + "[1085]\tvalid_0's rmse: 0.0257783\n", + "[1086]\tvalid_0's rmse: 0.0257785\n", + "[1087]\tvalid_0's rmse: 0.0257788\n", + "[1088]\tvalid_0's rmse: 0.0257792\n", + "[1089]\tvalid_0's rmse: 0.02578\n", 
+ "[1090]\tvalid_0's rmse: 0.0257788\n", + "[1091]\tvalid_0's rmse: 0.0257776\n", + "[1092]\tvalid_0's rmse: 0.0257795\n", + "[1093]\tvalid_0's rmse: 0.0257788\n", + "[1094]\tvalid_0's rmse: 0.0257782\n", + "[1095]\tvalid_0's rmse: 0.025778\n", + "[1096]\tvalid_0's rmse: 0.0257811\n", + "[1097]\tvalid_0's rmse: 0.0257814\n", + "[1098]\tvalid_0's rmse: 0.0257792\n", + "[1099]\tvalid_0's rmse: 0.0257788\n", + "[1100]\tvalid_0's rmse: 0.0257798\n", + "[1101]\tvalid_0's rmse: 0.0257804\n", + "[1102]\tvalid_0's rmse: 0.0257804\n", + "[1103]\tvalid_0's rmse: 0.0257781\n", + "[1104]\tvalid_0's rmse: 0.0257786\n", + "[1105]\tvalid_0's rmse: 0.0257794\n", + "[1106]\tvalid_0's rmse: 0.0257793\n", + "[1107]\tvalid_0's rmse: 0.0257795\n", + "[1108]\tvalid_0's rmse: 0.0257792\n", + "[1109]\tvalid_0's rmse: 0.0257754\n", + "[1110]\tvalid_0's rmse: 0.0257772\n", + "[1111]\tvalid_0's rmse: 0.0257766\n", + "[1112]\tvalid_0's rmse: 0.0257761\n", + "[1113]\tvalid_0's rmse: 0.0257759\n", + "[1114]\tvalid_0's rmse: 0.0257754\n", + "[1115]\tvalid_0's rmse: 0.0257751\n", + "[1116]\tvalid_0's rmse: 0.0257731\n", + "[1117]\tvalid_0's rmse: 0.0257728\n", + "[1118]\tvalid_0's rmse: 0.0257725\n", + "[1119]\tvalid_0's rmse: 0.025771\n", + "[1120]\tvalid_0's rmse: 0.0257698\n", + "[1121]\tvalid_0's rmse: 0.0257699\n", + "[1122]\tvalid_0's rmse: 0.0257698\n", + "[1123]\tvalid_0's rmse: 0.0257685\n", + "[1124]\tvalid_0's rmse: 0.0257678\n", + "[1125]\tvalid_0's rmse: 0.0257679\n", + "[1126]\tvalid_0's rmse: 0.0257667\n", + "[1127]\tvalid_0's rmse: 0.0257669\n", + "[1128]\tvalid_0's rmse: 0.0257648\n", + "[1129]\tvalid_0's rmse: 0.0257647\n", + "[1130]\tvalid_0's rmse: 0.0257651\n", + "[1131]\tvalid_0's rmse: 0.0257653\n", + "[1132]\tvalid_0's rmse: 0.0257657\n", + "[1133]\tvalid_0's rmse: 0.0257652\n", + "[1134]\tvalid_0's rmse: 0.0257653\n", + "[1135]\tvalid_0's rmse: 0.0257593\n", + "[1136]\tvalid_0's rmse: 0.0257585\n", + "[1137]\tvalid_0's rmse: 0.0257583\n", + "[1138]\tvalid_0's rmse: 
0.0257575\n", + "[1139]\tvalid_0's rmse: 0.0257571\n", + "[1140]\tvalid_0's rmse: 0.0257562\n", + "[1141]\tvalid_0's rmse: 0.0257562\n", + "[1142]\tvalid_0's rmse: 0.0257561\n", + "[1143]\tvalid_0's rmse: 0.025755\n", + "[1144]\tvalid_0's rmse: 0.025754\n", + "[1145]\tvalid_0's rmse: 0.0257534\n", + "[1146]\tvalid_0's rmse: 0.0257535\n", + "[1147]\tvalid_0's rmse: 0.0257503\n", + "[1148]\tvalid_0's rmse: 0.0257519\n", + "[1149]\tvalid_0's rmse: 0.0257486\n", + "[1150]\tvalid_0's rmse: 0.0257485\n", + "[1151]\tvalid_0's rmse: 0.0257492\n", + "[1152]\tvalid_0's rmse: 0.0257531\n", + "[1153]\tvalid_0's rmse: 0.0257529\n", + "[1154]\tvalid_0's rmse: 0.0257521\n", + "[1155]\tvalid_0's rmse: 0.0257517\n", + "[1156]\tvalid_0's rmse: 0.0257545\n", + "[1157]\tvalid_0's rmse: 0.0257556\n", + "[1158]\tvalid_0's rmse: 0.0257559\n", + "[1159]\tvalid_0's rmse: 0.0257578\n", + "[1160]\tvalid_0's rmse: 0.0257567\n", + "[1161]\tvalid_0's rmse: 0.0257569\n", + "[1162]\tvalid_0's rmse: 0.0257559\n", + "[1163]\tvalid_0's rmse: 0.0257577\n", + "[1164]\tvalid_0's rmse: 0.0257551\n", + "[1165]\tvalid_0's rmse: 0.025756\n", + "[1166]\tvalid_0's rmse: 0.0257558\n", + "[1167]\tvalid_0's rmse: 0.0257561\n", + "[1168]\tvalid_0's rmse: 0.0257562\n", + "[1169]\tvalid_0's rmse: 0.0257558\n", + "[1170]\tvalid_0's rmse: 0.0257527\n", + "[1171]\tvalid_0's rmse: 0.0257479\n", + "[1172]\tvalid_0's rmse: 0.0257481\n", + "[1173]\tvalid_0's rmse: 0.0257445\n", + "[1174]\tvalid_0's rmse: 0.0257442\n", + "[1175]\tvalid_0's rmse: 0.0257454\n", + "[1176]\tvalid_0's rmse: 0.0257446\n", + "[1177]\tvalid_0's rmse: 0.0257455\n", + "[1178]\tvalid_0's rmse: 0.0257465\n", + "[1179]\tvalid_0's rmse: 0.0257483\n", + "[1180]\tvalid_0's rmse: 0.0257494\n", + "[1181]\tvalid_0's rmse: 0.025749\n", + "[1182]\tvalid_0's rmse: 0.0257492\n", + "[1183]\tvalid_0's rmse: 0.0257497\n", + "[1184]\tvalid_0's rmse: 0.02575\n", + "[1185]\tvalid_0's rmse: 0.0257441\n", + "[1186]\tvalid_0's rmse: 0.0257412\n", + "[1187]\tvalid_0's 
rmse: 0.0257376\n", + "[1188]\tvalid_0's rmse: 0.025734\n", + "[1189]\tvalid_0's rmse: 0.0257333\n", + "[1190]\tvalid_0's rmse: 0.0257326\n", + "[1191]\tvalid_0's rmse: 0.0257325\n", + "[1192]\tvalid_0's rmse: 0.0257347\n", + "[1193]\tvalid_0's rmse: 0.0257189\n", + "[1194]\tvalid_0's rmse: 0.0257085\n", + "[1195]\tvalid_0's rmse: 0.0257073\n", + "[1196]\tvalid_0's rmse: 0.025707\n", + "[1197]\tvalid_0's rmse: 0.0257055\n", + "[1198]\tvalid_0's rmse: 0.0257056\n", + "[1199]\tvalid_0's rmse: 0.0257043\n", + "[1200]\tvalid_0's rmse: 0.0257063\n", + "[1201]\tvalid_0's rmse: 0.0257056\n", + "[1202]\tvalid_0's rmse: 0.0257059\n", + "[1203]\tvalid_0's rmse: 0.0257041\n", + "[1204]\tvalid_0's rmse: 0.0257018\n", + "[1205]\tvalid_0's rmse: 0.025702\n", + "[1206]\tvalid_0's rmse: 0.0257017\n", + "[1207]\tvalid_0's rmse: 0.0256966\n", + "[1208]\tvalid_0's rmse: 0.0256931\n", + "[1209]\tvalid_0's rmse: 0.0256931\n", + "[1210]\tvalid_0's rmse: 0.025693\n", + "[1211]\tvalid_0's rmse: 0.0256934\n", + "[1212]\tvalid_0's rmse: 0.0256969\n", + "[1213]\tvalid_0's rmse: 0.0256973\n", + "[1214]\tvalid_0's rmse: 0.0256982\n", + "[1215]\tvalid_0's rmse: 0.0256965\n", + "[1216]\tvalid_0's rmse: 0.0256955\n", + "[1217]\tvalid_0's rmse: 0.0256956\n", + "[1218]\tvalid_0's rmse: 0.0256956\n", + "[1219]\tvalid_0's rmse: 0.0256943\n", + "[1220]\tvalid_0's rmse: 0.0256932\n", + "[1221]\tvalid_0's rmse: 0.0256944\n", + "[1222]\tvalid_0's rmse: 0.0256935\n", + "[1223]\tvalid_0's rmse: 0.0256947\n", + "[1224]\tvalid_0's rmse: 0.0256951\n", + "[1225]\tvalid_0's rmse: 0.0256953\n", + "[1226]\tvalid_0's rmse: 0.0256967\n", + "[1227]\tvalid_0's rmse: 0.0256974\n", + "[1228]\tvalid_0's rmse: 0.0256971\n", + "[1229]\tvalid_0's rmse: 0.025697\n", + "[1230]\tvalid_0's rmse: 0.0256973\n", + "[1231]\tvalid_0's rmse: 0.0256971\n", + "[1232]\tvalid_0's rmse: 0.0256976\n", + "[1233]\tvalid_0's rmse: 0.0256976\n", + "[1234]\tvalid_0's rmse: 0.025696\n", + "[1235]\tvalid_0's rmse: 0.0256965\n", + 
"[1236]\tvalid_0's rmse: 0.0256961\n", + "[1237]\tvalid_0's rmse: 0.0256962\n", + "[1238]\tvalid_0's rmse: 0.0256996\n", + "[1239]\tvalid_0's rmse: 0.0257003\n", + "[1240]\tvalid_0's rmse: 0.0257023\n", + "[1241]\tvalid_0's rmse: 0.0257018\n", + "[1242]\tvalid_0's rmse: 0.0257016\n", + "[1243]\tvalid_0's rmse: 0.0257023\n", + "[1244]\tvalid_0's rmse: 0.0257013\n", + "[1245]\tvalid_0's rmse: 0.0256968\n", + "[1246]\tvalid_0's rmse: 0.0256967\n", + "[1247]\tvalid_0's rmse: 0.0256935\n", + "[1248]\tvalid_0's rmse: 0.0256932\n", + "[1249]\tvalid_0's rmse: 0.0256959\n", + "[1250]\tvalid_0's rmse: 0.025695\n", + "[1251]\tvalid_0's rmse: 0.025695\n", + "[1252]\tvalid_0's rmse: 0.0256954\n", + "[1253]\tvalid_0's rmse: 0.0256932\n", + "[1254]\tvalid_0's rmse: 0.0256933\n", + "[1255]\tvalid_0's rmse: 0.0256942\n", + "[1256]\tvalid_0's rmse: 0.0256929\n", + "[1257]\tvalid_0's rmse: 0.0256918\n", + "[1258]\tvalid_0's rmse: 0.0256916\n", + "[1259]\tvalid_0's rmse: 0.0256913\n", + "[1260]\tvalid_0's rmse: 0.0256924\n", + "[1261]\tvalid_0's rmse: 0.0256909\n", + "[1262]\tvalid_0's rmse: 0.0256907\n", + "[1263]\tvalid_0's rmse: 0.0256914\n", + "[1264]\tvalid_0's rmse: 0.0256819\n", + "[1265]\tvalid_0's rmse: 0.0256823\n", + "[1266]\tvalid_0's rmse: 0.0256822\n", + "[1267]\tvalid_0's rmse: 0.0256828\n", + "[1268]\tvalid_0's rmse: 0.025683\n", + "[1269]\tvalid_0's rmse: 0.0256841\n", + "[1270]\tvalid_0's rmse: 0.0256839\n", + "[1271]\tvalid_0's rmse: 0.0256837\n", + "[1272]\tvalid_0's rmse: 0.0256835\n", + "[1273]\tvalid_0's rmse: 0.0256819\n", + "[1274]\tvalid_0's rmse: 0.0256814\n", + "[1275]\tvalid_0's rmse: 0.0256859\n", + "[1276]\tvalid_0's rmse: 0.0256845\n", + "[1277]\tvalid_0's rmse: 0.0256854\n", + "[1278]\tvalid_0's rmse: 0.0256899\n", + "[1279]\tvalid_0's rmse: 0.0256912\n", + "[1280]\tvalid_0's rmse: 0.0256951\n", + "[1281]\tvalid_0's rmse: 0.0256952\n", + "[1282]\tvalid_0's rmse: 0.0256956\n", + "[1283]\tvalid_0's rmse: 0.0256958\n", + "[1284]\tvalid_0's rmse: 
0.0256956\n", + "[1285]\tvalid_0's rmse: 0.025695\n", + "[1286]\tvalid_0's rmse: 0.0256955\n", + "[1287]\tvalid_0's rmse: 0.0256955\n", + "[1288]\tvalid_0's rmse: 0.0256966\n", + "[1289]\tvalid_0's rmse: 0.0256969\n", + "[1290]\tvalid_0's rmse: 0.0256961\n", + "[1291]\tvalid_0's rmse: 0.0256955\n", + "[1292]\tvalid_0's rmse: 0.025695\n", + "[1293]\tvalid_0's rmse: 0.0256959\n", + "[1294]\tvalid_0's rmse: 0.0256953\n", + "[1295]\tvalid_0's rmse: 0.0256943\n", + "[1296]\tvalid_0's rmse: 0.0256935\n", + "[1297]\tvalid_0's rmse: 0.0256928\n", + "[1298]\tvalid_0's rmse: 0.0256922\n", + "[1299]\tvalid_0's rmse: 0.0256921\n", + "[1300]\tvalid_0's rmse: 0.0256929\n", + "[1301]\tvalid_0's rmse: 0.0256929\n", + "[1302]\tvalid_0's rmse: 0.0256922\n", + "[1303]\tvalid_0's rmse: 0.0256922\n", + "[1304]\tvalid_0's rmse: 0.0256903\n", + "[1305]\tvalid_0's rmse: 0.0256902\n", + "[1306]\tvalid_0's rmse: 0.025689\n", + "[1307]\tvalid_0's rmse: 0.0256867\n", + "[1308]\tvalid_0's rmse: 0.025687\n", + "[1309]\tvalid_0's rmse: 0.0256871\n", + "[1310]\tvalid_0's rmse: 0.0256871\n", + "[1311]\tvalid_0's rmse: 0.0256937\n", + "[1312]\tvalid_0's rmse: 0.0256927\n", + "[1313]\tvalid_0's rmse: 0.0256883\n", + "[1314]\tvalid_0's rmse: 0.0256881\n", + "[1315]\tvalid_0's rmse: 0.0256876\n", + "[1316]\tvalid_0's rmse: 0.0256871\n", + "[1317]\tvalid_0's rmse: 0.025685\n", + "[1318]\tvalid_0's rmse: 0.0256843\n", + "[1319]\tvalid_0's rmse: 0.0256852\n", + "[1320]\tvalid_0's rmse: 0.0256852\n", + "[1321]\tvalid_0's rmse: 0.0256852\n", + "[1322]\tvalid_0's rmse: 0.0256842\n", + "[1323]\tvalid_0's rmse: 0.0256825\n", + "[1324]\tvalid_0's rmse: 0.0256824\n", + "[1325]\tvalid_0's rmse: 0.0256792\n", + "[1326]\tvalid_0's rmse: 0.0256781\n", + "[1327]\tvalid_0's rmse: 0.0256776\n", + "[1328]\tvalid_0's rmse: 0.0256776\n", + "[1329]\tvalid_0's rmse: 0.0256782\n", + "[1330]\tvalid_0's rmse: 0.0256781\n", + "[1331]\tvalid_0's rmse: 0.0256777\n", + "[1332]\tvalid_0's rmse: 0.0256777\n", + "[1333]\tvalid_0's 
rmse: 0.0256772\n", + "[1334]\tvalid_0's rmse: 0.025677\n", + "[1335]\tvalid_0's rmse: 0.0256771\n", + "[1336]\tvalid_0's rmse: 0.0256768\n", + "[1337]\tvalid_0's rmse: 0.0256775\n", + "[1338]\tvalid_0's rmse: 0.0256776\n", + "[1339]\tvalid_0's rmse: 0.0256774\n", + "[1340]\tvalid_0's rmse: 0.0256753\n", + "[1341]\tvalid_0's rmse: 0.0256751\n", + "[1342]\tvalid_0's rmse: 0.0256747\n", + "[1343]\tvalid_0's rmse: 0.0256749\n", + "[1344]\tvalid_0's rmse: 0.0256746\n", + "[1345]\tvalid_0's rmse: 0.0256722\n", + "[1346]\tvalid_0's rmse: 0.0256697\n", + "[1347]\tvalid_0's rmse: 0.0256704\n", + "[1348]\tvalid_0's rmse: 0.0256681\n", + "[1349]\tvalid_0's rmse: 0.025668\n", + "[1350]\tvalid_0's rmse: 0.0256667\n", + "[1351]\tvalid_0's rmse: 0.0256684\n", + "[1352]\tvalid_0's rmse: 0.0256685\n", + "[1353]\tvalid_0's rmse: 0.0256673\n", + "[1354]\tvalid_0's rmse: 0.0256673\n", + "[1355]\tvalid_0's rmse: 0.025667\n", + "[1356]\tvalid_0's rmse: 0.0256675\n", + "[1357]\tvalid_0's rmse: 0.0256686\n", + "[1358]\tvalid_0's rmse: 0.0256681\n", + "[1359]\tvalid_0's rmse: 0.0256681\n", + "[1360]\tvalid_0's rmse: 0.0256682\n", + "[1361]\tvalid_0's rmse: 0.025668\n", + "[1362]\tvalid_0's rmse: 0.0256671\n", + "[1363]\tvalid_0's rmse: 0.0256675\n", + "[1364]\tvalid_0's rmse: 0.0256638\n", + "[1365]\tvalid_0's rmse: 0.0256638\n", + "[1366]\tvalid_0's rmse: 0.0256526\n", + "[1367]\tvalid_0's rmse: 0.0256534\n", + "[1368]\tvalid_0's rmse: 0.0256534\n", + "[1369]\tvalid_0's rmse: 0.025653\n", + "[1370]\tvalid_0's rmse: 0.0256528\n", + "[1371]\tvalid_0's rmse: 0.0256532\n", + "[1372]\tvalid_0's rmse: 0.025647\n", + "[1373]\tvalid_0's rmse: 0.0256454\n", + "[1374]\tvalid_0's rmse: 0.0256457\n", + "[1375]\tvalid_0's rmse: 0.0256426\n", + "[1376]\tvalid_0's rmse: 0.0256425\n", + "[1377]\tvalid_0's rmse: 0.0256441\n", + "[1378]\tvalid_0's rmse: 0.0256431\n", + "[1379]\tvalid_0's rmse: 0.0256452\n", + "[1380]\tvalid_0's rmse: 0.0256455\n", + "[1381]\tvalid_0's rmse: 0.0256454\n", + 
"[1382]\tvalid_0's rmse: 0.0256441\n", + "[1383]\tvalid_0's rmse: 0.0256446\n", + "[1384]\tvalid_0's rmse: 0.0256443\n", + "[1385]\tvalid_0's rmse: 0.0256444\n", + "[1386]\tvalid_0's rmse: 0.0256445\n", + "[1387]\tvalid_0's rmse: 0.0256436\n", + "[1388]\tvalid_0's rmse: 0.0256418\n", + "[1389]\tvalid_0's rmse: 0.0256422\n", + "[1390]\tvalid_0's rmse: 0.0256363\n", + "[1391]\tvalid_0's rmse: 0.0256359\n", + "[1392]\tvalid_0's rmse: 0.0256348\n", + "[1393]\tvalid_0's rmse: 0.0256345\n", + "[1394]\tvalid_0's rmse: 0.0256347\n", + "[1395]\tvalid_0's rmse: 0.025635\n", + "[1396]\tvalid_0's rmse: 0.0256333\n", + "[1397]\tvalid_0's rmse: 0.025633\n", + "[1398]\tvalid_0's rmse: 0.025633\n", + "[1399]\tvalid_0's rmse: 0.0256312\n", + "[1400]\tvalid_0's rmse: 0.025631\n", + "[1401]\tvalid_0's rmse: 0.025631\n", + "[1402]\tvalid_0's rmse: 0.0256313\n", + "[1403]\tvalid_0's rmse: 0.025627\n", + "[1404]\tvalid_0's rmse: 0.0256275\n", + "[1405]\tvalid_0's rmse: 0.0256277\n", + "[1406]\tvalid_0's rmse: 0.0256274\n", + "[1407]\tvalid_0's rmse: 0.0256277\n", + "[1408]\tvalid_0's rmse: 0.0256266\n", + "[1409]\tvalid_0's rmse: 0.025626\n", + "[1410]\tvalid_0's rmse: 0.0256258\n", + "[1411]\tvalid_0's rmse: 0.0256246\n", + "[1412]\tvalid_0's rmse: 0.0256245\n", + "[1413]\tvalid_0's rmse: 0.0256243\n", + "[1414]\tvalid_0's rmse: 0.0256237\n", + "[1415]\tvalid_0's rmse: 0.0256244\n", + "[1416]\tvalid_0's rmse: 0.0256238\n", + "[1417]\tvalid_0's rmse: 0.0256171\n", + "[1418]\tvalid_0's rmse: 0.0256115\n", + "[1419]\tvalid_0's rmse: 0.0256106\n", + "[1420]\tvalid_0's rmse: 0.0256105\n", + "[1421]\tvalid_0's rmse: 0.02561\n", + "[1422]\tvalid_0's rmse: 0.0256113\n", + "[1423]\tvalid_0's rmse: 0.0256111\n", + "[1424]\tvalid_0's rmse: 0.025611\n", + "[1425]\tvalid_0's rmse: 0.0256113\n", + "[1426]\tvalid_0's rmse: 0.0256108\n", + "[1427]\tvalid_0's rmse: 0.0256105\n", + "[1428]\tvalid_0's rmse: 0.0256095\n", + "[1429]\tvalid_0's rmse: 0.0256065\n", + "[1430]\tvalid_0's rmse: 0.0256062\n", + 
"[1431]\tvalid_0's rmse: 0.025607\n", + "[1432]\tvalid_0's rmse: 0.0256074\n", + "[1433]\tvalid_0's rmse: 0.025607\n", + "[1434]\tvalid_0's rmse: 0.0256081\n", + "[1435]\tvalid_0's rmse: 0.0256045\n", + "[1436]\tvalid_0's rmse: 0.0256057\n", + "[1437]\tvalid_0's rmse: 0.0256067\n", + "[1438]\tvalid_0's rmse: 0.0256063\n", + "[1439]\tvalid_0's rmse: 0.0256066\n", + "[1440]\tvalid_0's rmse: 0.0256061\n", + "[1441]\tvalid_0's rmse: 0.025605\n", + "[1442]\tvalid_0's rmse: 0.0256045\n", + "[1443]\tvalid_0's rmse: 0.0256032\n", + "[1444]\tvalid_0's rmse: 0.0256063\n", + "[1445]\tvalid_0's rmse: 0.0256076\n", + "[1446]\tvalid_0's rmse: 0.025608\n", + "[1447]\tvalid_0's rmse: 0.0256077\n", + "[1448]\tvalid_0's rmse: 0.0256093\n", + "[1449]\tvalid_0's rmse: 0.0256077\n", + "[1450]\tvalid_0's rmse: 0.0256074\n", + "[1451]\tvalid_0's rmse: 0.0256078\n", + "[1452]\tvalid_0's rmse: 0.025608\n", + "[1453]\tvalid_0's rmse: 0.0256081\n", + "[1454]\tvalid_0's rmse: 0.0256081\n", + "[1455]\tvalid_0's rmse: 0.0256079\n", + "[1456]\tvalid_0's rmse: 0.0256087\n", + "[1457]\tvalid_0's rmse: 0.0256062\n", + "[1458]\tvalid_0's rmse: 0.025602\n", + "[1459]\tvalid_0's rmse: 0.0256021\n", + "[1460]\tvalid_0's rmse: 0.0256041\n", + "[1461]\tvalid_0's rmse: 0.0256042\n", + "[1462]\tvalid_0's rmse: 0.025605\n", + "[1463]\tvalid_0's rmse: 0.0256056\n", + "[1464]\tvalid_0's rmse: 0.0256053\n", + "[1465]\tvalid_0's rmse: 0.0256077\n", + "[1466]\tvalid_0's rmse: 0.0256076\n", + "[1467]\tvalid_0's rmse: 0.0256083\n", + "[1468]\tvalid_0's rmse: 0.0256082\n", + "[1469]\tvalid_0's rmse: 0.0256074\n", + "[1470]\tvalid_0's rmse: 0.0256074\n", + "[1471]\tvalid_0's rmse: 0.025608\n", + "[1472]\tvalid_0's rmse: 0.0256081\n", + "[1473]\tvalid_0's rmse: 0.0256084\n", + "[1474]\tvalid_0's rmse: 0.0256081\n", + "[1475]\tvalid_0's rmse: 0.0256084\n", + "[1476]\tvalid_0's rmse: 0.0256083\n", + "[1477]\tvalid_0's rmse: 0.0256086\n", + "[1478]\tvalid_0's rmse: 0.0256084\n", + "[1479]\tvalid_0's rmse: 0.025608\n", + 
"[1480]\tvalid_0's rmse: 0.02561\n", + "[1481]\tvalid_0's rmse: 0.0256062\n", + "[1482]\tvalid_0's rmse: 0.0256062\n", + "[1483]\tvalid_0's rmse: 0.0256062\n", + "[1484]\tvalid_0's rmse: 0.0256056\n", + "[1485]\tvalid_0's rmse: 0.0256048\n", + "[1486]\tvalid_0's rmse: 0.0256054\n", + "[1487]\tvalid_0's rmse: 0.025605\n", + "[1488]\tvalid_0's rmse: 0.0256026\n", + "[1489]\tvalid_0's rmse: 0.0255999\n", + "[1490]\tvalid_0's rmse: 0.0255993\n", + "[1491]\tvalid_0's rmse: 0.0255995\n", + "[1492]\tvalid_0's rmse: 0.0256009\n", + "[1493]\tvalid_0's rmse: 0.0256006\n", + "[1494]\tvalid_0's rmse: 0.0256027\n", + "[1495]\tvalid_0's rmse: 0.0256021\n", + "[1496]\tvalid_0's rmse: 0.0256017\n", + "[1497]\tvalid_0's rmse: 0.0256016\n", + "[1498]\tvalid_0's rmse: 0.0256018\n", + "[1499]\tvalid_0's rmse: 0.0256011\n", + "[1500]\tvalid_0's rmse: 0.025602\n", + "[1501]\tvalid_0's rmse: 0.0256019\n", + "[1502]\tvalid_0's rmse: 0.025602\n", + "[1503]\tvalid_0's rmse: 0.0256027\n", + "[1504]\tvalid_0's rmse: 0.0255921\n", + "[1505]\tvalid_0's rmse: 0.0255919\n", + "[1506]\tvalid_0's rmse: 0.025592\n", + "[1507]\tvalid_0's rmse: 0.0255918\n", + "[1508]\tvalid_0's rmse: 0.0255914\n", + "[1509]\tvalid_0's rmse: 0.0255913\n", + "[1510]\tvalid_0's rmse: 0.0255907\n", + "[1511]\tvalid_0's rmse: 0.0255905\n", + "[1512]\tvalid_0's rmse: 0.0255883\n", + "[1513]\tvalid_0's rmse: 0.0255877\n", + "[1514]\tvalid_0's rmse: 0.025587\n", + "[1515]\tvalid_0's rmse: 0.0255873\n", + "[1516]\tvalid_0's rmse: 0.025587\n", + "[1517]\tvalid_0's rmse: 0.0255872\n", + "[1518]\tvalid_0's rmse: 0.0255876\n", + "[1519]\tvalid_0's rmse: 0.0255883\n", + "[1520]\tvalid_0's rmse: 0.0255884\n", + "[1521]\tvalid_0's rmse: 0.0255852\n", + "[1522]\tvalid_0's rmse: 0.0255853\n", + "[1523]\tvalid_0's rmse: 0.0255852\n", + "[1524]\tvalid_0's rmse: 0.0255875\n", + "[1525]\tvalid_0's rmse: 0.025588\n", + "[1526]\tvalid_0's rmse: 0.0255894\n", + "[1527]\tvalid_0's rmse: 0.0255891\n", + "[1528]\tvalid_0's rmse: 0.0255891\n", + 
"[1529]\tvalid_0's rmse: 0.0255892\n", + "[1530]\tvalid_0's rmse: 0.0255908\n", + "[1531]\tvalid_0's rmse: 0.0255902\n", + "[1532]\tvalid_0's rmse: 0.0255903\n", + "[1533]\tvalid_0's rmse: 0.0255905\n", + "[1534]\tvalid_0's rmse: 0.0255906\n", + "[1535]\tvalid_0's rmse: 0.0255913\n", + "[1536]\tvalid_0's rmse: 0.0255906\n", + "[1537]\tvalid_0's rmse: 0.0255919\n", + "[1538]\tvalid_0's rmse: 0.0255919\n", + "[1539]\tvalid_0's rmse: 0.0255936\n", + "[1540]\tvalid_0's rmse: 0.025594\n", + "[1541]\tvalid_0's rmse: 0.0255927\n", + "[1542]\tvalid_0's rmse: 0.0255924\n", + "[1543]\tvalid_0's rmse: 0.0255929\n", + "[1544]\tvalid_0's rmse: 0.0255937\n", + "[1545]\tvalid_0's rmse: 0.0255927\n", + "[1546]\tvalid_0's rmse: 0.025592\n", + "[1547]\tvalid_0's rmse: 0.0255914\n", + "[1548]\tvalid_0's rmse: 0.0255914\n", + "[1549]\tvalid_0's rmse: 0.0255913\n", + "[1550]\tvalid_0's rmse: 0.0255909\n", + "[1551]\tvalid_0's rmse: 0.0255915\n", + "[1552]\tvalid_0's rmse: 0.0255916\n", + "[1553]\tvalid_0's rmse: 0.0255916\n", + "[1554]\tvalid_0's rmse: 0.0255915\n", + "[1555]\tvalid_0's rmse: 0.0255921\n", + "[1556]\tvalid_0's rmse: 0.0255909\n", + "[1557]\tvalid_0's rmse: 0.0255908\n", + "[1558]\tvalid_0's rmse: 0.0255916\n", + "[1559]\tvalid_0's rmse: 0.0255904\n", + "[1560]\tvalid_0's rmse: 0.0255898\n", + "[1561]\tvalid_0's rmse: 0.0255908\n", + "[1562]\tvalid_0's rmse: 0.0255909\n", + "[1563]\tvalid_0's rmse: 0.0255911\n", + "[1564]\tvalid_0's rmse: 0.0255908\n", + "[1565]\tvalid_0's rmse: 0.0255928\n", + "[1566]\tvalid_0's rmse: 0.0255909\n", + "[1567]\tvalid_0's rmse: 0.0255908\n", + "[1568]\tvalid_0's rmse: 0.0255925\n", + "[1569]\tvalid_0's rmse: 0.0255903\n", + "[1570]\tvalid_0's rmse: 0.0255904\n", + "[1571]\tvalid_0's rmse: 0.0255902\n", + "[1572]\tvalid_0's rmse: 0.0255895\n", + "[1573]\tvalid_0's rmse: 0.0255941\n", + "[1574]\tvalid_0's rmse: 0.025596\n", + "[1575]\tvalid_0's rmse: 0.0255966\n", + "[1576]\tvalid_0's rmse: 0.0255966\n", + "[1577]\tvalid_0's rmse: 
0.0255965\n", + "[1578]\tvalid_0's rmse: 0.0255957\n", + "[1579]\tvalid_0's rmse: 0.0255949\n", + "[1580]\tvalid_0's rmse: 0.0255931\n", + "[1581]\tvalid_0's rmse: 0.0255936\n", + "[1582]\tvalid_0's rmse: 0.0255936\n", + "[1583]\tvalid_0's rmse: 0.0255941\n", + "[1584]\tvalid_0's rmse: 0.0255942\n", + "[1585]\tvalid_0's rmse: 0.0255976\n", + "[1586]\tvalid_0's rmse: 0.0255974\n", + "[1587]\tvalid_0's rmse: 0.0255956\n", + "[1588]\tvalid_0's rmse: 0.025595\n", + "[1589]\tvalid_0's rmse: 0.0255943\n", + "[1590]\tvalid_0's rmse: 0.0255946\n", + "[1591]\tvalid_0's rmse: 0.0255945\n", + "[1592]\tvalid_0's rmse: 0.0255938\n", + "[1593]\tvalid_0's rmse: 0.0255907\n", + "[1594]\tvalid_0's rmse: 0.0255832\n", + "[1595]\tvalid_0's rmse: 0.0255833\n", + "[1596]\tvalid_0's rmse: 0.0255824\n", + "[1597]\tvalid_0's rmse: 0.025583\n", + "[1598]\tvalid_0's rmse: 0.0255812\n", + "[1599]\tvalid_0's rmse: 0.0255811\n", + "[1600]\tvalid_0's rmse: 0.0255808\n", + "[1601]\tvalid_0's rmse: 0.0255761\n", + "[1602]\tvalid_0's rmse: 0.0255687\n", + "[1603]\tvalid_0's rmse: 0.0255698\n", + "[1604]\tvalid_0's rmse: 0.0255697\n", + "[1605]\tvalid_0's rmse: 0.0255691\n", + "[1606]\tvalid_0's rmse: 0.0255697\n", + "[1607]\tvalid_0's rmse: 0.0255554\n", + "[1608]\tvalid_0's rmse: 0.0255555\n", + "[1609]\tvalid_0's rmse: 0.0255572\n", + "[1610]\tvalid_0's rmse: 0.0255572\n", + "[1611]\tvalid_0's rmse: 0.0255571\n", + "[1612]\tvalid_0's rmse: 0.0255571\n", + "[1613]\tvalid_0's rmse: 0.0255573\n", + "[1614]\tvalid_0's rmse: 0.0255553\n", + "[1615]\tvalid_0's rmse: 0.0255563\n", + "[1616]\tvalid_0's rmse: 0.0255559\n", + "[1617]\tvalid_0's rmse: 0.0255553\n", + "[1618]\tvalid_0's rmse: 0.0255544\n", + "[1619]\tvalid_0's rmse: 0.0255544\n", + "[1620]\tvalid_0's rmse: 0.0255537\n", + "[1621]\tvalid_0's rmse: 0.0255486\n", + "[1622]\tvalid_0's rmse: 0.0255496\n", + "[1623]\tvalid_0's rmse: 0.0255495\n", + "[1624]\tvalid_0's rmse: 0.0255509\n", + "[1625]\tvalid_0's rmse: 0.0255513\n", + 
"[1626]\tvalid_0's rmse: 0.0255499\n", + "[1627]\tvalid_0's rmse: 0.0255497\n", + "[1628]\tvalid_0's rmse: 0.0255489\n", + "[1629]\tvalid_0's rmse: 0.0255457\n", + "[1630]\tvalid_0's rmse: 0.0255384\n", + "[1631]\tvalid_0's rmse: 0.0255383\n", + "[1632]\tvalid_0's rmse: 0.0255377\n", + "[1633]\tvalid_0's rmse: 0.025538\n", + "[1634]\tvalid_0's rmse: 0.0255383\n", + "[1635]\tvalid_0's rmse: 0.0255381\n", + "[1636]\tvalid_0's rmse: 0.0255379\n", + "[1637]\tvalid_0's rmse: 0.0255386\n", + "[1638]\tvalid_0's rmse: 0.0255391\n", + "[1639]\tvalid_0's rmse: 0.0255386\n", + "[1640]\tvalid_0's rmse: 0.0255322\n", + "[1641]\tvalid_0's rmse: 0.0255328\n", + "[1642]\tvalid_0's rmse: 0.0255273\n", + "[1643]\tvalid_0's rmse: 0.0255264\n", + "[1644]\tvalid_0's rmse: 0.0255262\n", + "[1645]\tvalid_0's rmse: 0.0255239\n", + "[1646]\tvalid_0's rmse: 0.0255234\n", + "[1647]\tvalid_0's rmse: 0.0255245\n", + "[1648]\tvalid_0's rmse: 0.0255188\n", + "[1649]\tvalid_0's rmse: 0.0255174\n", + "[1650]\tvalid_0's rmse: 0.0255231\n", + "[1651]\tvalid_0's rmse: 0.0255231\n", + "[1652]\tvalid_0's rmse: 0.0255237\n", + "[1653]\tvalid_0's rmse: 0.0255217\n", + "[1654]\tvalid_0's rmse: 0.025521\n", + "[1655]\tvalid_0's rmse: 0.0255201\n", + "[1656]\tvalid_0's rmse: 0.02552\n", + "[1657]\tvalid_0's rmse: 0.0255204\n", + "[1658]\tvalid_0's rmse: 0.0255194\n", + "[1659]\tvalid_0's rmse: 0.0255194\n", + "[1660]\tvalid_0's rmse: 0.0255194\n", + "[1661]\tvalid_0's rmse: 0.0255189\n", + "[1662]\tvalid_0's rmse: 0.0255192\n", + "[1663]\tvalid_0's rmse: 0.0255183\n", + "[1664]\tvalid_0's rmse: 0.0255186\n", + "[1665]\tvalid_0's rmse: 0.0255179\n", + "[1666]\tvalid_0's rmse: 0.0255182\n", + "[1667]\tvalid_0's rmse: 0.0255178\n", + "[1668]\tvalid_0's rmse: 0.0255175\n", + "[1669]\tvalid_0's rmse: 0.0255181\n", + "[1670]\tvalid_0's rmse: 0.0255179\n", + "[1671]\tvalid_0's rmse: 0.025517\n", + "[1672]\tvalid_0's rmse: 0.0255169\n", + "[1673]\tvalid_0's rmse: 0.0255012\n", + "[1674]\tvalid_0's rmse: 
0.0255018\n", + "[1675]\tvalid_0's rmse: 0.0255017\n", + "[1676]\tvalid_0's rmse: 0.0255032\n", + "[1677]\tvalid_0's rmse: 0.0255028\n", + "[1678]\tvalid_0's rmse: 0.0255035\n", + "[1679]\tvalid_0's rmse: 0.0255038\n", + "[1680]\tvalid_0's rmse: 0.0255043\n", + "[1681]\tvalid_0's rmse: 0.0255043\n", + "[1682]\tvalid_0's rmse: 0.0255052\n", + "[1683]\tvalid_0's rmse: 0.0255043\n", + "[1684]\tvalid_0's rmse: 0.0255045\n", + "[1685]\tvalid_0's rmse: 0.0255044\n", + "[1686]\tvalid_0's rmse: 0.0255039\n", + "[1687]\tvalid_0's rmse: 0.0255027\n", + "[1688]\tvalid_0's rmse: 0.0255026\n", + "[1689]\tvalid_0's rmse: 0.0255028\n", + "[1690]\tvalid_0's rmse: 0.0255036\n", + "[1691]\tvalid_0's rmse: 0.0255024\n", + "[1692]\tvalid_0's rmse: 0.0255021\n", + "[1693]\tvalid_0's rmse: 0.0255018\n", + "[1694]\tvalid_0's rmse: 0.0255018\n", + "[1695]\tvalid_0's rmse: 0.0255012\n", + "[1696]\tvalid_0's rmse: 0.0255006\n", + "[1697]\tvalid_0's rmse: 0.0255006\n", + "[1698]\tvalid_0's rmse: 0.0255005\n", + "[1699]\tvalid_0's rmse: 0.0254974\n", + "[1700]\tvalid_0's rmse: 0.0254964\n", + "[1701]\tvalid_0's rmse: 0.0254971\n", + "[1702]\tvalid_0's rmse: 0.0254974\n", + "[1703]\tvalid_0's rmse: 0.0254974\n", + "[1704]\tvalid_0's rmse: 0.0254945\n", + "[1705]\tvalid_0's rmse: 0.0254948\n", + "[1706]\tvalid_0's rmse: 0.0254947\n", + "[1707]\tvalid_0's rmse: 0.025495\n", + "[1708]\tvalid_0's rmse: 0.0254952\n", + "[1709]\tvalid_0's rmse: 0.025495\n", + "[1710]\tvalid_0's rmse: 0.0254946\n", + "[1711]\tvalid_0's rmse: 0.0254946\n", + "[1712]\tvalid_0's rmse: 0.0254923\n", + "[1713]\tvalid_0's rmse: 0.0254919\n", + "[1714]\tvalid_0's rmse: 0.0254932\n", + "[1715]\tvalid_0's rmse: 0.025493\n", + "[1716]\tvalid_0's rmse: 0.0254935\n", + "[1717]\tvalid_0's rmse: 0.025492\n", + "[1718]\tvalid_0's rmse: 0.0254914\n", + "[1719]\tvalid_0's rmse: 0.0254918\n", + "[1720]\tvalid_0's rmse: 0.0254917\n", + "[1721]\tvalid_0's rmse: 0.0254922\n", + "[1722]\tvalid_0's rmse: 0.0254925\n", + "[1723]\tvalid_0's 
rmse: 0.0254928\n", + "[1724]\tvalid_0's rmse: 0.0254932\n", + "[1725]\tvalid_0's rmse: 0.0254931\n", + "[1726]\tvalid_0's rmse: 0.0254933\n", + "[1727]\tvalid_0's rmse: 0.0254931\n", + "[1728]\tvalid_0's rmse: 0.0254962\n", + "[1729]\tvalid_0's rmse: 0.0254961\n", + "[1730]\tvalid_0's rmse: 0.0254956\n", + "[1731]\tvalid_0's rmse: 0.025495\n", + "[1732]\tvalid_0's rmse: 0.0254947\n", + "[1733]\tvalid_0's rmse: 0.0254938\n", + "[1734]\tvalid_0's rmse: 0.0254942\n", + "[1735]\tvalid_0's rmse: 0.0254946\n", + "[1736]\tvalid_0's rmse: 0.0254936\n", + "[1737]\tvalid_0's rmse: 0.0254922\n", + "[1738]\tvalid_0's rmse: 0.0254917\n", + "[1739]\tvalid_0's rmse: 0.025492\n", + "[1740]\tvalid_0's rmse: 0.025492\n", + "[1741]\tvalid_0's rmse: 0.0254923\n", + "[1742]\tvalid_0's rmse: 0.0254932\n", + "[1743]\tvalid_0's rmse: 0.0254933\n", + "[1744]\tvalid_0's rmse: 0.0254935\n", + "[1745]\tvalid_0's rmse: 0.0254933\n", + "[1746]\tvalid_0's rmse: 0.0254937\n", + "[1747]\tvalid_0's rmse: 0.0254928\n", + "[1748]\tvalid_0's rmse: 0.0254926\n", + "[1749]\tvalid_0's rmse: 0.0254945\n", + "[1750]\tvalid_0's rmse: 0.0254948\n", + "[1751]\tvalid_0's rmse: 0.025495\n", + "[1752]\tvalid_0's rmse: 0.025487\n", + "[1753]\tvalid_0's rmse: 0.0254868\n", + "[1754]\tvalid_0's rmse: 0.025486\n", + "[1755]\tvalid_0's rmse: 0.0254842\n", + "[1756]\tvalid_0's rmse: 0.0254837\n", + "[1757]\tvalid_0's rmse: 0.025483\n", + "[1758]\tvalid_0's rmse: 0.0254827\n", + "[1759]\tvalid_0's rmse: 0.0254805\n", + "[1760]\tvalid_0's rmse: 0.02548\n", + "[1761]\tvalid_0's rmse: 0.0254799\n", + "[1762]\tvalid_0's rmse: 0.0254799\n", + "[1763]\tvalid_0's rmse: 0.0254794\n", + "[1764]\tvalid_0's rmse: 0.0254783\n", + "[1765]\tvalid_0's rmse: 0.0254772\n", + "[1766]\tvalid_0's rmse: 0.0254773\n", + "[1767]\tvalid_0's rmse: 0.0254773\n", + "[1768]\tvalid_0's rmse: 0.0254767\n", + "[1769]\tvalid_0's rmse: 0.0254775\n", + "[1770]\tvalid_0's rmse: 0.0254774\n", + "[1771]\tvalid_0's rmse: 0.0254775\n", + "[1772]\tvalid_0's 
rmse: 0.0254769\n", + "[1773]\tvalid_0's rmse: 0.025477\n", + "[1774]\tvalid_0's rmse: 0.0254779\n", + "[1775]\tvalid_0's rmse: 0.025477\n", + "[1776]\tvalid_0's rmse: 0.0254767\n", + "[1777]\tvalid_0's rmse: 0.025474\n", + "[1778]\tvalid_0's rmse: 0.0254756\n", + "[1779]\tvalid_0's rmse: 0.0254761\n", + "[1780]\tvalid_0's rmse: 0.025476\n", + "[1781]\tvalid_0's rmse: 0.0254763\n", + "[1782]\tvalid_0's rmse: 0.0254763\n", + "[1783]\tvalid_0's rmse: 0.0254762\n", + "[1784]\tvalid_0's rmse: 0.0254749\n", + "[1785]\tvalid_0's rmse: 0.025473\n", + "[1786]\tvalid_0's rmse: 0.0254723\n", + "[1787]\tvalid_0's rmse: 0.0254712\n", + "[1788]\tvalid_0's rmse: 0.0254711\n", + "[1789]\tvalid_0's rmse: 0.0254718\n", + "[1790]\tvalid_0's rmse: 0.0254716\n", + "[1791]\tvalid_0's rmse: 0.0254721\n", + "[1792]\tvalid_0's rmse: 0.0254709\n", + "[1793]\tvalid_0's rmse: 0.0254738\n", + "[1794]\tvalid_0's rmse: 0.0254739\n", + "[1795]\tvalid_0's rmse: 0.025474\n", + "[1796]\tvalid_0's rmse: 0.0254719\n", + "[1797]\tvalid_0's rmse: 0.0254719\n", + "[1798]\tvalid_0's rmse: 0.0254734\n", + "[1799]\tvalid_0's rmse: 0.0254738\n", + "[1800]\tvalid_0's rmse: 0.0254739\n", + "[1801]\tvalid_0's rmse: 0.0254722\n", + "[1802]\tvalid_0's rmse: 0.0254725\n", + "[1803]\tvalid_0's rmse: 0.0254716\n", + "[1804]\tvalid_0's rmse: 0.0254717\n", + "[1805]\tvalid_0's rmse: 0.0254718\n", + "[1806]\tvalid_0's rmse: 0.025471\n", + "[1807]\tvalid_0's rmse: 0.0254714\n", + "[1808]\tvalid_0's rmse: 0.0254714\n", + "[1809]\tvalid_0's rmse: 0.0254713\n", + "[1810]\tvalid_0's rmse: 0.0254711\n", + "[1811]\tvalid_0's rmse: 0.0254716\n", + "[1812]\tvalid_0's rmse: 0.025472\n", + "[1813]\tvalid_0's rmse: 0.0254719\n", + "[1814]\tvalid_0's rmse: 0.0254712\n", + "[1815]\tvalid_0's rmse: 0.0254712\n", + "[1816]\tvalid_0's rmse: 0.0254708\n", + "[1817]\tvalid_0's rmse: 0.0254711\n", + "[1818]\tvalid_0's rmse: 0.0254701\n", + "[1819]\tvalid_0's rmse: 0.0254683\n", + "[1820]\tvalid_0's rmse: 0.0254685\n", + 
"[1821]\tvalid_0's rmse: 0.0254685\n", + "[1822]\tvalid_0's rmse: 0.0254687\n", + "[1823]\tvalid_0's rmse: 0.0254688\n", + "[1824]\tvalid_0's rmse: 0.0254686\n", + "[1825]\tvalid_0's rmse: 0.0254686\n", + "[1826]\tvalid_0's rmse: 0.0254685\n", + "[1827]\tvalid_0's rmse: 0.0254681\n", + "[1828]\tvalid_0's rmse: 0.0254681\n", + "[1829]\tvalid_0's rmse: 0.025468\n", + "[1830]\tvalid_0's rmse: 0.0254683\n", + "[1831]\tvalid_0's rmse: 0.025464\n", + "[1832]\tvalid_0's rmse: 0.0254641\n", + "[1833]\tvalid_0's rmse: 0.0254636\n", + "[1834]\tvalid_0's rmse: 0.0254633\n", + "[1835]\tvalid_0's rmse: 0.0254625\n", + "[1836]\tvalid_0's rmse: 0.0254622\n", + "[1837]\tvalid_0's rmse: 0.0254617\n", + "[1838]\tvalid_0's rmse: 0.0254617\n", + "[1839]\tvalid_0's rmse: 0.0254609\n", + "[1840]\tvalid_0's rmse: 0.025452\n", + "[1841]\tvalid_0's rmse: 0.0254516\n", + "[1842]\tvalid_0's rmse: 0.0254517\n", + "[1843]\tvalid_0's rmse: 0.0254523\n", + "[1844]\tvalid_0's rmse: 0.0254516\n", + "[1845]\tvalid_0's rmse: 0.0254519\n", + "[1846]\tvalid_0's rmse: 0.0254519\n", + "[1847]\tvalid_0's rmse: 0.0254506\n", + "[1848]\tvalid_0's rmse: 0.0254508\n", + "[1849]\tvalid_0's rmse: 0.0254503\n", + "[1850]\tvalid_0's rmse: 0.0254484\n", + "[1851]\tvalid_0's rmse: 0.0254485\n", + "[1852]\tvalid_0's rmse: 0.0254486\n", + "[1853]\tvalid_0's rmse: 0.0254492\n", + "[1854]\tvalid_0's rmse: 0.0254493\n", + "[1855]\tvalid_0's rmse: 0.0254488\n", + "[1856]\tvalid_0's rmse: 0.0254492\n", + "[1857]\tvalid_0's rmse: 0.0254538\n", + "[1858]\tvalid_0's rmse: 0.0254541\n", + "[1859]\tvalid_0's rmse: 0.0254591\n", + "[1860]\tvalid_0's rmse: 0.0254593\n", + "[1861]\tvalid_0's rmse: 0.0254593\n", + "[1862]\tvalid_0's rmse: 0.0254589\n", + "[1863]\tvalid_0's rmse: 0.0254589\n", + "[1864]\tvalid_0's rmse: 0.0254596\n", + "[1865]\tvalid_0's rmse: 0.0254593\n", + "[1866]\tvalid_0's rmse: 0.02546\n", + "[1867]\tvalid_0's rmse: 0.0254596\n", + "[1868]\tvalid_0's rmse: 0.0254609\n", + "[1869]\tvalid_0's rmse: 
0.0254586\n", + "[1870]\tvalid_0's rmse: 0.0254583\n", + "[1871]\tvalid_0's rmse: 0.0254584\n", + "[1872]\tvalid_0's rmse: 0.0254582\n", + "[1873]\tvalid_0's rmse: 0.025458\n", + "[1874]\tvalid_0's rmse: 0.0254559\n", + "[1875]\tvalid_0's rmse: 0.0254556\n", + "[1876]\tvalid_0's rmse: 0.0254552\n", + "[1877]\tvalid_0's rmse: 0.0254551\n", + "[1878]\tvalid_0's rmse: 0.0254557\n", + "[1879]\tvalid_0's rmse: 0.0254539\n", + "[1880]\tvalid_0's rmse: 0.0254533\n", + "[1881]\tvalid_0's rmse: 0.0254524\n", + "[1882]\tvalid_0's rmse: 0.0254525\n", + "[1883]\tvalid_0's rmse: 0.0254542\n", + "[1884]\tvalid_0's rmse: 0.0254548\n", + "[1885]\tvalid_0's rmse: 0.0254539\n", + "[1886]\tvalid_0's rmse: 0.0254536\n", + "[1887]\tvalid_0's rmse: 0.0254537\n", + "[1888]\tvalid_0's rmse: 0.0254532\n", + "[1889]\tvalid_0's rmse: 0.0254555\n", + "[1890]\tvalid_0's rmse: 0.0254548\n", + "[1891]\tvalid_0's rmse: 0.0254549\n", + "[1892]\tvalid_0's rmse: 0.0254548\n", + "[1893]\tvalid_0's rmse: 0.0254545\n", + "[1894]\tvalid_0's rmse: 0.0254543\n", + "[1895]\tvalid_0's rmse: 0.0254553\n", + "[1896]\tvalid_0's rmse: 0.0254551\n", + "[1897]\tvalid_0's rmse: 0.0254553\n", + "[1898]\tvalid_0's rmse: 0.0254557\n", + "[1899]\tvalid_0's rmse: 0.0254553\n", + "[1900]\tvalid_0's rmse: 0.0254554\n", + "[1901]\tvalid_0's rmse: 0.025455\n", + "[1902]\tvalid_0's rmse: 0.0254548\n", + "[1903]\tvalid_0's rmse: 0.0254559\n", + "[1904]\tvalid_0's rmse: 0.025455\n", + "[1905]\tvalid_0's rmse: 0.0254548\n", + "[1906]\tvalid_0's rmse: 0.0254548\n", + "[1907]\tvalid_0's rmse: 0.025454\n", + "[1908]\tvalid_0's rmse: 0.0254535\n", + "[1909]\tvalid_0's rmse: 0.0254534\n", + "[1910]\tvalid_0's rmse: 0.0254536\n", + "[1911]\tvalid_0's rmse: 0.0254536\n", + "[1912]\tvalid_0's rmse: 0.0254531\n", + "[1913]\tvalid_0's rmse: 0.0254532\n", + "[1914]\tvalid_0's rmse: 0.0254535\n", + "[1915]\tvalid_0's rmse: 0.0254525\n", + "[1916]\tvalid_0's rmse: 0.025452\n", + "[1917]\tvalid_0's rmse: 0.0254519\n", + "[1918]\tvalid_0's 
rmse: 0.0254518\n", + "[1919]\tvalid_0's rmse: 0.0254515\n", + "[1920]\tvalid_0's rmse: 0.0254513\n", + "[1921]\tvalid_0's rmse: 0.0254524\n", + "[1922]\tvalid_0's rmse: 0.0254529\n", + "[1923]\tvalid_0's rmse: 0.0254551\n", + "[1924]\tvalid_0's rmse: 0.0254534\n", + "[1925]\tvalid_0's rmse: 0.0254535\n", + "[1926]\tvalid_0's rmse: 0.0254536\n", + "[1927]\tvalid_0's rmse: 0.0254536\n", + "[1928]\tvalid_0's rmse: 0.0254538\n", + "[1929]\tvalid_0's rmse: 0.0254538\n", + "[1930]\tvalid_0's rmse: 0.0254529\n", + "[1931]\tvalid_0's rmse: 0.0254529\n", + "[1932]\tvalid_0's rmse: 0.0254527\n", + "[1933]\tvalid_0's rmse: 0.0254525\n", + "[1934]\tvalid_0's rmse: 0.0254524\n", + "[1935]\tvalid_0's rmse: 0.0254518\n", + "[1936]\tvalid_0's rmse: 0.0254518\n", + "[1937]\tvalid_0's rmse: 0.0254518\n", + "[1938]\tvalid_0's rmse: 0.0254512\n", + "[1939]\tvalid_0's rmse: 0.0254511\n", + "[1940]\tvalid_0's rmse: 0.0254517\n", + "[1941]\tvalid_0's rmse: 0.0254514\n", + "[1942]\tvalid_0's rmse: 0.0254517\n", + "[1943]\tvalid_0's rmse: 0.0254503\n", + "[1944]\tvalid_0's rmse: 0.0254474\n", + "[1945]\tvalid_0's rmse: 0.0254471\n", + "[1946]\tvalid_0's rmse: 0.0254472\n", + "[1947]\tvalid_0's rmse: 0.0254473\n", + "[1948]\tvalid_0's rmse: 0.0254469\n", + "[1949]\tvalid_0's rmse: 0.0254462\n", + "[1950]\tvalid_0's rmse: 0.0254464\n", + "[1951]\tvalid_0's rmse: 0.025446\n", + "[1952]\tvalid_0's rmse: 0.025446\n", + "[1953]\tvalid_0's rmse: 0.0254422\n", + "[1954]\tvalid_0's rmse: 0.0254356\n", + "[1955]\tvalid_0's rmse: 0.0254358\n", + "[1956]\tvalid_0's rmse: 0.0254357\n", + "[1957]\tvalid_0's rmse: 0.0254344\n", + "[1958]\tvalid_0's rmse: 0.0254348\n", + "[1959]\tvalid_0's rmse: 0.0254348\n", + "[1960]\tvalid_0's rmse: 0.0254347\n", + "[1961]\tvalid_0's rmse: 0.0254346\n", + "[1962]\tvalid_0's rmse: 0.0254346\n", + "[1963]\tvalid_0's rmse: 0.0254344\n", + "[1964]\tvalid_0's rmse: 0.0254341\n", + "[1965]\tvalid_0's rmse: 0.0254337\n", + "[1966]\tvalid_0's rmse: 0.0254337\n", + 
"[1967]\tvalid_0's rmse: 0.0254335\n", + "[1968]\tvalid_0's rmse: 0.0254336\n", + "[1969]\tvalid_0's rmse: 0.0254336\n", + "[1970]\tvalid_0's rmse: 0.0254333\n", + "[1971]\tvalid_0's rmse: 0.0254335\n", + "[1972]\tvalid_0's rmse: 0.0254333\n", + "[1973]\tvalid_0's rmse: 0.0254328\n", + "[1974]\tvalid_0's rmse: 0.0254329\n", + "[1975]\tvalid_0's rmse: 0.0254329\n", + "[1976]\tvalid_0's rmse: 0.0254334\n", + "[1977]\tvalid_0's rmse: 0.0254333\n", + "[1978]\tvalid_0's rmse: 0.0254336\n", + "[1979]\tvalid_0's rmse: 0.0254342\n", + "[1980]\tvalid_0's rmse: 0.0254343\n", + "[1981]\tvalid_0's rmse: 0.0254338\n", + "[1982]\tvalid_0's rmse: 0.0254341\n", + "[1983]\tvalid_0's rmse: 0.0254341\n", + "[1984]\tvalid_0's rmse: 0.0254343\n", + "[1985]\tvalid_0's rmse: 0.0254342\n", + "[1986]\tvalid_0's rmse: 0.0254341\n", + "[1987]\tvalid_0's rmse: 0.0254347\n", + "[1988]\tvalid_0's rmse: 0.025435\n", + "[1989]\tvalid_0's rmse: 0.0254349\n", + "[1990]\tvalid_0's rmse: 0.0254338\n", + "[1991]\tvalid_0's rmse: 0.0254339\n", + "[1992]\tvalid_0's rmse: 0.0254342\n", + "[1993]\tvalid_0's rmse: 0.0254341\n", + "[1994]\tvalid_0's rmse: 0.0254341\n", + "[1995]\tvalid_0's rmse: 0.0254339\n", + "[1996]\tvalid_0's rmse: 0.0254349\n", + "[1997]\tvalid_0's rmse: 0.025434\n", + "[1998]\tvalid_0's rmse: 0.0254327\n", + "[1999]\tvalid_0's rmse: 0.0254326\n", + "[2000]\tvalid_0's rmse: 0.025432\n", + "Did not meet early stopping. 
Best iteration is:\n", + "[2000]\tvalid_0's rmse: 0.025432\n" + ] + } + ], + "source": [ + "gbm = lgb.train(params_gbm, lgb_train, num_boost_round=2000, valid_sets=lgb_eval, early_stopping_rounds=100)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "y_pred = gbm.predict(X_test)\n", + "y_true = Y_test.values" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 3.7E-04\n", + "RMSE: 0.019\n", + "MAE: 0.013\n", + "MAPE: 2.64 %\n", + "R_2: 0.93\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true, y_pred)\n", + "RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n", + "MAE = mean_absolute_error(y_true, y_pred)\n", + "MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + "R_2 = r2_score(y_true, y_pred)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 3))\n", + "print('MAE:', round(MAE, 3))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差a" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [], + "source": [ + "dtrain = xgb.DMatrix(X_train, Y_train)\n", + "dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + "dtest = xgb.DMatrix(X_test, Y_test)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 102, + "outputs": [], + "source": [ + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBRegressor\n", + "from bayes_opt import BayesianOptimization" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 103, + "outputs": [], + "source": [ + "def xgb_cv(max_depth, learning_rate, n_estimators, 
min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n", + " val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth),\n", + " learning_rate=learning_rate,\n", + " n_estimators=int(n_estimators),\n", + " min_child_weight=min_child_weight,\n", + " subsample=max(min(subsample, 1), 0),\n", + " colsample_bytree=max(min(colsample_bytree, 1), 0),\n", + " reg_alpha=max(reg_alpha, 0), gamma=gamma, objective='reg:squarederror',\n", + " booster='gbtree',\n", + " seed=666), X=use_data[feature_cols], y=use_data.values[:1], scoring='r2',\n", + " cv=10).max()\n", + " return val" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 104, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "| iter | target | colsam... | gamma | learni... | max_depth | min_ch... | n_esti... | reg_alpha | subsample |\n", + "-------------------------------------------------------------------------------------------------------------------------\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Found input variables with inconsistent numbers of samples: [3080, 1]", + "output_type": "error", + "traceback": [ + "\u001B[1;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[1;31mValueError\u001B[0m Traceback (most recent call last)", + "\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_17148\\1576227182.py\u001B[0m in \u001B[0;36m\u001B[1;34m\u001B[0m\n\u001B[0;32m 7\u001B[0m \u001B[1;34m'reg_alpha'\u001B[0m\u001B[1;33m:\u001B[0m \u001B[1;33m(\u001B[0m\u001B[1;36m0.001\u001B[0m\u001B[1;33m,\u001B[0m \u001B[1;36m10\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 8\u001B[0m 'gamma': (0.001, 10)})\n\u001B[1;32m----> 9\u001B[1;33m 
\u001B[0mxgb_bo\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mmaximize\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mn_iter\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m100\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0minit_points\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;36m10\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\bayes_opt\\bayesian_optimization.py\u001B[0m in \u001B[0;36mmaximize\u001B[1;34m(self, init_points, n_iter, acquisition_function, acq, kappa, kappa_decay, kappa_decay_delay, xi, **gp_params)\u001B[0m\n\u001B[0;32m 309\u001B[0m \u001B[0mx_probe\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0msuggest\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mutil\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 310\u001B[0m \u001B[0miteration\u001B[0m \u001B[1;33m+=\u001B[0m \u001B[1;36m1\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 311\u001B[1;33m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mprobe\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mx_probe\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mlazy\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;32mFalse\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 312\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 313\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_bounds_transformer\u001B[0m \u001B[1;32mand\u001B[0m \u001B[0miteration\u001B[0m \u001B[1;33m>\u001B[0m \u001B[1;36m0\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\bayes_opt\\bayesian_optimization.py\u001B[0m in \u001B[0;36mprobe\u001B[1;34m(self, params, lazy)\u001B[0m\n\u001B[0;32m 206\u001B[0m 
\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_queue\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0madd\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 207\u001B[0m \u001B[1;32melse\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 208\u001B[1;33m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_space\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mprobe\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 209\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mdispatch\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mEvents\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mOPTIMIZATION_STEP\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 210\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\bayes_opt\\target_space.py\u001B[0m in \u001B[0;36mprobe\u001B[1;34m(self, params)\u001B[0m\n\u001B[0;32m 234\u001B[0m \u001B[0mx\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_as_array\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 235\u001B[0m \u001B[0mparams\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mdict\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mzip\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_keys\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mx\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 236\u001B[1;33m \u001B[0mtarget\u001B[0m \u001B[1;33m=\u001B[0m 
\u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0mtarget_func\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m**\u001B[0m\u001B[0mparams\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 237\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 238\u001B[0m \u001B[1;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[1;33m.\u001B[0m\u001B[0m_constraint\u001B[0m \u001B[1;32mis\u001B[0m \u001B[1;32mNone\u001B[0m\u001B[1;33m:\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32m~\\AppData\\Local\\Temp\\ipykernel_17148\\2288155185.py\u001B[0m in \u001B[0;36mxgb_cv\u001B[1;34m(max_depth, learning_rate, n_estimators, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma)\u001B[0m\n\u001B[0;32m 9\u001B[0m \u001B[0mbooster\u001B[0m\u001B[1;33m=\u001B[0m\u001B[1;34m'gbtree'\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 10\u001B[0m seed=666), X=use_data[feature_cols], y=use_data.values[:1], scoring='r2',\n\u001B[1;32m---> 11\u001B[1;33m cv=10).max()\n\u001B[0m\u001B[0;32m 12\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mval\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\u001B[0m in \u001B[0;36mcross_val_score\u001B[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)\u001B[0m\n\u001B[0;32m 518\u001B[0m \u001B[0mfit_params\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mfit_params\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 519\u001B[0m \u001B[0mpre_dispatch\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mpre_dispatch\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 520\u001B[1;33m 
\u001B[0merror_score\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0merror_score\u001B[0m\u001B[1;33m,\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 521\u001B[0m )\n\u001B[0;32m 522\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mcv_results\u001B[0m\u001B[1;33m[\u001B[0m\u001B[1;34m\"test_score\"\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\model_selection\\_validation.py\u001B[0m in \u001B[0;36mcross_validate\u001B[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)\u001B[0m\n\u001B[0;32m 251\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 252\u001B[0m \"\"\"\n\u001B[1;32m--> 253\u001B[1;33m \u001B[0mX\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mgroups\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mindexable\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mgroups\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 254\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 255\u001B[0m \u001B[0mcv\u001B[0m \u001B[1;33m=\u001B[0m \u001B[0mcheck_cv\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mcv\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0my\u001B[0m\u001B[1;33m,\u001B[0m \u001B[0mclassifier\u001B[0m\u001B[1;33m=\u001B[0m\u001B[0mis_classifier\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mestimator\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\utils\\validation.py\u001B[0m in \u001B[0;36mindexable\u001B[1;34m(*iterables)\u001B[0m\n\u001B[0;32m 376\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 377\u001B[0m \u001B[0mresult\u001B[0m \u001B[1;33m=\u001B[0m 
\u001B[1;33m[\u001B[0m\u001B[0m_make_indexable\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0mX\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;32mfor\u001B[0m \u001B[0mX\u001B[0m \u001B[1;32min\u001B[0m \u001B[0miterables\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 378\u001B[1;33m \u001B[0mcheck_consistent_length\u001B[0m\u001B[1;33m(\u001B[0m\u001B[1;33m*\u001B[0m\u001B[0mresult\u001B[0m\u001B[1;33m)\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 379\u001B[0m \u001B[1;32mreturn\u001B[0m \u001B[0mresult\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0;32m 380\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;32mD:\\miniconda3\\envs\\py37\\lib\\site-packages\\sklearn\\utils\\validation.py\u001B[0m in \u001B[0;36mcheck_consistent_length\u001B[1;34m(*arrays)\u001B[0m\n\u001B[0;32m 332\u001B[0m raise ValueError(\n\u001B[0;32m 333\u001B[0m \u001B[1;34m\"Found input variables with inconsistent numbers of samples: %r\"\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[1;32m--> 334\u001B[1;33m \u001B[1;33m%\u001B[0m \u001B[1;33m[\u001B[0m\u001B[0mint\u001B[0m\u001B[1;33m(\u001B[0m\u001B[0ml\u001B[0m\u001B[1;33m)\u001B[0m \u001B[1;32mfor\u001B[0m \u001B[0ml\u001B[0m \u001B[1;32min\u001B[0m \u001B[0mlengths\u001B[0m\u001B[1;33m]\u001B[0m\u001B[1;33m\u001B[0m\u001B[1;33m\u001B[0m\u001B[0m\n\u001B[0m\u001B[0;32m 335\u001B[0m )\n\u001B[0;32m 336\u001B[0m \u001B[1;33m\u001B[0m\u001B[0m\n", + "\u001B[1;31mValueError\u001B[0m: Found input variables with inconsistent numbers of samples: [3080, 1]" + ] + } + ], + "source": [ + "xgb_bo = BayesianOptimization(xgb_cv, pbounds={'max_depth': (20, 60),\n", + " 'learning_rate': (0.005, 0.1),\n", + " 'n_estimators': (100, 2000),\n", + " 'min_child_weight': (0, 30),\n", + " 'subsample': (0.05, 1),\n", + " 'colsample_bytree': (0.1, 1),\n", + " 'reg_alpha': (0.001, 10),\n", + " 'gamma': (0.001, 10)})\n", + 
"xgb_bo.maximize(n_iter=100, init_points=10)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 105, + "outputs": [], + "source": [ + "params_xgb = {'objective': 'reg:squarederror',\n", + " 'booster': 'gbtree',\n", + " 'eta': 0.037,\n", + " 'max_depth': 30,\n", + " 'subsample': 1.0,\n", + " 'colsample_bytree': 0.47,\n", + " 'min_child_weight': 30,\n", + " 'seed': 42}\n", + "num_boost_round = 2000\n", + "\n", + "dtrain = xgb.DMatrix(X_train, Y_train)\n", + "dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", + "\n", + "gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 106, + "outputs": [], + "source": [ + "y_pred_xgb = np.expm1(gb_model.predict(xgb.DMatrix(X_test)))\n", + "y_true_xgb = np.expm1(Y_test.values)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 107, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.1E-05\n", + "RMSE: 0.003\n", + "MAE: 0.002\n", + "MAPE: 2.99 %\n", + "R_2: 0.88\n" + ] + } + ], + "source": [ + "MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n", + "RMSE = np.sqrt(mean_squared_error(y_true_xgb, y_pred_xgb))\n", + "MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n", + "MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n", + "R_2 = r2_score(y_true_xgb, y_pred_xgb)\n", + "print('MSE:', format(MSE, '.1E'))\n", + "print('RMSE:', round(RMSE, 3))\n", + "print('MAE:', round(MAE, 3))\n", + "print('MAPE:', round(MAPE*100, 2), '%')\n", + "print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差a" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + 
} + } + }, + { + "cell_type": "code", + "execution_count": 108, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 109, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE: 1.8E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.47 %, R_2: 0.776\n", + "MSE: 1.8E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.19 %, R_2: 0.83\n", + "MSE: 1.8E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.87 %, R_2: 0.811\n", + "MSE: 1.2E-05, RMSE: 0.003, MAE: 0.002, MAPE: 2.96 %, R_2: 0.861\n", + "MSE: 1.9E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.65 %, R_2: 0.775\n", + "MSE: 1.9E-05, RMSE: 0.004, MAE: 0.002, MAPE: 3.56 %, R_2: 0.789\n", + "MSE: 2.3E-05, RMSE: 0.005, MAE: 0.002, MAPE: 3.05 %, R_2: 0.723\n", + "MSE: 2.5E-05, RMSE: 0.005, MAE: 0.002, MAPE: 3.94 %, R_2: 0.717\n", + "MSE: 1.0E-05, RMSE: 0.003, MAE: 0.002, MAPE: 2.9 %, R_2: 0.864\n", + "MSE: 9.4E-06, RMSE: 0.003, MAE: 0.002, MAPE: 2.89 %, R_2: 0.881\n" + ] + } + ], + "source": [ + "kf = KFold(n_splits=10, shuffle=True, random_state=42)\n", + "eva_list = list()\n", + "for (train_index, test_index) in kf.split(use_data):\n", + " train = use_data.loc[train_index]\n", + " test = use_data.loc[test_index]\n", + " train, valid = train_test_split(train, test_size=0.15, random_state=42)\n", + " X_train, Y_train = train[feature_cols], train[target_cols[1]]\n", + " X_valid, Y_valid = valid[feature_cols], valid[target_cols[1]]\n", + " X_test, Y_test = test[feature_cols], test[target_cols[1]]\n", + " dtrain = xgb.DMatrix(X_train, Y_train)\n", + " dvalid = xgb.DMatrix(X_valid, Y_valid)\n", + " watchlist = [(dvalid, 'eval')]\n", + " gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", + " early_stopping_rounds=100, verbose_eval=False)\n", + " y_pred = gb_model.predict(xgb.DMatrix(X_test))\n", + " y_true = Y_test.values\n", + " MSE = mean_squared_error(y_true, y_pred)\n", + " RMSE = 
np.sqrt(mean_squared_error(y_true, y_pred))\n", + " MAE = mean_absolute_error(y_true, y_pred)\n", + " MAPE = mean_absolute_percentage_error(y_true, y_pred)\n", + " R_2 = r2_score(y_true, y_pred)\n", + " print('MSE:', format(MSE, '.1E'), end=', ')\n", + " print('RMSE:', round(RMSE, 3), end=', ')\n", + " print('MAE:', round(MAE, 3), end=', ')\n", + " print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n", + " print('R_2:', round(R_2, 3)) #R方为负就说明拟合效果比平均值差\n", + " eva_list.append([MSE, RMSE, MAE, MAPE, R_2])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 110, + "outputs": [], + "source": [ + "record = pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R2'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 120, + "outputs": [ + { + "data": { + "text/plain": " MSE RMSE MAE MAPE R2\n0 0.000018 0.004221 0.002394 0.034705 0.775560\n1 0.000018 0.004191 0.002405 0.031921 0.829931\n2 0.000018 0.004249 0.002235 0.038677 0.810649\n3 0.000012 0.003395 0.002090 0.029607 0.861337\n4 0.000019 0.004334 0.002302 0.036496 0.775066\n5 0.000019 0.004367 0.002260 0.035588 0.789063\n6 0.000023 0.004806 0.002272 0.030522 0.723082\n7 0.000025 0.004968 0.002401 0.039428 0.717094\n8 0.000010 0.003207 0.002037 0.029033 0.863679\n9 0.000009 0.003072 0.002008 0.028871 0.880821", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
MSERMSEMAEMAPER2
00.0000180.0042210.0023940.0347050.775560
10.0000180.0041910.0024050.0319210.829931
20.0000180.0042490.0022350.0386770.810649
30.0000120.0033950.0020900.0296070.861337
40.0000190.0043340.0023020.0364960.775066
50.0000190.0043670.0022600.0355880.789063
60.0000230.0048060.0022720.0305220.723082
70.0000250.0049680.0024010.0394280.717094
80.0000100.0032070.0020370.0290330.863679
90.0000090.0030720.0020080.0288710.880821
\n
" + }, + "execution_count": 120, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 124, + "outputs": [ + { + "data": { + "text/plain": " MSE RMSE MAE MAPE R2\n8 0.00001 0.003207 0.002037 0.029033 0.863679", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
MSERMSEMAEMAPER2
80.000010.0032070.0020370.0290330.863679
\n
" + }, + "execution_count": 124, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 126, + "outputs": [], + "source": [ + "index = [0, 1, 2, 3, 4, 5, 6, 8]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 128, + "outputs": [ + { + "data": { + "text/plain": "MSE 0.000017\nRMSE 0.004096\nMAE 0.002249\nMAPE 0.033319\nR2 0.803546\ndtype: float64" + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record.loc[index].mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 63, + "outputs": [ + { + "data": { + "text/plain": "MSE 0.000552\nRMSE 0.022978\nMAE 0.014251\nMAPE 0.034105\nR2 0.896138\ndtype: float64" + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "record.mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 57, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "#新增加的两行\n", + "from pylab import mpl\n", + "# 设置显示中文字体\n", + "mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n", + "\n", + "mpl.rcParams[\"axes.unicode_minus\"] = False" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 58, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(16, 10))\n", + "plt.plot(range(len(y_true)), y_true, 'o-', label='真实值')\n", + "plt.plot(range(len(y_pred)), y_pred, '*-', label='预测值')\n", + "plt.legend(loc='best')\n", + "plt.title('预测结果')\n", + "plt.savefig('./figure/CO2排放强度预测结果.png')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 59, + "outputs": [], + "source": [ + "pd.DataFrame.from_records([y_pred, y_true]).T.to_csv('pred.csv')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 60, + "outputs": [], + "source": [ + "rst = pd.DataFrame.from_records(([y_true_xgb, y_pred_xgb])).T\n", + "rst.columns = ['y_true', 'y_pred']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 61, + "outputs": [], + "source": [ + "rst['mAP'] = abs(rst.y_pred - rst.y_true) / rst.y_true" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 62, + "outputs": [ + { + "data": { + "text/plain": " y_true y_pred mAP\n23 0.233161 0.228589 0.019609\n46 0.242031 0.260373 0.075782\n42 0.233845 0.215675 0.077700\n1 0.233773 0.237715 0.016864\n58 0.258407 0.259042 0.002460\n41 0.233404 0.246465 0.055956\n15 0.249245 0.248289 0.003837\n63 0.237670 0.284324 0.196296\n59 0.244008 0.242001 0.008228\n37 0.252681 0.251169 0.005983", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
y_truey_predmAP
230.2331610.2285890.019609
460.2420310.2603730.075782
420.2338450.2156750.077700
10.2337730.2377150.016864
580.2584070.2590420.002460
410.2334040.2464650.055956
150.2492450.2482890.003837
630.2376700.2843240.196296
590.2440080.2420010.008228
370.2526810.2511690.005983
\n
" + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rst.sort_values(by='mAP').sample(10)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 63, + "outputs": [ + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(16, 10))\n", + "plt.plot(range(len(y_true_xgb)), y_true_xgb, 'o-', label='真实值')\n", + "plt.plot(range(len(y_pred_xgb)), y_pred_xgb, '*-', label='预测值')\n", + "plt.legend(loc='best')\n", + "plt.title('预测结果')\n", + "plt.savefig('./figure/CO2排放强度预测结果.png')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "## 煤种标准化工程" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 73, + "outputs": [], + "source": [ + "new_values = total_data.groupby(['煤种', '入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']).CO2_em_air.mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 74, + "outputs": [ + { + "data": { + "text/plain": " 煤种 入炉煤低位热值_new 燃煤挥发份Var_new 燃煤灰份Aar_new\n0 无烟煤 17050.00 6.51 31.330000\n1 无烟煤 18440.00 9.13 21.240189\n2 无烟煤 19335.65 7.06 21.400000\n3 无烟煤 20125.07 5.70 29.850000\n4 无烟煤 20463.30 5.70 29.790000\n.. ... ... ... ...\n622 贫煤 21772.91 10.66 26.320000\n623 贫煤 21907.00 10.64 28.100000\n624 贫煤 22042.72 12.96 25.690000\n625 贫煤 23215.00 11.00 19.310000\n626 贫煤 23791.00 11.00 19.310000\n\n[627 rows x 4 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
煤种入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new
0无烟煤17050.006.5131.330000
1无烟煤18440.009.1321.240189
2无烟煤19335.657.0621.400000
3无烟煤20125.075.7029.850000
4无烟煤20463.305.7029.790000
...............
622贫煤21772.9110.6626.320000
623贫煤21907.0010.6428.100000
624贫煤22042.7212.9625.690000
625贫煤23215.0011.0019.310000
626贫煤23791.0011.0019.310000
\n

627 rows × 4 columns

\n
" + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coal_df = new_values.reset_index().drop(columns='CO2_em_air')\n", + "coal_df" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 75, + "outputs": [], + "source": [ + "coal_params_dict = dict()\n", + "for coal_type in coal_df['煤种'].unique().tolist():\n", + " options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']].values\n", + " coal_params_dict[coal_type] = options" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 76, + "outputs": [ + { + "data": { + "text/plain": "{'无烟煤': array([[1.70500000e+04, 6.51000000e+00, 3.13300000e+01],\n [1.84400000e+04, 9.13000000e+00, 2.12401894e+01],\n [1.93356500e+04, 7.06000000e+00, 2.14000000e+01],\n [2.01250700e+04, 5.70000000e+00, 2.98500000e+01],\n [2.04633000e+04, 5.70000000e+00, 2.97900000e+01]]),\n '烟煤': array([[1.277100e+04, 2.126000e+01, 3.355000e+01],\n [1.500000e+04, 2.346000e+01, 1.904000e+01],\n [1.610000e+04, 2.333000e+01, 1.873000e+01],\n ...,\n [2.348751e+04, 2.927000e+01, 2.097000e+01],\n [2.365000e+04, 2.887000e+01, 7.910000e+00],\n [2.365614e+04, 2.927000e+01, 2.097000e+01]]),\n '褐煤': array([[1.059800e+04, 2.476000e+01, 2.179000e+01],\n [1.129000e+04, 4.764000e+01, 3.079000e+01],\n [1.160400e+04, 4.758000e+01, 3.025000e+01],\n [1.172435e+04, 4.601000e+01, 3.673000e+01],\n [1.203000e+04, 4.726000e+01, 3.119000e+01],\n [1.213546e+04, 4.642000e+01, 1.113000e+01],\n [1.217290e+04, 4.642000e+01, 1.113000e+01],\n [1.219256e+04, 4.642000e+01, 1.113000e+01],\n [1.221131e+04, 4.642000e+01, 1.113000e+01],\n [1.230939e+04, 4.642000e+01, 1.113000e+01],\n [1.233780e+04, 4.642000e+01, 1.113000e+01],\n [1.267400e+04, 4.324000e+01, 1.237000e+01],\n [1.278700e+04, 4.884000e+01, 4.117000e+01],\n [1.295100e+04, 
2.228000e+01, 1.287000e+01],\n [1.299880e+04, 2.256000e+01, 1.716000e+01],\n [1.311100e+04, 2.367000e+01, 2.107000e+01],\n [1.313000e+04, 2.417000e+01, 1.630000e+01],\n [1.318000e+04, 2.445000e+01, 1.794000e+01],\n [1.320830e+04, 2.451000e+01, 1.429000e+01],\n [1.325722e+04, 1.703000e+01, 3.660000e+01],\n [1.327000e+04, 3.204000e+01, 1.709000e+01],\n [1.327300e+04, 2.364000e+01, 1.622000e+01],\n [1.327300e+04, 2.458000e+01, 1.261000e+01],\n [1.332771e+04, 4.090000e+01, 2.507000e+01],\n [1.333064e+04, 1.680000e+01, 3.741000e+01],\n [1.335883e+04, 2.301000e+01, 1.841000e+01],\n [1.336864e+04, 2.301000e+01, 1.841000e+01],\n [1.343787e+04, 2.336000e+01, 1.705000e+01],\n [1.344000e+04, 4.782000e+01, 2.290000e+01],\n [1.345749e+04, 2.388000e+01, 1.652000e+01],\n [1.357000e+04, 1.799000e+01, 2.177000e+01],\n [1.364000e+04, 2.526000e+01, 2.108000e+01],\n [1.365410e+04, 2.232000e+01, 1.171000e+01],\n [1.369000e+04, 4.771000e+01, 2.205000e+01],\n [1.382000e+04, 2.420000e+01, 1.104000e+01],\n [1.389597e+04, 2.232000e+01, 1.171000e+01],\n [1.390000e+04, 3.683000e+01, 4.441000e+01],\n [1.395400e+04, 2.310000e+01, 1.011000e+01],\n [1.396000e+04, 4.665000e+01, 1.890000e+01],\n [1.400000e+04, 4.520000e+01, 1.364000e+01],\n [1.404100e+04, 2.346000e+01, 1.046000e+01],\n [1.410900e+04, 4.520000e+01, 1.364000e+01],\n [1.412200e+04, 2.478000e+01, 1.916000e+01],\n [1.419900e+04, 4.733000e+01, 1.697000e+01],\n [1.433937e+04, 2.476000e+01, 3.371000e+01],\n [1.440000e+04, 2.589000e+01, 1.643000e+01],\n [1.442729e+04, 4.474000e+01, 1.193000e+01],\n [1.446814e+04, 2.484000e+01, 3.331000e+01],\n [1.448810e+04, 3.554000e+01, 1.171000e+01],\n [1.458200e+04, 2.834000e+01, 2.320000e+01],\n [1.460000e+04, 2.714000e+01, 4.346000e+01],\n [1.462400e+04, 4.613000e+01, 2.700000e+01],\n [1.463500e+04, 4.613000e+01, 2.700000e+01],\n [1.464000e+04, 4.439000e+01, 1.684000e+01],\n [1.470100e+04, 2.210000e+01, 4.588000e+01],\n [1.481078e+04, 4.501000e+01, 1.325000e+01],\n [1.489878e+04, 2.386000e+01, 
3.161000e+01],\n [1.507938e+04, 4.501000e+01, 1.325000e+01],\n [1.512117e+04, 2.355000e+01, 1.472000e+01],\n [1.517400e+04, 3.126000e+01, 1.696000e+01],\n [1.523800e+04, 2.492000e+01, 2.378000e+01],\n [1.524041e+04, 2.355000e+01, 1.472000e+01],\n [1.528927e+04, 2.345000e+01, 1.554000e+01],\n [1.534700e+04, 2.492000e+01, 2.378000e+01],\n [1.536708e+04, 4.501000e+01, 8.590000e+00],\n [1.540000e+04, 2.450000e+01, 2.085000e+01],\n [1.560165e+04, 2.345000e+01, 1.554000e+01],\n [1.562100e+04, 4.409000e+01, 1.019000e+01],\n [1.568455e+04, 1.865000e+01, 3.545000e+01],\n [1.599544e+04, 1.865000e+01, 3.545000e+01],\n [1.619823e+04, 2.032000e+01, 3.297000e+01],\n [1.619823e+04, 2.075000e+01, 3.310000e+01],\n [1.619951e+04, 1.790000e+01, 3.976000e+01],\n [1.620200e+04, 1.268000e+01, 4.012000e+01],\n [1.638000e+04, 2.264000e+01, 2.024000e+01],\n [1.644918e+04, 2.061000e+01, 3.224000e+01],\n [1.644918e+04, 2.087000e+01, 3.238000e+01],\n [1.660450e+04, 3.484000e+01, 9.590000e+00],\n [1.662400e+04, 1.287000e+01, 3.909000e+01],\n [1.667800e+04, 1.320000e+01, 3.884000e+01],\n [1.701000e+04, 2.721000e+01, 4.295000e+01],\n [1.711359e+04, 3.560000e+01, 9.440000e+00],\n [1.721702e+04, 3.266000e+01, 6.030000e+00],\n [1.732699e+04, 3.266000e+01, 6.030000e+00],\n [1.769205e+04, 3.632000e+01, 8.880000e+00],\n [1.783200e+04, 3.564000e+01, 2.418000e+01],\n [1.792600e+04, 3.563000e+01, 2.488000e+01],\n [1.802919e+04, 3.526000e+01, 7.680000e+00],\n [1.811583e+04, 3.348000e+01, 1.236000e+01],\n [1.815944e+04, 3.348000e+01, 1.236000e+01],\n [1.834900e+04, 3.542000e+01, 1.152000e+01],\n [1.862400e+04, 3.951000e+01, 1.937000e+01],\n [1.877383e+04, 2.676000e+01, 3.448000e+01],\n [1.877602e+04, 2.676000e+01, 3.448000e+01],\n [1.882100e+04, 2.678000e+01, 3.445000e+01],\n [1.884200e+04, 2.685000e+01, 3.451000e+01],\n [1.896000e+04, 3.951000e+01, 1.937000e+01],\n [1.903900e+04, 2.580000e+01, 2.420000e+01],\n [1.908760e+04, 3.426000e+01, 4.580000e+00],\n [1.918000e+04, 2.670000e+01, 2.480000e+01],\n 
[1.922827e+04, 3.426000e+01, 4.580000e+00],\n [1.924675e+04, 3.243000e+01, 7.700000e+00],\n [1.927600e+04, 3.200000e+01, 7.700000e+00],\n [1.959900e+04, 3.514000e+01, 1.065000e+01],\n [1.964010e+04, 3.446000e+01, 4.600000e+00],\n [1.965200e+04, 2.990000e+01, 2.406000e+01],\n [1.974233e+04, 3.422000e+01, 2.892000e+01],\n [1.976235e+04, 3.414000e+01, 2.934000e+01],\n [1.977612e+04, 3.446000e+01, 4.600000e+00],\n [1.993700e+04, 3.514000e+01, 1.065000e+01],\n [1.997000e+04, 3.533000e+01, 9.050000e+00],\n [2.003000e+04, 3.948000e+01, 3.080000e+01],\n [2.006000e+04, 3.911000e+01, 3.080000e+01],\n [2.011300e+04, 2.560000e+01, 2.312000e+01],\n [2.017338e+04, 2.979000e+01, 1.814000e+01],\n [2.025484e+04, 2.979000e+01, 1.814000e+01],\n [2.028500e+04, 3.009000e+01, 1.125000e+01],\n [2.057100e+04, 3.147000e+01, 2.478000e+01],\n [2.062600e+04, 2.627000e+01, 2.050000e+01],\n [2.066423e+04, 2.752000e+01, 2.014000e+01],\n [2.067360e+04, 2.840000e+01, 2.165000e+01],\n [2.068200e+04, 2.960000e+01, 1.603000e+01],\n [2.068600e+04, 3.124000e+01, 2.445000e+01],\n [2.070300e+04, 3.000000e+01, 1.125000e+01],\n [2.073600e+04, 2.627000e+01, 2.050000e+01],\n [2.075090e+04, 2.780000e+01, 2.254000e+01],\n [2.076000e+04, 2.977000e+01, 1.291000e+01],\n [2.078500e+04, 3.871000e+01, 1.575000e+01],\n [2.083648e+04, 2.780000e+01, 2.254000e+01],\n [2.089200e+04, 3.252000e+01, 9.680000e+00],\n [2.089200e+04, 3.255000e+01, 9.380000e+00],\n [2.089200e+04, 3.262000e+01, 1.026000e+01],\n [2.089200e+04, 3.324000e+01, 8.560000e+00],\n [2.090000e+04, 3.100000e+01, 1.981000e+01],\n [2.093990e+04, 2.840000e+01, 2.165000e+01],\n [2.094100e+04, 2.977000e+01, 1.291000e+01],\n [2.094900e+04, 3.100000e+01, 2.007000e+01],\n [2.107400e+04, 3.830000e+01, 1.525000e+01],\n [2.110000e+04, 2.470000e+01, 2.599000e+01],\n [2.114300e+04, 2.580000e+01, 2.196000e+01],\n [2.114300e+04, 2.580000e+01, 2.197000e+01],\n [2.121740e+04, 3.279000e+01, 1.334000e+01],\n [2.127156e+04, 3.844000e+01, 1.186000e+01],\n [2.134680e+04, 
3.885000e+01, 1.243000e+01],\n [2.137900e+04, 2.944000e+01, 1.436000e+01],\n [2.147400e+04, 2.944000e+01, 1.436000e+01],\n [2.166129e+04, 3.124000e+01, 1.849000e+01],\n [2.176000e+04, 3.213000e+01, 1.785000e+01],\n [2.208167e+04, 3.176000e+01, 1.816000e+01],\n [2.214783e+04, 3.736000e+01, 1.390000e+01],\n [2.219619e+04, 3.736000e+01, 1.390000e+01],\n [2.240000e+04, 3.052000e+01, 1.785000e+01],\n [2.248200e+04, 3.010000e+01, 1.125000e+01],\n [2.261900e+04, 3.047000e+01, 1.303000e+01],\n [2.274200e+04, 3.028000e+01, 1.057000e+01]]),\n '贫煤': array([[1.695900e+04, 9.310000e+00, 4.477000e+01],\n [1.742404e+04, 1.058000e+01, 2.268000e+01],\n [1.742931e+04, 7.900000e+00, 3.840000e+01],\n [1.799800e+04, 1.175000e+01, 2.981000e+01],\n [1.875700e+04, 1.185000e+01, 3.122000e+01],\n [1.912518e+04, 7.810000e+00, 3.145000e+01],\n [1.928076e+04, 7.930000e+00, 3.137000e+01],\n [1.935228e+04, 1.119000e+01, 3.202000e+01],\n [1.938269e+04, 1.127000e+01, 3.192000e+01],\n [1.983535e+04, 1.152000e+01, 3.052000e+01],\n [1.986900e+04, 1.161000e+01, 3.042000e+01],\n [1.994000e+04, 9.370000e+00, 3.426000e+01],\n [1.994300e+04, 9.370000e+00, 3.426000e+01],\n [2.003700e+04, 1.125000e+01, 3.067000e+01],\n [2.024590e+04, 1.058000e+01, 2.654000e+01],\n [2.028730e+04, 1.120000e+01, 2.698000e+01],\n [2.031000e+04, 1.123000e+01, 3.357000e+01],\n [2.031700e+04, 1.125000e+01, 3.067000e+01],\n [2.036000e+04, 9.450000e+00, 3.077000e+01],\n [2.057000e+04, 1.185000e+01, 2.786000e+01],\n [2.075500e+04, 1.174000e+01, 2.817000e+01],\n [2.086230e+04, 1.040000e+01, 2.583000e+01],\n [2.092670e+04, 9.510000e+00, 2.515000e+01],\n [2.096500e+04, 1.258000e+01, 2.965000e+01],\n [2.097590e+04, 1.017000e+01, 2.491000e+01],\n [2.098100e+04, 1.258000e+01, 2.965000e+01],\n [2.101000e+04, 1.209000e+01, 2.169000e+01],\n [2.101980e+04, 9.410000e+00, 2.489000e+01],\n [2.103908e+04, 7.010000e+00, 2.714000e+01],\n [2.105200e+04, 1.074000e+01, 3.136000e+01],\n [2.106690e+04, 1.034000e+01, 2.481000e+01],\n [2.107710e+04, 
1.017000e+01, 2.478000e+01],\n [2.110900e+04, 7.670000e+00, 2.597000e+01],\n [2.110900e+04, 1.209000e+01, 2.169000e+01],\n [2.119000e+04, 7.170000e+00, 2.591000e+01],\n [2.119400e+04, 7.190000e+00, 2.597000e+01],\n [2.119433e+04, 7.010000e+00, 2.667000e+01],\n [2.122400e+04, 1.256000e+01, 2.636000e+01],\n [2.126600e+04, 7.260000e+00, 2.567000e+01],\n [2.126900e+04, 1.174000e+01, 2.817000e+01],\n [2.157900e+04, 1.189000e+01, 2.689000e+01],\n [2.174500e+04, 1.074000e+01, 2.850000e+01],\n [2.176688e+04, 1.062000e+01, 2.687000e+01],\n [2.177291e+04, 1.066000e+01, 2.632000e+01],\n [2.190700e+04, 1.064000e+01, 2.810000e+01],\n [2.204272e+04, 1.296000e+01, 2.569000e+01],\n [2.321500e+04, 1.100000e+01, 1.931000e+01],\n [2.379100e+04, 1.100000e+01, 1.931000e+01]])}" + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "coal_params_dict" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 77, + "outputs": [ + { + "data": { + "text/plain": " 地区 所属集团 投产时间 机组容量 机组类型 参数分类 冷却方式 锅炉类型 时间 \\\n0 北京 华能 1998/1/20 0:00 165 供热式 超高压 水冷 煤粉 2016.0 \n1 北京 华能 1998/1/20 0:00 165 供热式 超高压 水冷 煤粉 2016.0 \n2 北京 华能 1998/12/20 0:00 220 供热式 超高压 水冷 煤粉 2016.0 \n3 北京 华能 1999/6/26 0:00 220 供热式 超高压 水冷 煤粉 2016.0 \n4 辽宁 大唐 2009/4/30 0:00 300 供热式 亚临界 水冷 煤粉 2016.0 \n.. .. ... ... ... ... ... ... ... ... \n847 新疆 NaN NaN 1320 纯凝式 超临界 间接空冷 煤粉 NaN \n848 辽宁 NaN NaN 700 供热式 超临界 水冷 煤粉 NaN \n849 内蒙 NaN NaN 700 供热式 超临界 直接空冷 煤粉 NaN \n850 山东 NaN NaN 40 供热式 超高压 水冷 循环流化床 NaN \n851 浙江 NaN NaN 70 供热式 超高压 水冷 循环流化床 NaN \n\n 发电量 ... 标煤量 出力系数 煤种 入炉煤低位热值 燃煤挥发份Var 燃煤灰份Aar \\\n0 51841.70000 ... 2.580497e+05 75.84 烟煤 23380.0 27.59 9.94 \n1 47387.95000 ... 2.126813e+05 74.50 烟煤 23380.0 27.59 9.94 \n2 115498.04000 ... 4.410925e+05 78.76 烟煤 23380.0 27.59 9.94 \n3 120884.07000 ... 4.707218e+05 81.41 烟煤 23380.0 27.59 9.94 \n4 111218.55000 ... 3.726990e+05 71.27 褐煤 14122.0 24.78 19.16 \n.. ... ... ... 
... .. ... ... ... \n847 704381.26290 ... 2.283076e+06 NaN 褐煤 19970.0 35.33 9.05 \n848 350000.00000 ... 1.328747e+06 NaN 褐煤 14640.0 44.39 16.84 \n849 385000.00000 ... 1.362009e+06 NaN 褐煤 13960.0 46.65 18.90 \n850 17000.00000 ... 1.810834e+05 NaN 烟煤 21060.0 19.12 20.27 \n851 35788.81469 ... 3.502535e+05 NaN 烟煤 22021.0 19.12 21.77 \n\n CO2_em_air 入炉煤低位热值_new 燃煤挥发份Var_new 燃煤灰份Aar_new \n0 0.235066 23380.0 27.59 9.94 \n1 0.226207 23380.0 27.59 9.94 \n2 0.220954 23380.0 27.59 9.94 \n3 0.216298 23380.0 27.59 9.94 \n4 0.238755 14122.0 24.78 19.16 \n.. ... ... ... ... \n847 0.196452 19970.0 35.33 9.05 \n848 0.185688 14640.0 44.39 16.84 \n849 0.181214 13960.0 46.65 18.90 \n850 0.347570 21060.0 19.12 20.27 \n851 0.253057 22021.0 19.12 21.77 \n\n[852 rows x 21 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
地区所属集团投产时间机组容量机组类型参数分类冷却方式锅炉类型时间发电量...标煤量出力系数煤种入炉煤低位热值燃煤挥发份Var燃煤灰份AarCO2_em_air入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new
0北京华能1998/1/20 0:00165供热式超高压水冷煤粉2016.051841.70000...2.580497e+0575.84烟煤23380.027.599.940.23506623380.027.599.94
1北京华能1998/1/20 0:00165供热式超高压水冷煤粉2016.047387.95000...2.126813e+0574.50烟煤23380.027.599.940.22620723380.027.599.94
2北京华能1998/12/20 0:00220供热式超高压水冷煤粉2016.0115498.04000...4.410925e+0578.76烟煤23380.027.599.940.22095423380.027.599.94
3北京华能1999/6/26 0:00220供热式超高压水冷煤粉2016.0120884.07000...4.707218e+0581.41烟煤23380.027.599.940.21629823380.027.599.94
4辽宁大唐2009/4/30 0:00300供热式亚临界水冷煤粉2016.0111218.55000...3.726990e+0571.27褐煤14122.024.7819.160.23875514122.024.7819.16
..................................................................
847新疆NaNNaN1320纯凝式超临界间接空冷煤粉NaN704381.26290...2.283076e+06NaN褐煤19970.035.339.050.19645219970.035.339.05
848辽宁NaNNaN700供热式超临界水冷煤粉NaN350000.00000...1.328747e+06NaN褐煤14640.044.3916.840.18568814640.044.3916.84
849内蒙NaNNaN700供热式超临界直接空冷煤粉NaN385000.00000...1.362009e+06NaN褐煤13960.046.6518.900.18121413960.046.6518.90
850山东NaNNaN40供热式超高压水冷循环流化床NaN17000.00000...1.810834e+05NaN烟煤21060.019.1220.270.34757021060.019.1220.27
851浙江NaNNaN70供热式超高压水冷循环流化床NaN35788.81469...3.502535e+05NaN烟煤22021.019.1221.770.25305722021.019.1221.77
\n

852 rows × 21 columns

\n
" + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "total_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 78, + "outputs": [], + "source": [ + "new_use_data = total_data.groupby(use_col + ['煤种'])['CO2_em_air'].mean().reset_index().drop(columns=['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new'])\n", + "new_use_data.rename(columns={0:'CO2_em_air'}, inplace=True)\n", + "new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))\n", + "new_use_data.drop(columns='煤种', inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 79, + "outputs": [], + "source": [ + "new_data = new_use_data.explode(column='coal_params')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 80, + "outputs": [ + { + "data": { + "text/plain": " 地区 机组类型 参数分类 冷却方式 锅炉类型 机组容量 coal_params\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [12771.0, 21.26, 33.55]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [15000.0, 23.46, 19.04]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [16100.0, 23.33, 18.73]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [16190.0, 23.33, 18.73]\n0 上海 纯凝式 亚临界 水冷 煤粉 320 [16641.0, 19.13, 39.12]\n.. ... ... ... ... ... ... ...\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23253.68, 23.72, 18.45]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23380.0, 27.59, 9.94]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23487.51, 29.27, 20.97]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23650.0, 28.87, 7.91]\n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 [23656.14, 29.27, 20.97]\n\n[208875 rows x 7 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
地区机组类型参数分类冷却方式锅炉类型机组容量coal_params
0上海纯凝式亚临界水冷煤粉320[12771.0, 21.26, 33.55]
0上海纯凝式亚临界水冷煤粉320[15000.0, 23.46, 19.04]
0上海纯凝式亚临界水冷煤粉320[16100.0, 23.33, 18.73]
0上海纯凝式亚临界水冷煤粉320[16190.0, 23.33, 18.73]
0上海纯凝式亚临界水冷煤粉320[16641.0, 19.13, 39.12]
........................
646黑龙江纯凝式超高压水冷煤粉210[23253.68, 23.72, 18.45]
646黑龙江纯凝式超高压水冷煤粉210[23380.0, 27.59, 9.94]
646黑龙江纯凝式超高压水冷煤粉210[23487.51, 29.27, 20.97]
646黑龙江纯凝式超高压水冷煤粉210[23650.0, 28.87, 7.91]
646黑龙江纯凝式超高压水冷煤粉210[23656.14, 29.27, 20.97]
\n

208875 rows × 7 columns

\n
" + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_data.drop(columns=['CO2_em_air'])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 81, + "outputs": [], + "source": [ + "norm_data = pd.concat([new_data, new_data.coal_params.apply(pd.Series, index=['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new'])], axis=1).drop(columns='coal_params')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 82, + "outputs": [ + { + "data": { + "text/plain": " 地区 机组类型 参数分类 冷却方式 锅炉类型 机组容量 CO2_em_air 入炉煤低位热值_new 燃煤挥发份Var_new \\\n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 12771.00 21.26 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 15000.00 23.46 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 16100.00 23.33 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 16190.00 23.33 \n0 上海 纯凝式 亚临界 水冷 煤粉 320 0.266602 16641.00 19.13 \n.. ... ... ... ... ... ... ... ... ... \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23253.68 23.72 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23380.00 27.59 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23487.51 29.27 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23650.00 28.87 \n646 黑龙江 纯凝式 超高压 水冷 煤粉 210 0.278763 23656.14 29.27 \n\n 燃煤灰份Aar_new \n0 33.55 \n0 19.04 \n0 18.73 \n0 18.73 \n0 39.12 \n.. ... \n646 18.45 \n646 9.94 \n646 20.97 \n646 7.91 \n646 20.97 \n\n[208875 rows x 10 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
地区机组类型参数分类冷却方式锅炉类型机组容量CO2_em_air入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new
0上海纯凝式亚临界水冷煤粉3200.26660212771.0021.2633.55
0上海纯凝式亚临界水冷煤粉3200.26660215000.0023.4619.04
0上海纯凝式亚临界水冷煤粉3200.26660216100.0023.3318.73
0上海纯凝式亚临界水冷煤粉3200.26660216190.0023.3318.73
0上海纯凝式亚临界水冷煤粉3200.26660216641.0019.1339.12
.................................
646黑龙江纯凝式超高压水冷煤粉2100.27876323253.6823.7218.45
646黑龙江纯凝式超高压水冷煤粉2100.27876323380.0027.599.94
646黑龙江纯凝式超高压水冷煤粉2100.27876323487.5129.2720.97
646黑龙江纯凝式超高压水冷煤粉2100.27876323650.0028.877.91
646黑龙江纯凝式超高压水冷煤粉2100.27876323656.1429.2720.97
\n

208875 rows × 10 columns

\n
" + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 83, + "outputs": [], + "source": [ + "for col in num_cols:\n", + " norm_data[col] = np.log1p(norm_data[col])\n", + " # total_data[col] = (total_data[col] - total_data[col].min()) / (total_data[col].max() - total_data[col].min())\n", + "norm_data_dummy = pd.get_dummies(norm_data, columns=object_cols)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 84, + "outputs": [ + { + "data": { + "text/plain": " 机组容量 CO2_em_air 入炉煤低位热值_new 燃煤挥发份Var_new 燃煤灰份Aar_new 地区_上海 \\\n0 5.771441 0.236338 9.455011 3.102791 3.542408 1 \n0 5.771441 0.236338 9.615872 3.197039 2.997730 1 \n0 5.771441 0.236338 9.686637 3.191710 2.982140 1 \n0 5.771441 0.236338 9.692211 3.191710 2.982140 1 \n0 5.771441 0.236338 9.719685 3.002211 3.691875 1 \n.. ... ... ... ... ... ... \n646 5.351858 0.245893 10.054262 3.207613 2.967847 0 \n646 5.351858 0.245893 10.059679 3.353057 2.392426 0 \n646 5.351858 0.245893 10.064267 3.410157 3.089678 0 \n646 5.351858 0.245893 10.071161 3.396855 2.187174 0 \n646 5.351858 0.245893 10.071420 3.410157 3.089678 0 \n\n 地区_云南 地区_内蒙 地区_北京 地区_吉林 ... 机组类型_纯凝式 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 \\\n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n0 0 0 0 0 ... 1 1 0 0 \n.. ... ... ... ... ... ... ... ... ... \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n646 0 0 0 0 ... 1 0 0 0 \n\n 参数分类_超高压 冷却方式_水冷 冷却方式_直接空冷 冷却方式_间接空冷 锅炉类型_循环流化床 锅炉类型_煤粉 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n0 0 1 0 0 0 1 \n.. ... ... ... ... ... ... 
\n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n646 1 1 0 0 0 1 \n\n[208875 rows x 45 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
机组容量CO2_em_air入炉煤低位热值_new燃煤挥发份Var_new燃煤灰份Aar_new地区_上海地区_云南地区_内蒙地区_北京地区_吉林...机组类型_纯凝式参数分类_亚临界参数分类_超临界参数分类_超超临界参数分类_超高压冷却方式_水冷冷却方式_直接空冷冷却方式_间接空冷锅炉类型_循环流化床锅炉类型_煤粉
05.7714410.2363389.4550113.1027913.54240810000...1100010001
05.7714410.2363389.6158723.1970392.99773010000...1100010001
05.7714410.2363389.6866373.1917102.98214010000...1100010001
05.7714410.2363389.6922113.1917102.98214010000...1100010001
05.7714410.2363389.7196853.0022113.69187510000...1100010001
..................................................................
6465.3518580.24589310.0542623.2076132.96784700000...1000110001
6465.3518580.24589310.0596793.3530572.39242600000...1000110001
6465.3518580.24589310.0642673.4101573.08967800000...1000110001
6465.3518580.24589310.0711613.3968552.18717400000...1000110001
6465.3518580.24589310.0714203.4101573.08967800000...1000110001
\n

208875 rows × 45 columns

\n
" + }, + "execution_count": 84, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "norm_data_dummy" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 85, + "outputs": [], + "source": [ + "new_xgb_data = xgb.DMatrix(norm_data_dummy[feature_cols])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 86, + "outputs": [], + "source": [ + "norm_data.drop(columns='CO2_em_air', inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 87, + "outputs": [], + "source": [ + "norm_data['co2_pred'] = gb_model.predict(new_xgb_data)\n", + "normaled_data = norm_data.drop(columns=['入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']).groupby([x for x in use_col if x not in ['CO2_em_air', '入炉煤低位热值_new', '燃煤挥发份Var_new', '燃煤灰份Aar_new']])['co2_pred'].mean()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [ + "normaled_data.reset_index().to_csv('./data/去煤种化数据.csv', encoding='utf-8-sig', index=False)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.7.13 ('py37')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "993bd31d5df1020fab369d79a34ff0a2a159e1798f3e25d3ad4b7751d38184c9" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff 
--git a/省际测试.ipynb b/省际测试.ipynb new file mode 100644 index 0000000..c1dcf01 --- /dev/null +++ b/省际测试.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [], + "source": [ + "import pandas as pd" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + "source": [ + "data = pd.read_csv('./供热测试结果.csv')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [ + { + "data": { + "text/plain": " 0 1\n0 0.072858 0.072700\n1 0.073347 0.075045\n2 0.082159 0.080671\n3 0.084120 0.081944\n4 0.065845 0.066739\n.. ... ...\n408 0.066066 0.066927\n409 0.084331 0.082709\n410 0.069216 0.069256\n411 0.065259 0.066203\n412 0.069608 0.071754\n\n[413 rows x 2 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
01
00.0728580.072700
10.0733470.075045
20.0821590.080671
30.0841200.081944
40.0658450.066739
.........
4080.0660660.066927
4090.0843310.082709
4100.0692160.069256
4110.0652590.066203
4120.0696080.071754
\n

413 rows × 2 columns

\n
" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "from sklearn.metrics import r2_score" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [ + { + "data": { + "text/plain": "0.8483477508497194" + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "r2_score(data.values[:,1], data.values[:,0])" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on function r2_score in module sklearn.metrics._regression:\n", + "\n", + "r2_score(y_true, y_pred, *, sample_weight=None, multioutput='uniform_average')\n", + " :math:`R^2` (coefficient of determination) regression score function.\n", + " \n", + " Best possible score is 1.0 and it can be negative (because the\n", + " model can be arbitrarily worse). 
A constant model that always\n", + " predicts the expected value of y, disregarding the input features,\n", + " would get a :math:`R^2` score of 0.0.\n", + " \n", + " Read more in the :ref:`User Guide `.\n", + " \n", + " Parameters\n", + " ----------\n", + " y_true : array-like of shape (n_samples,) or (n_samples, n_outputs)\n", + " Ground truth (correct) target values.\n", + " \n", + " y_pred : array-like of shape (n_samples,) or (n_samples, n_outputs)\n", + " Estimated target values.\n", + " \n", + " sample_weight : array-like of shape (n_samples,), default=None\n", + " Sample weights.\n", + " \n", + " multioutput : {'raw_values', 'uniform_average', 'variance_weighted'}, array-like of shape (n_outputs,) or None, default='uniform_average'\n", + " \n", + " Defines aggregating of multiple output scores.\n", + " Array-like value defines weights used to average scores.\n", + " Default is \"uniform_average\".\n", + " \n", + " 'raw_values' :\n", + " Returns a full set of scores in case of multioutput input.\n", + " \n", + " 'uniform_average' :\n", + " Scores of all outputs are averaged with uniform weight.\n", + " \n", + " 'variance_weighted' :\n", + " Scores of all outputs are averaged, weighted by the variances\n", + " of each individual output.\n", + " \n", + " .. versionchanged:: 0.19\n", + " Default value of multioutput is 'uniform_average'.\n", + " \n", + " Returns\n", + " -------\n", + " z : float or ndarray of floats\n", + " The :math:`R^2` score or ndarray of scores if 'multioutput' is\n", + " 'raw_values'.\n", + " \n", + " Notes\n", + " -----\n", + " This is not a symmetric function.\n", + " \n", + " Unlike most other scores, :math:`R^2` score may be negative (it need not\n", + " actually be the square of a quantity R).\n", + " \n", + " This metric is not well-defined for single samples and will return a NaN\n", + " value if n_samples is less than two.\n", + " \n", + " References\n", + " ----------\n", + " .. 
[1] `Wikipedia entry on the Coefficient of determination\n", + " `_\n", + " \n", + " Examples\n", + " --------\n", + " >>> from sklearn.metrics import r2_score\n", + " >>> y_true = [3, -0.5, 2, 7]\n", + " >>> y_pred = [2.5, 0.0, 2, 8]\n", + " >>> r2_score(y_true, y_pred)\n", + " 0.948...\n", + " >>> y_true = [[0.5, 1], [-1, 1], [7, -6]]\n", + " >>> y_pred = [[0, 2], [-1, 2], [8, -5]]\n", + " >>> r2_score(y_true, y_pred,\n", + " ... multioutput='variance_weighted')\n", + " 0.938...\n", + " >>> y_true = [1, 2, 3]\n", + " >>> y_pred = [1, 2, 3]\n", + " >>> r2_score(y_true, y_pred)\n", + " 1.0\n", + " >>> y_true = [1, 2, 3]\n", + " >>> y_pred = [2, 2, 2]\n", + " >>> r2_score(y_true, y_pred)\n", + " 0.0\n", + " >>> y_true = [1, 2, 3]\n", + " >>> y_pred = [3, 2, 1]\n", + " >>> r2_score(y_true, y_pred)\n", + " -3.0\n", + "\n" + ] + } + ], + "source": [ + "help(r2_score)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file diff --git a/计算碳排放.ipynb b/计算碳排放.ipynb new file mode 100644 index 0000000..a5506df --- /dev/null +++ b/计算碳排放.ipynb @@ -0,0 +1,701 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "outputs": [], + "source": [ + "import pandas as pd" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 2, + "outputs": [], + 
"source": [ + "import numpy as np\n", + "import time" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "markdown", + "source": [ + "$E = E_{燃烧} + E_{脱硫} + E_{电}$ \\\n", + "$E_{燃烧} = \\sum_i (AD_i\\times EF_i)$ \\\n", + "$AD_i = FC_i \\times NCV_i \\times 10^{-6}$ 消耗量*低位热值 \\\n", + "$EF_i = CC_i \\times OF_i \\times \\frac{44}{12}$ 单位热值含碳量 * 碳氧化率" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%% md\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 3, + "outputs": [], + "source": [ + "def cal_ad(fc, ncv):\n", + " return fc * ncv * 1e-6" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 4, + "outputs": [], + "source": [ + "def cal_ef(cc, of):\n", + " return cc * of * 44 / 12" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 5, + "outputs": [], + "source": [ + "cc_map = {\n", + " \"无烟煤\": 27.49,\n", + " \"烟煤\": 26.18,\n", + " \"褐煤\": 27.97,\n", + " \"贫煤\": 26.0,\n", + "}" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 6, + "outputs": [], + "source": [ + "data = pd.read_excel('./total_data.xlsx')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 7, + "outputs": [], + "source": [ + "data['入炉煤低位热值(kJ/kg)'] = data['入炉煤低位热值(kJ/kg)'].apply(lambda x: x*1000 if x < 100 else x*1)\n", + "data['燃煤灰份Aar(%)'] = data['燃煤灰份Aar(%)'].apply(lambda x: x / 1000 if x > 10000 else x*1)\n", + "data['燃煤挥发份Var(%)'] = data['燃煤挥发份Var(%)'].apply(lambda x: x / 1000 if x > 10000 else x*1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 8, + "outputs": [], + "source": [ +
"def cal_ctype(ncv, vdaf, ash):\n", + " if pd.isna(ncv) or pd.isna(vdaf) or pd.isna(ash):\n", + " return pd.NA\n", + " else:\n", + " if vdaf / (1 - ash/100 - 0.07196) < 10:\n", + " return '无烟煤'\n", + " elif vdaf / (1 - ash/100 - 0.06825) < 20:\n", + " return '贫煤'\n", + " elif vdaf / (1 - ash/100 - 0.08679) < 37 and ncv > 16730:\n", + " return '烟煤'\n", + " elif ncv < 16730:\n", + " return '褐煤'\n", + " else:\n", + " return '烟煤'" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 9, + "outputs": [], + "source": [ + "data['煤种'] = data.apply(lambda x: cal_ctype(x['入炉煤低位热值(kJ/kg)'], x['燃煤挥发份Var(%)'], x['燃煤灰份Aar(%)']), axis=1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [], + "source": [ + "data = data[~data['煤种'].isna()].copy()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 11, + "outputs": [], + "source": [ + "data = data[data['入炉煤低位热值(kJ/kg)']> 8000].copy()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], + "source": [ + "data = data[~((data['发电用标煤量(t)'].isna())|(data['发电用标煤量(t)']==0))].copy()\n", + "data = data[~((data['发电用标煤量(t)'].isna())|(data['发电用标煤量(t)']==0))].copy()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 13, + "outputs": [ + { + "data": { + "text/plain": "['出力系数(%).1', '出力系数(%).2', '化学系统耗电率 (%).1', '额再热蒸汽温度 (℃).1']" + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "drop_cols = [x for x in data.columns if '.1' in x or '.2' in x]\n", + "drop_cols" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": 
"#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 14, + "outputs": [], + "source": [ + "data.drop(columns=drop_cols, inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 15, + "outputs": [], + "source": [ + "data['工业供热量'].fillna(0, inplace=True)\n", + "data['采暖供热量'].fillna(0, inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 16, + "outputs": [ + { + "data": { + "text/plain": "array([ 56, 96, 122, ..., 16724, 16725, 16837], dtype=int64)" + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "drop_index = data[(data['发电量(万kWh)']<10000)|(data['总供热量']<=10000)|(data['供热用标煤量(t)']==0)].index.values\n", + "drop_index" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 17, + "outputs": [], + "source": [ + "data.drop(index=drop_index, inplace=True)\n", + "data.reset_index(inplace=True, drop=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 18, + "outputs": [], + "source": [ + "data['发电碳排放因子(kg/kWh)'] = data.apply(lambda x: (cal_ad(x['发电用标煤量(t)'], x['入炉煤低位热值(kJ/kg)']) * cal_ef(cc_map.get(x['煤种']), 0.98))/x['发电量(万kWh)']/10, axis=1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 19, + "outputs": [], + "source": [ + "data['供热碳排放因子(kg/MJ)'] = data.apply(lambda x:(cal_ad(x['供热用标煤量(t)'], x['入炉煤低位热值(kJ/kg)']) * cal_ef(cc_map.get(x['煤种']), 0.98))/x['总供热量'] if x['总供热量'] != 0 else 0, axis=1)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 20, + "outputs": [ + { + 
"data": { + "text/plain": " 总供热量 供热碳排放因子(kg/MJ)\n0 8.294556e+05 0.076843\n1 8.789928e+04 0.077676\n2 8.481764e+05 0.074823\n3 4.473469e+05 0.081628\n4 7.157164e+05 0.081103\n... ... ...\n7252 1.532303e+06 0.078776\n7253 2.147545e+06 0.076622\n7254 2.131207e+06 0.074772\n7255 3.039811e+06 0.091482\n7256 3.039813e+06 0.091483\n\n[7257 rows x 2 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
总供热量供热碳排放因子(kg/MJ)
08.294556e+050.076843
18.789928e+040.077676
28.481764e+050.074823
34.473469e+050.081628
47.157164e+050.081103
.........
72521.532303e+060.078776
72532.147545e+060.076622
72542.131207e+060.074772
72553.039811e+060.091482
72563.039813e+060.091483
\n

7257 rows × 2 columns

\n
" + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[['总供热量', '供热碳排放因子(kg/MJ)']]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 21, + "outputs": [ + { + "data": { + "text/plain": "126 0.460130\n127 0.480800\n128 0.493493\n129 0.487034\n130 0.475439\n131 0.481355\n132 0.450568\n133 0.454952\n134 0.460595\n135 0.478949\n136 0.486923\n137 0.483407\n138 0.488134\n139 0.471598\n140 0.450577\n141 0.461044\nName: 发电碳排放因子(kg/kWh), dtype: float64" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['电厂名称']=='华电内蒙古能源有限公司包头发电分公司']['发电碳排放因子(kg/kWh)']" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 22, + "outputs": [ + { + "data": { + "text/plain": " plant longitude latitude altitude\n0 万方发电厂(焦作爱依斯万方电力有限公司) 113.381649 35.255622 88.0\n1 三河发电有限责任公司 116.860260 39.953617 27.0\n2 上海上电漕泾发电有限公司 121.407593 30.765242 4.0\n3 上海吴泾发电有限责任公司 121.471140 31.065113 3.0\n4 上海吴泾第二发电有限责任公司 121.471340 31.062532 4.0", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
plantlongitudelatitudealtitude
0万方发电厂(焦作爱依斯万方电力有限公司)113.38164935.25562288.0
1三河发电有限责任公司116.86026039.95361727.0
2上海上电漕泾发电有限公司121.40759330.7652424.0
3上海吴泾发电有限责任公司121.47114031.0651133.0
4上海吴泾第二发电有限责任公司121.47134031.0625324.0
\n
" + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "loc_data = pd.read_csv('./电厂机组地理信息.csv')\n", + "loc_data.head()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 23, + "outputs": [], + "source": [ + "loc_data.rename(columns={'plant':'电厂名称'}, inplace=True)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 24, + "outputs": [], + "source": [ + "data = data.merge(loc_data, how='left', on='电厂名称')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 25, + "outputs": [ + { + "data": { + "text/plain": "Index(['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)', 'longitude', 'latitude',\n 'altitude'],\n dtype='object')" + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.columns[-5:]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 26, + "outputs": [], + "source": [ + "uni = data.groupby(['电厂名称', '机组编号'])['铭牌容量 (MW)'].unique().to_frame()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 27, + "outputs": [], + "source": [ + "uni['len'] = uni['铭牌容量 (MW)'].apply(len)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 28, + "outputs": [ + { + "data": { + "text/plain": "Empty DataFrame\nColumns: [铭牌容量 (MW), len]\nIndex: []", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
铭牌容量 (MW)len
电厂名称机组编号
\n
" + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "uni[uni.len==2]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 29, + "outputs": [], + "source": [ + "data = data[data['供热碳排放因子(kg/MJ)'] < 0.2].copy()" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 30, + "outputs": [], + "source": [ + "import seaborn as sns" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 31, + "outputs": [], + "source": [ + "from scipy.stats import norm" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 32, + "outputs": [ + { + "data": { + "text/plain": "Empty DataFrame\nColumns: [电厂名称, 机组编号, 铭牌容量 (MW), 投产时间, 机组类型, 参数分类, 所处地区, 冷凝器型式, 时间, 发电量(万kWh), 供电量(万kWh), 标煤量(t), 发电用标煤量(t), 供热用标煤量(t), 利用小时(h), 平均负荷(MW), 出力系数(%), 工业供热量, 工业热电比(%), 采暖供热量, 采暖热电比(%), 总热电比(%), 总供热量, 供热煤耗, 给水泵汽轮机总耗热量, 供电煤耗gce/(kWh), 综合厂用电率(%), 发电厂用电率(%), 供热厂用电率(%), 非生产厂用电率(%), 最新THA工况二类修正后汽机热耗率kJ/KWh, 点火用油(kg/万kWh), 助燃用油(kg/万kWh), 主蒸汽压力(MPa), 主蒸汽温度(℃), 再热蒸汽温度(℃), 高加投入率(%), 给水温度(℃), 真空严密性V(Pa/min), 真空度(%), 凝汽器端差(℃), 凝结水过冷度(℃), 发电补给水率(%), 发电综合耗水率, 排烟温度(℃), 飞灰含碳量(%), 空预器漏风率(%), 过热器减温水量(t/h), 再热器减温水量(t/h), 入厂煤低位热值(kJ/kg), 入炉煤低位热值(kJ/kg), 燃煤挥发份Var(%), 燃煤灰份Aar(%), 燃煤低位热值Qar,net(kJ/kg), 燃煤硫份Sar(%), 锅炉专业主要辅机耗电率 送风机耗电率(%), 锅炉专业主要辅机耗电率 引风机耗电率(%), 锅炉专业主要辅机耗电率 一次风机耗电率(%), 锅炉专业主要辅机耗电率 炉水泵耗电率(%), 锅炉专业主要辅机耗电率 给煤机耗电率(%), 锅炉专业主要辅机耗电率 磨煤机耗电率(%), 锅炉专业主要辅机耗电率 电除尘器耗电率(%), 锅炉专业主要辅机耗电率 除灰系统耗电率(%), 汽机专业主要辅机耗电率 凝结水泵耗电率(%), 汽机专业主要辅机耗电率 前置泵耗电率(%), 汽机专业主要辅机耗电率 电动给水泵耗电率(%), 汽机专业主要辅机耗电率 循环水泵耗电率(%), 汽机专业主要辅机耗电率 空冷风机耗电率(%), 汽机专业主要辅机耗电率 热网循环水泵耗电率(%), 环保专业耗电率 脱硫系统耗电率(%), 环保专业耗电率 脱销系统耗电率(%), 输煤专业耗电率输煤系统耗电率(%), 化学系统耗电率 (%), 企业编码, 简称, 制造厂家, 型号, 型式, 
最大连续出力B-MCR(t/h), 设计效率(%), 工质流动方式, 额定主蒸汽压力 (MPa), 额再热蒸汽温度 (℃), 额定再热蒸汽压力 (MPa), 额再热蒸汽温度 (℃), 点火方式, 燃烧方式, 设计燃煤种类, 设计燃煤灰份(收到基)(%), 设计燃煤挥发份)(收到基)(%), 设计燃煤低位热值(收到基)(kJ/kg), 排渣方式, 除灰方式, 空预器制造厂家, 空预器型号, 空预器型式, 空预器设计漏风率(%), 磨煤机制造厂家, 磨煤机型号, 磨煤机型式, ...]\nIndex: []\n\n[0 rows x 134 columns]", + "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
电厂名称机组编号铭牌容量 (MW)投产时间机组类型参数分类所处地区冷凝器型式时间发电量(万kWh)...引风机最大风压(Pa)引风机最大流量(m3/S)引风机电机电压(V)引风机电机功率(KW)煤种发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)longitudelatitudealtitude
\n

0 rows × 134 columns

\n
" + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data[data['供热碳排放因子(kg/MJ)'] <= 0]" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 33, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: UserWarning: \n", + "\n", + "`distplot` is a deprecated function and will be removed in seaborn v0.14.0.\n", + "\n", + "Please adapt your code to use either `displot` (a figure-level function with\n", + "similar flexibility) or `histplot` (an axes-level function for histograms).\n", + "\n", + "For a guide to updating your code to use the new functions, please see\n", + "https://gist.github.com/mwaskom/de44147ed2974457ad6372750bbe5751\n", + "\n", + " \"\"\"Entry point for launching an IPython kernel.\n" + ] + }, + { + "data": { + "text/plain": "" + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 20379 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 28909 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 30899 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 25490 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + 
"D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 25918 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 22240 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:214: RuntimeWarning: Glyph 23376 missing from current font.\n", + " font.set_text(s, 0.0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 20379 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 28909 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 30899 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 25490 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 25918 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 22240 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n", + "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:183: RuntimeWarning: Glyph 23376 missing from current font.\n", + " font.set_text(s, 0, flags=flags)\n" + ] + }, + { + "data": { + "text/plain": "
", + "image/png": "\n" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "sns.distplot(data['供热碳排放因子(kg/MJ)'], fit=norm)" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": 34, + "outputs": [], + "source": [ + "data[['电厂名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude', 'altitude','发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].to_csv('./train_data.csv', index=False, encoding='utf-8-sig')" + ], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + }, + { + "cell_type": "code", + "execution_count": null, + "outputs": [], + "source": [], + "metadata": { + "collapsed": false, + "pycharm": { + "name": "#%%\n" + } + } + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file