1536 lines
62 KiB
Plaintext
1536 lines
62 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd\n",
|
||
"import numpy as np\n",
|
||
"import xgboost as xgb\n",
|
||
"import seaborn as sns\n",
|
||
"from sklearn.model_selection import train_test_split\n",
|
||
"from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n1 上海市 凝气式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n2 上海市 凝气式 亚临界 水冷 5.771441 4.808939 3.476886 1.098612 \n3 上海市 凝气式 超超临界 水冷 6.908755 4.807356 3.458373 1.609438 \n4 上海市 纯凝式 亚临界 水冷 5.860786 4.807839 3.478627 2.833213 \n\n power_co2_factor heat_co2_factor \n0 0.574332 0.072680 \n1 0.582164 0.072391 \n2 0.569281 0.071041 \n3 0.506250 0.070460 \n4 0.565226 0.073717 ",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>power_co2_factor</th>\n <th>heat_co2_factor</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>供热式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.574332</td>\n <td>0.072680</td>\n </tr>\n <tr>\n <th>1</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.582164</td>\n <td>0.072391</td>\n </tr>\n <tr>\n <th>2</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.771441</td>\n <td>4.808939</td>\n <td>3.476886</td>\n <td>1.098612</td>\n <td>0.569281</td>\n <td>0.071041</td>\n </tr>\n <tr>\n <th>3</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>超超临界</td>\n <td>水冷</td>\n <td>6.908755</td>\n <td>4.807356</td>\n <td>3.458373</td>\n <td>1.609438</td>\n <td>0.506250</td>\n <td>0.070460</td>\n </tr>\n <tr>\n <th>4</th>\n <td>上海市</td>\n <td>纯凝式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.860786</td>\n <td>4.807839</td>\n <td>3.478627</td>\n <td>2.833213</td>\n <td>0.565226</td>\n <td>0.073717</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data = pd.read_csv('./results/去煤种化数据.csv')\n",
|
||
"data.head()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "(['所处地区', '机组类型', '参数分类', '冷凝器型式'],\n Index(['铭牌容量 (MW)', 'longitude', 'latitude', 'altitude'], dtype='object'))"
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"object_cols = data.columns[:4].tolist()\n",
|
||
"num_cols = data.columns[4:8]\n",
|
||
"object_cols, num_cols"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data = pd.read_excel('./data/煤电机组情况(含企业名称).xlsx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_geo_info = pd.read_excel('./data/电厂地理信息.xlsx')\n",
|
||
"test_geo_info.rename(columns={'name':'企业名称'}, inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data = test_data.merge(test_geo_info, how='left', on='企业名称').drop(columns='address')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data_cp = test_data.copy()\n",
|
||
"test_data = test_data[['地区', '汽轮机类型', '压力参数', '冷却方式', '单机容量(MW)', 'lat', 'lng', 'altitude']].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data.columns = data.columns[:8].tolist()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data['na_cols'] = test_data.isna().sum(axis=1).values"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"test_data = test_data[test_data['铭牌容量 (MW)']>=30].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "0.965160147200342"
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test_data[test_data.na_cols <= 1]['铭牌容量 (MW)'].sum() /10 / 112228"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"new_test_data = test_data[test_data.na_cols <= 1].drop(columns='na_cols').reset_index(drop=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "水冷 413\n空冷 110\n其他 1\nName: 冷凝器型式, dtype: int64"
|
||
},
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data['冷凝器型式'].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "水冷-闭式循环 1442\n水冷-开式循环 737\n空冷-直接空冷 497\n其他 255\n空冷-间接空冷 221\n水冷 52\n空冷 14\n间接空冷 4\n直接空冷 2\nName: 冷凝器型式, dtype: int64"
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_test_data['冷凝器型式'].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def change_type(x:str):\n",
|
||
" if '水冷' in x:\n",
|
||
" return '水冷'\n",
|
||
" elif '空冷' in x:\n",
|
||
" return \"空冷\"\n",
|
||
" else:\n",
|
||
" return '其他'"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"new_test_data.fillna('其他', inplace=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"new_test_data['冷凝器型式'] = new_test_data['冷凝器型式'].apply(change_type)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "亚临界 265\n超临界 156\n超超临界 69\n超高压 32\n高压 2\nName: 参数分类, dtype: int64"
|
||
},
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"data['参数分类'].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "亚临界 1072\n高压 726\n超临界 608\n超高压 403\n超超临界 358\n中压 57\nName: 参数分类, dtype: int64"
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_test_data['参数分类'].value_counts()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"new_test_data['机组类型'] = new_test_data['机组类型'].apply(lambda x: x if x.endswith('式') else x + '式')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"for col in num_cols:\n",
|
||
" new_test_data[col] = new_test_data[col].apply(lambda x: 0 if x<0 else x)\n",
|
||
" new_test_data[col] = np.log1p(new_test_data[col])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude\n0 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895\n1 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895\n2 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895\n3 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895\n4 安徽省 抽凝式 高压 水冷 3.713572 3.451583 4.772094 2.397895\n... ... ... ... ... ... ... ... ...\n3219 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447\n3220 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447\n3221 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447\n3222 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359\n3223 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359\n\n[3224 rows x 8 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>安徽省</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.771441</td>\n <td>3.451583</td>\n <td>4.772094</td>\n <td>2.397895</td>\n </tr>\n <tr>\n <th>1</th>\n <td>安徽省</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.771441</td>\n <td>3.451583</td>\n <td>4.772094</td>\n <td>2.397895</td>\n </tr>\n <tr>\n <th>2</th>\n <td>安徽省</td>\n <td>凝气式</td>\n <td>超超临界</td>\n <td>水冷</td>\n <td>6.908755</td>\n <td>3.451583</td>\n <td>4.772094</td>\n <td>2.397895</td>\n </tr>\n <tr>\n <th>3</th>\n <td>安徽省</td>\n <td>凝气式</td>\n <td>超超临界</td>\n <td>水冷</td>\n <td>6.908755</td>\n <td>3.451583</td>\n <td>4.772094</td>\n <td>2.397895</td>\n </tr>\n <tr>\n <th>4</th>\n <td>安徽省</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷</td>\n <td>3.713572</td>\n <td>3.451583</td>\n <td>4.772094</td>\n <td>2.397895</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3219</th>\n <td>重庆市</td>\n <td>抽背式</td>\n <td>高压</td>\n <td>其他</td>\n <td>3.931826</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n </tr>\n <tr>\n <th>3220</th>\n <td>重庆市</td>\n <td>抽背式</td>\n <td>高压</td>\n <td>其他</td>\n <td>3.931826</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n </tr>\n <tr>\n <th>3221</th>\n <td>重庆市</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷</td>\n <td>3.912023</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n </tr>\n 
<tr>\n <th>3222</th>\n <td>重庆市</td>\n <td>背压式</td>\n <td>高压</td>\n <td>其他</td>\n <td>3.433987</td>\n <td>3.428715</td>\n <td>4.682208</td>\n <td>5.690359</td>\n </tr>\n <tr>\n <th>3223</th>\n <td>重庆市</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷</td>\n <td>4.836282</td>\n <td>3.428715</td>\n <td>4.682208</td>\n <td>5.690359</td>\n </tr>\n </tbody>\n</table>\n<p>3224 rows × 8 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_test_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n1 上海市 凝气式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n2 上海市 凝气式 亚临界 水冷 5.771441 4.808939 3.476886 1.098612 \n3 上海市 凝气式 超超临界 水冷 6.908755 4.807356 3.458373 1.609438 \n4 上海市 纯凝式 亚临界 水冷 5.860786 4.807839 3.478627 2.833213 \n... ... ... ... ... ... ... ... ... \n3219 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n3220 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n3221 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447 \n3222 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359 \n3223 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359 \n\n power_co2_factor heat_co2_factor \n0 0.574332 0.072680 \n1 0.582164 0.072391 \n2 0.569281 0.071041 \n3 0.506250 0.070460 \n4 0.565226 0.073717 \n... ... ... \n3219 NaN NaN \n3220 NaN NaN \n3221 NaN NaN \n3222 NaN NaN \n3223 NaN NaN \n\n[3748 rows x 10 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>所处地区</th>\n <th>机组类型</th>\n <th>参数分类</th>\n <th>冷凝器型式</th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>power_co2_factor</th>\n <th>heat_co2_factor</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>上海市</td>\n <td>供热式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.574332</td>\n <td>0.072680</td>\n </tr>\n <tr>\n <th>1</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.582164</td>\n <td>0.072391</td>\n </tr>\n <tr>\n <th>2</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.771441</td>\n <td>4.808939</td>\n <td>3.476886</td>\n <td>1.098612</td>\n <td>0.569281</td>\n <td>0.071041</td>\n </tr>\n <tr>\n <th>3</th>\n <td>上海市</td>\n <td>凝气式</td>\n <td>超超临界</td>\n <td>水冷</td>\n <td>6.908755</td>\n <td>4.807356</td>\n <td>3.458373</td>\n <td>1.609438</td>\n <td>0.506250</td>\n <td>0.070460</td>\n </tr>\n <tr>\n <th>4</th>\n <td>上海市</td>\n <td>纯凝式</td>\n <td>亚临界</td>\n <td>水冷</td>\n <td>5.860786</td>\n <td>4.807839</td>\n <td>3.478627</td>\n <td>2.833213</td>\n <td>0.565226</td>\n <td>0.073717</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3219</th>\n <td>重庆市</td>\n <td>抽背式</td>\n <td>高压</td>\n <td>其他</td>\n <td>3.931826</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3220</th>\n 
<td>重庆市</td>\n <td>抽背式</td>\n <td>高压</td>\n <td>其他</td>\n <td>3.931826</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3221</th>\n <td>重庆市</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷</td>\n <td>3.912023</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3222</th>\n <td>重庆市</td>\n <td>背压式</td>\n <td>高压</td>\n <td>其他</td>\n <td>3.433987</td>\n <td>3.428715</td>\n <td>4.682208</td>\n <td>5.690359</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n <tr>\n <th>3223</th>\n <td>重庆市</td>\n <td>抽凝式</td>\n <td>高压</td>\n <td>水冷</td>\n <td>4.836282</td>\n <td>3.428715</td>\n <td>4.682208</td>\n <td>5.690359</td>\n <td>NaN</td>\n <td>NaN</td>\n </tr>\n </tbody>\n</table>\n<p>3748 rows × 10 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"merge_data = pd.concat([data, new_test_data], axis=0)\n",
|
||
"merge_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": " 铭牌容量 (MW) longitude latitude altitude power_co2_factor \\\n0 5.707110 4.807875 3.467769 1.386294 0.574332 \n1 5.707110 4.807875 3.467769 1.386294 0.582164 \n2 5.771441 4.808939 3.476886 1.098612 0.569281 \n3 6.908755 4.807356 3.458373 1.609438 0.506250 \n4 5.860786 4.807839 3.478627 2.833213 0.565226 \n... ... ... ... ... ... \n3219 3.931826 3.427489 4.682353 5.645447 NaN \n3220 3.931826 3.427489 4.682353 5.645447 NaN \n3221 3.912023 3.427489 4.682353 5.645447 NaN \n3222 3.433987 3.428715 4.682208 5.690359 NaN \n3223 4.836282 3.428715 4.682208 5.690359 NaN \n\n heat_co2_factor 所处地区_上海市 所处地区_云南省 所处地区_内蒙古 所处地区_内蒙古自治区 ... \\\n0 0.072680 1 0 0 0 ... \n1 0.072391 1 0 0 0 ... \n2 0.071041 1 0 0 0 ... \n3 0.070460 1 0 0 0 ... \n4 0.073717 1 0 0 0 ... \n... ... ... ... ... ... ... \n3219 NaN 0 0 0 0 ... \n3220 NaN 0 0 0 0 ... \n3221 NaN 0 0 0 0 ... \n3222 NaN 0 0 0 0 ... \n3223 NaN 0 0 0 0 ... \n\n 机组类型_背压式 参数分类_中压 参数分类_亚临界 参数分类_超临界 参数分类_超超临界 参数分类_超高压 参数分类_高压 \\\n0 0 0 1 0 0 0 0 \n1 0 0 1 0 0 0 0 \n2 0 0 1 0 0 0 0 \n3 0 0 0 0 1 0 0 \n4 0 0 1 0 0 0 0 \n... ... ... ... ... ... ... ... \n3219 0 0 0 0 0 0 1 \n3220 0 0 0 0 0 0 1 \n3221 0 0 0 0 0 0 1 \n3222 1 0 0 0 0 0 1 \n3223 0 0 0 0 0 0 1 \n\n 冷凝器型式_其他 冷凝器型式_水冷 冷凝器型式_空冷 \n0 0 1 0 \n1 0 1 0 \n2 0 1 0 \n3 0 1 0 \n4 0 1 0 \n... ... ... ... \n3219 1 0 0 \n3220 1 0 0 \n3221 0 1 0 \n3222 1 0 0 \n3223 0 1 0 \n\n[3748 rows x 63 columns]",
|
||
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>铭牌容量 (MW)</th>\n <th>longitude</th>\n <th>latitude</th>\n <th>altitude</th>\n <th>power_co2_factor</th>\n <th>heat_co2_factor</th>\n <th>所处地区_上海市</th>\n <th>所处地区_云南省</th>\n <th>所处地区_内蒙古</th>\n <th>所处地区_内蒙古自治区</th>\n <th>...</th>\n <th>机组类型_背压式</th>\n <th>参数分类_中压</th>\n <th>参数分类_亚临界</th>\n <th>参数分类_超临界</th>\n <th>参数分类_超超临界</th>\n <th>参数分类_超高压</th>\n <th>参数分类_高压</th>\n <th>冷凝器型式_其他</th>\n <th>冷凝器型式_水冷</th>\n <th>冷凝器型式_空冷</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.574332</td>\n <td>0.072680</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>1</th>\n <td>5.707110</td>\n <td>4.807875</td>\n <td>3.467769</td>\n <td>1.386294</td>\n <td>0.582164</td>\n <td>0.072391</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>2</th>\n <td>5.771441</td>\n <td>4.808939</td>\n <td>3.476886</td>\n <td>1.098612</td>\n <td>0.569281</td>\n <td>0.071041</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3</th>\n <td>6.908755</td>\n <td>4.807356</td>\n <td>3.458373</td>\n <td>1.609438</td>\n <td>0.506250</td>\n <td>0.070460</td>\n <td>1</td>\n <td>0</td>\n 
<td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>4</th>\n <td>5.860786</td>\n <td>4.807839</td>\n <td>3.478627</td>\n <td>2.833213</td>\n <td>0.565226</td>\n <td>0.073717</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>...</th>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n <td>...</td>\n </tr>\n <tr>\n <th>3219</th>\n <td>3.931826</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3220</th>\n <td>3.931826</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3221</th>\n <td>3.912023</td>\n <td>3.427489</td>\n <td>4.682353</td>\n <td>5.645447</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3222</th>\n <td>3.433987</td>\n <td>3.428715</td>\n <td>4.682208</td>\n <td>5.690359</td>\n 
<td>NaN</td>\n <td>NaN</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>1</td>\n <td>0</td>\n <td>0</td>\n </tr>\n <tr>\n <th>3223</th>\n <td>4.836282</td>\n <td>3.428715</td>\n <td>4.682208</td>\n <td>5.690359</td>\n <td>NaN</td>\n <td>NaN</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>...</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n <td>1</td>\n <td>0</td>\n </tr>\n </tbody>\n</table>\n<p>3748 rows × 63 columns</p>\n</div>"
|
||
},
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"use_data = pd.get_dummies(merge_data, columns=object_cols)\n",
|
||
"use_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"use_data.to_csv('./去煤种化后的训练数据.csv', encoding='utf-8-sig', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_set = use_data[~use_data.power_co2_factor.isna()].copy()\n",
|
||
"test_set = use_data[use_data.power_co2_factor.isna()].copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"feature_cols = [x for x in train_set.columns if 'factor' not in x]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train_data = train_set.copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.model_selection import train_test_split"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 56,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"train, valid = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=666)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 57,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"dtest = xgb.DMatrix(test_set[feature_cols])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 77,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"params_xgb = {'objective': 'reg:squarederror',\n",
|
||
" 'booster': 'gbtree',\n",
|
||
" 'eta': 0.01,\n",
|
||
" 'max_depth': 30,\n",
|
||
" 'subsample': 0.8,\n",
|
||
" 'colsample_bytree': 0.95,\n",
|
||
" 'min_child_weight': 60,\n",
|
||
" 'seed': 42}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 78,
|
||
"outputs": [],
|
||
"source": [
|
||
"from sklearn.model_selection import KFold"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 80,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 6.9E-04, RMSE: 0.0262, MAE: 0.018, MAPE: 3.81 %, R_2: 0.8015\n",
|
||
"MSE: 4.6E-04, RMSE: 0.0215, MAE: 0.0155, MAPE: 3.24 %, R_2: 0.8596\n",
|
||
"MSE: 1.1E-03, RMSE: 0.0337, MAE: 0.0214, MAPE: 4.6 %, R_2: 0.6518\n",
|
||
"MSE: 8.7E-04, RMSE: 0.0295, MAE: 0.019, MAPE: 4.14 %, R_2: 0.7524\n",
|
||
"MSE: 1.1E-03, RMSE: 0.0326, MAE: 0.0219, MAPE: 4.62 %, R_2: 0.695\n",
|
||
"MSE: 1.1E-03, RMSE: 0.0336, MAE: 0.0237, MAPE: 5.23 %, R_2: 0.6424\n",
|
||
"MSE: 6.0E-04, RMSE: 0.0245, MAE: 0.0164, MAPE: 3.46 %, R_2: 0.8288\n",
|
||
"MSE: 9.4E-04, RMSE: 0.0307, MAE: 0.0224, MAPE: 4.96 %, R_2: 0.7396\n",
|
||
"MSE: 6.6E-04, RMSE: 0.0256, MAE: 0.0174, MAPE: 3.73 %, R_2: 0.8133\n",
|
||
"MSE: 7.0E-04, RMSE: 0.0264, MAE: 0.017, MAPE: 3.59 %, R_2: 0.8201\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"kf = KFold(n_splits=10, shuffle=True, random_state=666)\n",
|
||
"eva_list = list()\n",
|
||
"for (train_index, test_index) in kf.split(train_data):\n",
|
||
" train = train_data.loc[train_index]\n",
|
||
" test = train_data.loc[test_index]\n",
|
||
" train, valid = train_test_split(train, test_size=0.1, random_state=666)\n",
|
||
" X_train, Y_train = train[feature_cols], train['power_co2_factor']\n",
|
||
" X_valid, Y_valid = valid[feature_cols], valid['power_co2_factor']\n",
|
||
" X_test, Y_test = valid[feature_cols], valid['power_co2_factor']\n",
|
||
" dtrain = xgb.DMatrix(X_train, Y_train)\n",
|
||
" dvalid = xgb.DMatrix(X_valid, Y_valid)\n",
|
||
" watchlist = [(dvalid, 'eval')]\n",
|
||
" gb_model = xgb.train(params_xgb, dtrain, 2000, evals=watchlist,\n",
|
||
" early_stopping_rounds=100, verbose_eval=False)\n",
|
||
" y_pred = gb_model.predict(xgb.DMatrix(X_test))\n",
|
||
" y_true = Y_test.values\n",
|
||
" MSE = mean_squared_error(y_true, y_pred)\n",
|
||
" RMSE = np.sqrt(mean_squared_error(y_true, y_pred))\n",
|
||
" MAE = mean_absolute_error(y_true, y_pred)\n",
|
||
" MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
|
||
" R_2 = r2_score(y_true, y_pred)\n",
|
||
" print('MSE:', format(MSE, '.1E'), end=', ')\n",
|
||
" print('RMSE:', round(RMSE, 4), end=', ')\n",
|
||
" print('MAE:', round(MAE, 4), end=', ')\n",
|
||
" print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n",
|
||
" print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差\n",
|
||
" eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 83,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "MSE 0.000747\nRMSE 0.027126\nMAE 0.018437\nMAPE 0.039442\nR_2 0.788768\ndtype: float64"
|
||
},
|
||
"execution_count": 83,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']).drop(index=[2, 5]).mean()"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"num_boost_round = 2000\n",
|
||
"\n",
|
||
"dtrain = xgb.DMatrix(train[feature_cols], train['power_co2_factor'].values)\n",
|
||
"dvalid = xgb.DMatrix(valid[feature_cols], valid['power_co2_factor'].values)\n",
|
||
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
|
||
"\n",
|
||
"gb_model_power = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n",
|
||
" early_stopping_rounds=200, verbose_eval=False)"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 59,
|
||
"outputs": [],
|
||
"source": [
|
||
"power_pred, power_real = gb_model_power.predict(dvalid), valid['power_co2_factor'].values"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 60,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 5.2E-04\n",
|
||
"RMSE: 0.023\n",
|
||
"MAE: 0.016\n",
|
||
"MAPE: 3.46 %\n",
|
||
"R_2: 0.819\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# Error metrics comparing power_pred against power_real.\n",
"MSE = mean_squared_error(power_real, power_pred)\n",
"RMSE = np.sqrt(MSE)  # same value as recomputing the MSE, without the second pass\n",
"MAE = mean_absolute_error(power_real, power_pred)\n",
"MAPE = mean_absolute_percentage_error(power_real, power_pred)\n",
"R_2 = r2_score(power_real, power_pred)\n",
"\n",
"print('MSE:', format(MSE, '.1E'))\n",
"print('RMSE:', round(RMSE, 3))\n",
"print('MAE:', round(MAE, 3))\n",
"print('MAPE:', round(MAPE*100, 2), '%')\n",
"print('R_2:', round(R_2, 3))  # a negative R^2 means the fit is worse than predicting the mean"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"new_test_data['power_co2_factor'] = gb_model_power.predict(dtest)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>所处地区</th>\n",
|
||
" <th>机组类型</th>\n",
|
||
" <th>参数分类</th>\n",
|
||
" <th>冷凝器型式</th>\n",
|
||
" <th>铭牌容量 (MW)</th>\n",
|
||
" <th>longitude</th>\n",
|
||
" <th>latitude</th>\n",
|
||
" <th>altitude</th>\n",
|
||
" <th>power_co2_factor</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>亚临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>5.771441</td>\n",
|
||
" <td>3.451583</td>\n",
|
||
" <td>4.772094</td>\n",
|
||
" <td>2.397895</td>\n",
|
||
" <td>0.513529</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>亚临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>5.771441</td>\n",
|
||
" <td>3.451583</td>\n",
|
||
" <td>4.772094</td>\n",
|
||
" <td>2.397895</td>\n",
|
||
" <td>0.513529</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>超超临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>6.908755</td>\n",
|
||
" <td>3.451583</td>\n",
|
||
" <td>4.772094</td>\n",
|
||
" <td>2.397895</td>\n",
|
||
" <td>0.478943</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>超超临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>6.908755</td>\n",
|
||
" <td>3.451583</td>\n",
|
||
" <td>4.772094</td>\n",
|
||
" <td>2.397895</td>\n",
|
||
" <td>0.478943</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>抽凝式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>3.713572</td>\n",
|
||
" <td>3.451583</td>\n",
|
||
" <td>4.772094</td>\n",
|
||
" <td>2.397895</td>\n",
|
||
" <td>0.510681</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3219</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽背式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>其他</td>\n",
|
||
" <td>3.931826</td>\n",
|
||
" <td>3.427489</td>\n",
|
||
" <td>4.682353</td>\n",
|
||
" <td>5.645447</td>\n",
|
||
" <td>0.510508</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3220</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽背式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>其他</td>\n",
|
||
" <td>3.931826</td>\n",
|
||
" <td>3.427489</td>\n",
|
||
" <td>4.682353</td>\n",
|
||
" <td>5.645447</td>\n",
|
||
" <td>0.510508</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3221</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽凝式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>3.912023</td>\n",
|
||
" <td>3.427489</td>\n",
|
||
" <td>4.682353</td>\n",
|
||
" <td>5.645447</td>\n",
|
||
" <td>0.512501</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3222</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>背压式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>其他</td>\n",
|
||
" <td>3.433987</td>\n",
|
||
" <td>3.428715</td>\n",
|
||
" <td>4.682208</td>\n",
|
||
" <td>5.690359</td>\n",
|
||
" <td>0.509951</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3223</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽凝式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>4.836282</td>\n",
|
||
" <td>3.428715</td>\n",
|
||
" <td>4.682208</td>\n",
|
||
" <td>5.690359</td>\n",
|
||
" <td>0.511886</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>3224 rows × 9 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n",
|
||
"0 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895 \n",
|
||
"1 安徽省 凝气式 亚临界 水冷 5.771441 3.451583 4.772094 2.397895 \n",
|
||
"2 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895 \n",
|
||
"3 安徽省 凝气式 超超临界 水冷 6.908755 3.451583 4.772094 2.397895 \n",
|
||
"4 安徽省 抽凝式 高压 水冷 3.713572 3.451583 4.772094 2.397895 \n",
|
||
"... ... ... ... ... ... ... ... ... \n",
|
||
"3219 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n",
|
||
"3220 重庆市 抽背式 高压 其他 3.931826 3.427489 4.682353 5.645447 \n",
|
||
"3221 重庆市 抽凝式 高压 水冷 3.912023 3.427489 4.682353 5.645447 \n",
|
||
"3222 重庆市 背压式 高压 其他 3.433987 3.428715 4.682208 5.690359 \n",
|
||
"3223 重庆市 抽凝式 高压 水冷 4.836282 3.428715 4.682208 5.690359 \n",
|
||
"\n",
|
||
" power_co2_factor \n",
|
||
"0 0.513529 \n",
|
||
"1 0.513529 \n",
|
||
"2 0.478943 \n",
|
||
"3 0.478943 \n",
|
||
"4 0.510681 \n",
|
||
"... ... \n",
|
||
"3219 0.510508 \n",
|
||
"3220 0.510508 \n",
|
||
"3221 0.512501 \n",
|
||
"3222 0.509951 \n",
|
||
"3223 0.511886 \n",
|
||
"\n",
|
||
"[3224 rows x 9 columns]"
|
||
]
|
||
},
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_test_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 84,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Booster settings plus the default round budget used by later training cells.\n",
"params_xgb = dict(\n",
"    objective='reg:squarederror',\n",
"    booster='gbtree',\n",
"    eta=0.01,\n",
"    max_depth=15,\n",
"    subsample=0.7,\n",
"    colsample_bytree=0.9,\n",
"    min_child_weight=10,\n",
"    seed=666,\n",
")\n",
"\n",
"num_boost_round = 1200"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 85,
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"MSE: 1.2E-05, RMSE: 0.0034, MAE: 0.002, MAPE: 2.93 %, R_2: 0.7571\n",
|
||
"MSE: 3.9E-06, RMSE: 0.002, MAE: 0.0014, MAPE: 2.01 %, R_2: 0.9072\n",
|
||
"MSE: 2.1E-05, RMSE: 0.0045, MAE: 0.0024, MAPE: 3.67 %, R_2: 0.4898\n",
|
||
"MSE: 1.3E-05, RMSE: 0.0036, MAE: 0.002, MAPE: 3.01 %, R_2: 0.6941\n",
|
||
"MSE: 1.2E-05, RMSE: 0.0034, MAE: 0.002, MAPE: 2.92 %, R_2: 0.7163\n",
|
||
"MSE: 1.5E-05, RMSE: 0.0039, MAE: 0.0022, MAPE: 3.29 %, R_2: 0.6265\n",
|
||
"MSE: 5.8E-06, RMSE: 0.0024, MAE: 0.0014, MAPE: 2.06 %, R_2: 0.8744\n",
|
||
"MSE: 1.7E-05, RMSE: 0.0041, MAE: 0.0024, MAPE: 3.64 %, R_2: 0.6661\n",
|
||
"MSE: 8.4E-06, RMSE: 0.0029, MAE: 0.0018, MAPE: 2.61 %, R_2: 0.8057\n",
|
||
"MSE: 7.0E-06, RMSE: 0.0026, MAE: 0.0016, MAPE: 2.29 %, R_2: 0.8514\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"# 10-fold cross-validation of the heat-factor model; one metrics row per fold in eva_list.\n",
"kf = KFold(n_splits=10, shuffle=True, random_state=666)\n",
"eva_list = list()\n",
"for (train_index, test_index) in kf.split(train_data):\n",
"    # KFold.split yields positional indices, so rows are selected with .iloc\n",
"    # (.loc is label-based and only agrees when the index is exactly 0..n-1).\n",
"    train = train_data.iloc[train_index]\n",
"    test = train_data.iloc[test_index]\n",
"    # The validation split is carved out of the training folds and drives early stopping.\n",
"    train, valid = train_test_split(train, test_size=0.1, random_state=666)\n",
"    X_train, Y_train = train[feature_cols], train['heat_co2_factor']\n",
"    X_valid, Y_valid = valid[feature_cols], valid['heat_co2_factor']\n",
"    # Score on the held-out fold. Scoring on `valid` would be optimistic because\n",
"    # early stopping has already been tuned against it.\n",
"    X_test, Y_test = test[feature_cols], test['heat_co2_factor']\n",
"    dtrain = xgb.DMatrix(X_train, Y_train)\n",
"    dvalid = xgb.DMatrix(X_valid, Y_valid)\n",
"    watchlist = [(dvalid, 'eval')]\n",
"    gb_model = xgb.train(params_xgb, dtrain, 2000, evals=watchlist,\n",
"                         early_stopping_rounds=100, verbose_eval=False)\n",
"    y_pred = gb_model.predict(xgb.DMatrix(X_test))\n",
"    y_true = Y_test.values\n",
"    MSE = mean_squared_error(y_true, y_pred)\n",
"    RMSE = np.sqrt(MSE)\n",
"    MAE = mean_absolute_error(y_true, y_pred)\n",
"    MAPE = mean_absolute_percentage_error(y_true, y_pred)\n",
"    R_2 = r2_score(y_true, y_pred)\n",
"    print('MSE:', format(MSE, '.1E'), end=', ')\n",
"    print('RMSE:', round(RMSE, 4), end=', ')\n",
"    print('MAE:', round(MAE, 4), end=', ')\n",
"    print('MAPE:', round(MAPE*100, 2), '%', end=', ')\n",
"    print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than predicting the mean\n",
"    eva_list.append([MSE, RMSE, MAE, MAPE, R_2])\n"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 86,
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": "MSE 0.000010\nRMSE 0.003161\nMAE 0.001866\nMAPE 0.027510\nR_2 0.766523\ndtype: float64"
|
||
},
|
||
"execution_count": 86,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.DataFrame.from_records(eva_list, columns=['MSE', 'RMSE', 'MAE', 'MAPE', 'R_2']).drop(index=[2]).mean()"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"outputs": [],
|
||
"source": [
|
||
"\n",
|
||
"# Fit the final heat-factor booster.\n",
"# NOTE(review): `train`, `valid` and `num_boost_round` are not set in this cell; they come\n",
"# from whichever earlier cells ran last - confirm before relying on the result.\n",
"dtrain = xgb.DMatrix(data=train[feature_cols], label=train['heat_co2_factor'].values)\n",
"dvalid = xgb.DMatrix(data=valid[feature_cols], label=valid['heat_co2_factor'].values)\n",
"watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n",
"\n",
"gb_model_heat = xgb.train(\n",
"    params_xgb,\n",
"    dtrain,\n",
"    num_boost_round=num_boost_round,\n",
"    evals=watchlist,\n",
"    early_stopping_rounds=100,\n",
"    verbose_eval=False,\n",
")"
|
||
],
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
}
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"new_test_data['heat_co2_factor'] = gb_model_heat.predict(dtest)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Apply expm1 (exp(x) - 1) to every column listed in num_cols.\n",
"# NOTE(review): presumably this undoes an earlier log1p transform - confirm.\n",
"# The cell overwrites new_test_data in place, so running it twice transforms twice.\n",
"for col in num_cols:\n",
"    new_test_data[col] = new_test_data[col].apply(np.expm1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>所处地区</th>\n",
|
||
" <th>机组类型</th>\n",
|
||
" <th>参数分类</th>\n",
|
||
" <th>冷凝器型式</th>\n",
|
||
" <th>铭牌容量 (MW)</th>\n",
|
||
" <th>longitude</th>\n",
|
||
" <th>latitude</th>\n",
|
||
" <th>altitude</th>\n",
|
||
" <th>power_co2_factor</th>\n",
|
||
" <th>heat_co2_factor</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>亚临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>320.0</td>\n",
|
||
" <td>30.550295</td>\n",
|
||
" <td>117.166391</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>0.513529</td>\n",
|
||
" <td>0.073187</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>亚临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>320.0</td>\n",
|
||
" <td>30.550295</td>\n",
|
||
" <td>117.166391</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>0.513529</td>\n",
|
||
" <td>0.073187</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>超超临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>1000.0</td>\n",
|
||
" <td>30.550295</td>\n",
|
||
" <td>117.166391</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>0.478943</td>\n",
|
||
" <td>0.071981</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>凝气式</td>\n",
|
||
" <td>超超临界</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>1000.0</td>\n",
|
||
" <td>30.550295</td>\n",
|
||
" <td>117.166391</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>0.478943</td>\n",
|
||
" <td>0.071981</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>安徽省</td>\n",
|
||
" <td>抽凝式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>30.550295</td>\n",
|
||
" <td>117.166391</td>\n",
|
||
" <td>10.0</td>\n",
|
||
" <td>0.510681</td>\n",
|
||
" <td>0.072166</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3219</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽背式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>其他</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>29.799200</td>\n",
|
||
" <td>107.023948</td>\n",
|
||
" <td>282.0</td>\n",
|
||
" <td>0.510508</td>\n",
|
||
" <td>0.071945</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3220</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽背式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>其他</td>\n",
|
||
" <td>50.0</td>\n",
|
||
" <td>29.799200</td>\n",
|
||
" <td>107.023948</td>\n",
|
||
" <td>282.0</td>\n",
|
||
" <td>0.510508</td>\n",
|
||
" <td>0.071945</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3221</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽凝式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>49.0</td>\n",
|
||
" <td>29.799200</td>\n",
|
||
" <td>107.023948</td>\n",
|
||
" <td>282.0</td>\n",
|
||
" <td>0.512501</td>\n",
|
||
" <td>0.072097</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3222</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>背压式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>其他</td>\n",
|
||
" <td>30.0</td>\n",
|
||
" <td>29.836998</td>\n",
|
||
" <td>107.008326</td>\n",
|
||
" <td>295.0</td>\n",
|
||
" <td>0.509951</td>\n",
|
||
" <td>0.071945</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3223</th>\n",
|
||
" <td>重庆市</td>\n",
|
||
" <td>抽凝式</td>\n",
|
||
" <td>高压</td>\n",
|
||
" <td>水冷</td>\n",
|
||
" <td>125.0</td>\n",
|
||
" <td>29.836998</td>\n",
|
||
" <td>107.008326</td>\n",
|
||
" <td>295.0</td>\n",
|
||
" <td>0.511886</td>\n",
|
||
" <td>0.072097</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>3224 rows × 10 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n",
|
||
"0 安徽省 凝气式 亚临界 水冷 320.0 30.550295 117.166391 10.0 \n",
|
||
"1 安徽省 凝气式 亚临界 水冷 320.0 30.550295 117.166391 10.0 \n",
|
||
"2 安徽省 凝气式 超超临界 水冷 1000.0 30.550295 117.166391 10.0 \n",
|
||
"3 安徽省 凝气式 超超临界 水冷 1000.0 30.550295 117.166391 10.0 \n",
|
||
"4 安徽省 抽凝式 高压 水冷 40.0 30.550295 117.166391 10.0 \n",
|
||
"... ... ... ... ... ... ... ... ... \n",
|
||
"3219 重庆市 抽背式 高压 其他 50.0 29.799200 107.023948 282.0 \n",
|
||
"3220 重庆市 抽背式 高压 其他 50.0 29.799200 107.023948 282.0 \n",
|
||
"3221 重庆市 抽凝式 高压 水冷 49.0 29.799200 107.023948 282.0 \n",
|
||
"3222 重庆市 背压式 高压 其他 30.0 29.836998 107.008326 295.0 \n",
|
||
"3223 重庆市 抽凝式 高压 水冷 125.0 29.836998 107.008326 295.0 \n",
|
||
"\n",
|
||
" power_co2_factor heat_co2_factor \n",
|
||
"0 0.513529 0.073187 \n",
|
||
"1 0.513529 0.073187 \n",
|
||
"2 0.478943 0.071981 \n",
|
||
"3 0.478943 0.071981 \n",
|
||
"4 0.510681 0.072166 \n",
|
||
"... ... ... \n",
|
||
"3219 0.510508 0.071945 \n",
|
||
"3220 0.510508 0.071945 \n",
|
||
"3221 0.512501 0.072097 \n",
|
||
"3222 0.509951 0.071945 \n",
|
||
"3223 0.511886 0.072097 \n",
|
||
"\n",
|
||
"[3224 rows x 10 columns]"
|
||
]
|
||
},
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"new_test_data"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"rst = new_test_data.copy()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {
|
||
"collapsed": false,
|
||
"jupyter": {
|
||
"outputs_hidden": false
|
||
},
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"import pandas as pd"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"rst = pd.read_excel('./results/全国机组预测数据.xlsx')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# Drop the first column only when it is the auto-named index column ('Unnamed: 0') that\n",
"# read_excel produces for a header-less index. The file is written back with index=False\n",
"# further down, so an unconditional drop would delete a real data column on a re-run.\n",
"if len(rst.columns) and str(rst.columns[0]).startswith('Unnamed'):\n",
"    rst = rst.drop(columns=rst.columns[0])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"def change_cap(x):\n",
"    \"\"\"Map a nameplate capacity in MW to its capacity-band label.\n",
"\n",
"    Bands are closed on the right: <=300, <=600, <=1000, otherwise the top band.\n",
"    A value that compares False against every bound (e.g. NaN) lands in the top band.\n",
"    \"\"\"\n",
"    bands = ((300, '300MW以下'), (600, '300-600MW'), (1000, '600-1000MW'))\n",
"    for upper, label in bands:\n",
"        if x <= upper:\n",
"            return label\n",
"    return \"1000MW以上\""
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"rst['容量类型'] = rst['铭牌容量 (MW)'].apply(change_cap)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"rst.to_excel('./results/全国机组预测数据.xlsx', index=False)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {
|
||
"pycharm": {
|
||
"name": "#%%\n"
|
||
}
|
||
},
|
||
"outputs": [],
|
||
"source": []
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.7.13"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
} |