{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "from sklearn.multioutput import MultiOutputRegressor\n", "import xgboost as xgb\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score\n", "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
企业名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
5740榆能榆神热电有限公司2350.0抽凝式超临界间接空冷25514.038.847.28烟煤陕西省109.82026538.30438311510.6617590.091483
\n", "
" ], "text/plain": [ " 企业名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n", "5740 榆能榆神热电有限公司 2 350.0 抽凝式 超临界 间接空冷 25514.0 \n", "\n", " 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n", "5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151 \n", "\n", " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", "5740 0.661759 0.091483 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_data = pd.read_excel('train_data.xlsx')\n", "total_data.tail(1)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "Index(['企业名称', '机组编号', '铭牌容量 (MW)', '机组类型', '参数分类', '冷凝器型式', '入炉煤低位热值(kJ/kg)',\n", " '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '煤种', '所处地区', 'longitude', 'latitude',\n", " 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'],\n", " dtype='object')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_data.columns" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "num_cols = ['铭牌容量 (MW)', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', 'longitude', 'latitude', 'altitude', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']\n", "# object_cols = ['所处地区', '类型', '机组参数', '冷却型式']\n", "object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式']\n", "# object_cols = ['所处地区', '机组类型', '参数分类', '冷凝器型式', '煤种']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def change_str(x):\n", " if '空冷' in x:\n", " return '空冷'\n", " if '水冷' in x:\n", " return '水冷'" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "total_data = total_data[total_data['发电碳排放因子(kg/kWh)'] <= 0.9].copy()" ] }, { 
"cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "['所处地区',\n", " '机组类型',\n", " '参数分类',\n", " '冷凝器型式',\n", " '铭牌容量 (MW)',\n", " '入炉煤低位热值(kJ/kg)',\n", " '燃煤挥发份Var(%)',\n", " '燃煤灰份Aar(%)',\n", " 'longitude',\n", " 'latitude',\n", " 'altitude']" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "use_cols = object_cols + [x for x in num_cols if '因子' not in x]\n", "use_cols" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "total_data = total_data[~total_data['供热碳排放因子(kg/MJ)'].isna()].copy()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "(5732, 16)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_data.shape" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "(1092, 14)" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "total_data.groupby(['企业名称', '机组编号']).count().shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "total_data['入炉煤低位热值(kJ/kg)'] = total_data['入炉煤低位热值(kJ/kg)'].apply(lambda x: x * 1000 if x < 100 else x * 1)\n", "total_data['燃煤灰份Aar(%)'] = total_data['燃煤灰份Aar(%)'].apply(lambda x: x / 1000 if x > 10000 else x * 1)\n", "total_data['燃煤挥发份Var(%)'] = total_data['燃煤挥发份Var(%)'].apply(lambda x: x / 1000 if x > 10000 
else x * 1)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "total_data.altitude = total_data.altitude.apply(lambda x: 0 if x < 0 else x)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "(5629, 16)" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "use_data = total_data[(total_data['供热碳排放因子(kg/MJ)'] > 0.01)&(total_data['供热碳排放因子(kg/MJ)'] < 0.1)].copy()\n", "use_data.shape" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/plain": [ "count 5629.000000\n", "mean 0.070915\n", "std 0.009967\n", "min 0.010464\n", "25% 0.065467\n", "50% 0.071533\n", "75% 0.077513\n", "max 0.099905\n", "Name: 供热碳排放因子(kg/MJ), dtype: float64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "use_data['供热碳排放因子(kg/MJ)'].describe()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "train_data = 
use_data.groupby(use_cols)[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean().reset_index()  # list selector: tuple-style multi-key indexing is deprecated and rejected by pandas 2.x" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0上海市供热式亚临界水冷300.018366.0026.0515.70121.47114031.06511330.5375740.070992
1上海市供热式亚临界水冷300.018426.0026.0515.70121.47114031.06511330.5455160.072476
2上海市供热式亚临界水冷300.019507.0026.4014.95121.47114031.06511330.5958490.064745
3上海市供热式亚临界水冷300.019599.0026.7811.58121.47114031.06511330.5844320.068390
4上海市供热式亚临界水冷300.020125.0024.9214.90121.47114031.06511330.6053690.066996
..........................................
3961黑龙江省纯凝式超高压水冷200.015941.2123.8314.73126.57564745.9185661180.5001720.064200
3962黑龙江省纯凝式超高压水冷210.015355.0042.0036.70131.69586446.580444910.5183010.063249
3963黑龙江省背压式超高压水冷-开式循环200.013396.0023.3915.66123.63914647.2106961510.2243120.053770
3964黑龙江省背压式超高压水冷-闭式循环215.015753.0036.2942.40129.60480344.6082022500.2908140.068027
3965黑龙江省背压式超高压水冷-闭式循环215.016471.1130.1038.67129.60480344.6082022500.3216350.067798
\n", "

3966 rows × 13 columns

\n", "
" ], "text/plain": [ " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) \\\n", "0 上海市 供热式 亚临界 水冷 300.0 18366.00 26.05 \n", "1 上海市 供热式 亚临界 水冷 300.0 18426.00 26.05 \n", "2 上海市 供热式 亚临界 水冷 300.0 19507.00 26.40 \n", "3 上海市 供热式 亚临界 水冷 300.0 19599.00 26.78 \n", "4 上海市 供热式 亚临界 水冷 300.0 20125.00 24.92 \n", "... ... ... ... ... ... ... ... \n", "3961 黑龙江省 纯凝式 超高压 水冷 200.0 15941.21 23.83 \n", "3962 黑龙江省 纯凝式 超高压 水冷 210.0 15355.00 42.00 \n", "3963 黑龙江省 背压式 超高压 水冷-开式循环 200.0 13396.00 23.39 \n", "3964 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 15753.00 36.29 \n", "3965 黑龙江省 背压式 超高压 水冷-闭式循环 215.0 16471.11 30.10 \n", "\n", " 燃煤灰份Aar(%) longitude latitude altitude 发电碳排放因子(kg/kWh) \\\n", "0 15.70 121.471140 31.065113 3 0.537574 \n", "1 15.70 121.471140 31.065113 3 0.545516 \n", "2 14.95 121.471140 31.065113 3 0.595849 \n", "3 11.58 121.471140 31.065113 3 0.584432 \n", "4 14.90 121.471140 31.065113 3 0.605369 \n", "... ... ... ... ... ... \n", "3961 14.73 126.575647 45.918566 118 0.500172 \n", "3962 36.70 131.695864 46.580444 91 0.518301 \n", "3963 15.66 123.639146 47.210696 151 0.224312 \n", "3964 42.40 129.604803 44.608202 250 0.290814 \n", "3965 38.67 129.604803 44.608202 250 0.321635 \n", "\n", " 供热碳排放因子(kg/MJ) \n", "0 0.070992 \n", "1 0.072476 \n", "2 0.064745 \n", "3 0.068390 \n", "4 0.066996 \n", "... ... 
\n", "3961 0.064200 \n", "3962 0.063249 \n", "3963 0.053770 \n", "3964 0.068027 \n", "3965 0.067798 \n", "\n", "[3966 rows x 13 columns]" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_data" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# log1p-compress every numeric feature; the two emission-factor targets keep their original scale.\n", "# NOTE: this rewrites train_data in place, so re-running the cell applies log1p a second time.\n", "for col in num_cols:\n", "    if '因子' not in col:\n", "        train_data[col] = np.log1p(train_data[col])" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "train_data = train_data[train_data['供热碳排放因子(kg/MJ)']<=0.1].copy()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "train_data = pd.get_dummies(train_data, columns=object_cols).dropna()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# Cast every column (numeric features, one-hot dummies, targets) to float.\n", "for col in train_data.columns:\n", "    train_data[col] = train_data[col].astype(float)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "feature_cols = [x for x in train_data.columns if '因子' not in x]\n", "target_cols = [x for x in train_data.columns if '因子' in x]" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "train_data.to_csv('./train_data_processed.csv', encoding='utf-8-sig', index=False)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false, "jupyter": { 
"outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "train, test = train_test_split(train_data.dropna(), test_size=0.1, shuffle=True, random_state=666)\n", "train, valid = train_test_split(train, test_size=0.2, shuffle=True, random_state=666)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "train_X, train_y = train[feature_cols], train[target_cols]\n", "valid_X, valid_y = valid[feature_cols], valid[target_cols]\n", "test_X, test_y = test[feature_cols], test[target_cols]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "from sklearn.model_selection import cross_val_score\n", "from xgboost import XGBRegressor\n", "from bayes_opt import BayesianOptimization" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### 供电建模" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "params_xgb = {'objective': 'reg:squarederror',\n", " 'booster': 'gbtree',\n", " 'eta': 0.01,\n", " 'max_depth': 60,\n", " 'subsample': 0.85,\n", " 'colsample_bytree': 0.85,\n", " 'min_child_weight': 10,\n", " 'seed': 10}\n", "\n", "num_boost_round = 2000\n", "\n", "dtrain = xgb.DMatrix(train_X, train_y.values[:, 0])\n", "dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 0])\n", "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", "\n", "gb_model = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", " early_stopping_rounds=200, verbose_eval=False)\n" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } 
}, "outputs": [], "source": [ "y_pred_xgb = gb_model.predict(xgb.DMatrix(test_X))\n", "y_true_xgb = test_y.values[:, 0]" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 5.5E-04\n", "RMSE: 0.0235\n", "MAE: 0.0145\n", "MAPE: 2.99 %\n", "R_2: 0.9011\n" ] } ], "source": [ "MSE = mean_squared_error(y_true_xgb, y_pred_xgb)\n", "RMSE = np.sqrt(mean_squared_error(y_true_xgb, y_pred_xgb))\n", "MAE = mean_absolute_error(y_true_xgb, y_pred_xgb)\n", "MAPE = mean_absolute_percentage_error(y_true_xgb, y_pred_xgb)\n", "R_2 = r2_score(y_true_xgb, y_pred_xgb)\n", "print('MSE:', format(MSE, '.1E'))\n", "print('RMSE:', round(RMSE, 4))\n", "print('MAE:', round(MAE, 4))\n", "print('MAPE:', round(MAPE*100, 2), '%')\n", "print('R_2:', round(R_2, 4)) #R方为负就说明拟合效果比平均值差a" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "power_eva_df = pd.DataFrame.from_records([y_true_xgb, y_pred_xgb]).T\n", "power_eva_df.to_csv('./发电测试结果.csv', index=False, encoding='utf-8-sig')" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "gb_model.save_model('./models/power_model.txt')" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### 发热建模" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "def xgb_cv(max_depth, learning_rate, min_child_weight, subsample, colsample_bytree, reg_alpha, gamma):\n", " val = cross_val_score(estimator=XGBRegressor(max_depth=int(max_depth),\n", " 
learning_rate=learning_rate,\n", " n_estimators=2000,\n", " min_child_weight=min_child_weight,\n", " subsample=max(min(subsample, 1), 0),\n", " colsample_bytree=max(min(colsample_bytree, 1), 0),\n", " reg_alpha=max(reg_alpha, 0), gamma=gamma, objective='reg:squarederror',\n", " booster='gbtree',\n", " seed=10), X=train[feature_cols], y=train['供热碳排放因子(kg/MJ)'], scoring='r2',\n", " cv=10).mean()  # average R^2 over the 10 folds; max() would score a parameter set by its single luckiest fold\n", " return val" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "params_xgb = {'objective': 'reg:squarederror',\n", " 'booster': 'gbtree',\n", " 'eta': 0.01,\n", " 'max_depth': 30,\n", " 'subsample': 0.8,\n", " 'colsample_bytree': 0.9,\n", " 'min_child_weight': 10,\n", " 'seed': 108}\n", "\n", "num_boost_round = 2000\n", "\n", "dtrain = xgb.DMatrix(train_X, train_y.values[:, 1])\n", "dvalid = xgb.DMatrix(valid_X, valid_y.values[:, 1])\n", "watchlist = [(dtrain, 'train'), (dvalid, 'eval')]\n", "\n", "gb_model_heat = xgb.train(params_xgb, dtrain, num_boost_round, evals=watchlist,\n", " early_stopping_rounds=200, verbose_eval=False)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "y_pred_heat = gb_model_heat.predict(xgb.DMatrix(test_X))\n", "y_true_heat = test_y.values[:, 1]" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "MSE: 2.9E-05\n", "RMSE: 0.0054\n", "MAE: 0.0024\n", "MAPE: 5.19 %\n", "R_2: 0.7392\n" ] } ], "source": [ "MSE = mean_squared_error(y_true_heat, y_pred_heat)\n", "RMSE = np.sqrt(mean_squared_error(y_true_heat, y_pred_heat))\n", "MAE = mean_absolute_error(y_true_heat, y_pred_heat)\n", "MAPE = 
mean_absolute_percentage_error(y_true_heat, y_pred_heat)\n", "R_2 = r2_score(y_true_heat, y_pred_heat)\n", "print('MSE:', format(MSE, '.1E'))\n", "print('RMSE:', round(RMSE, 4))\n", "print('MAE:', round(MAE, 4))\n", "print('MAPE:', round(MAPE*100, 2), '%')\n", "print('R_2:', round(R_2, 4))  # a negative R^2 means the fit is worse than always predicting the mean" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "pd.DataFrame.from_records([y_true_heat, y_pred_heat]).T.to_csv('./供热测试结果.csv', index=False, encoding='utf-8-sig')" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "gb_model_heat.save_model('./models/heat_model.txt')" ] }, { "cell_type": "markdown", "metadata": { "pycharm": { "name": "#%% md\n" } }, "source": [ "### 煤种标准化工程" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] } ], "source": [ "# Mean emission factors per distinct (coal type, heating value, volatile matter, ash) combination.\n", "# The value columns are selected with a list: tuple-style multi-key indexing is deprecated and rejected by pandas 2.x.\n", "new_values = use_data.groupby(['煤种', '入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean()" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
煤种入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
无烟煤19827.0011.182539.000.5614240.087794
烟煤16733.0022.5327.460.4415110.064259
16740.0018.9937.000.4872250.064535
27.9324.430.4184570.064747
16741.0026.6925.920.4336790.061822
..................
贫煤22149.0012.4325.100.6297330.082772
22272.5111.8322.970.6278770.083234
22475.978.9023.980.6203310.086574
23215.0011.0019.310.6822210.080249
23791.0011.0019.310.7017950.082240
\n", "

3936 rows × 2 columns

\n", "
" ], "text/plain": [ " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ)\n", "煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n", "无烟煤 19827.00 11.18 2539.00 0.561424 0.087794\n", "烟煤 16733.00 22.53 27.46 0.441511 0.064259\n", " 16740.00 18.99 37.00 0.487225 0.064535\n", " 27.93 24.43 0.418457 0.064747\n", " 16741.00 26.69 25.92 0.433679 0.061822\n", "... ... ...\n", "贫煤 22149.00 12.43 25.10 0.629733 0.082772\n", " 22272.51 11.83 22.97 0.627877 0.083234\n", " 22475.97 8.90 23.98 0.620331 0.086574\n", " 23215.00 11.00 19.31 0.682221 0.080249\n", " 23791.00 11.00 19.31 0.701795 0.082240\n", "\n", "[3936 rows x 2 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_values" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
煤种入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
0无烟煤19827.0011.182539.00
1烟煤16733.0022.5327.46
2烟煤16740.0018.9937.00
3烟煤16740.0027.9324.43
4烟煤16741.0026.6925.92
...............
3931贫煤22149.0012.4325.10
3932贫煤22272.5111.8322.97
3933贫煤22475.978.9023.98
3934贫煤23215.0011.0019.31
3935贫煤23791.0011.0019.31
\n", "

3936 rows × 4 columns

\n", "
" ], "text/plain": [ " 煤种 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%)\n", "0 无烟煤 19827.00 11.18 2539.00\n", "1 烟煤 16733.00 22.53 27.46\n", "2 烟煤 16740.00 18.99 37.00\n", "3 烟煤 16740.00 27.93 24.43\n", "4 烟煤 16741.00 26.69 25.92\n", "... ... ... ... ...\n", "3931 贫煤 22149.00 12.43 25.10\n", "3932 贫煤 22272.51 11.83 22.97\n", "3933 贫煤 22475.97 8.90 23.98\n", "3934 贫煤 23215.00 11.00 19.31\n", "3935 贫煤 23791.00 11.00 19.31\n", "\n", "[3936 rows x 4 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "coal_df = new_values.reset_index().drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])\n", "coal_df" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "coal_params_dict = dict()\n", "for coal_type in coal_df['煤种'].unique().tolist():\n", " options = coal_df[coal_df['煤种']==coal_type][['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']].values\n", " coal_params_dict[coal_type] = options" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
电厂名称机组编号铭牌容量 (MW)机组类型参数分类冷凝器型式入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)煤种所处地区longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0江苏利港电力有限公司1350.0纯凝式亚临界水冷21602.0500026.0916.80烟煤江苏省120.09662031.9423611.00.5869900.076843
1江苏利港电力有限公司1350.0纯凝式亚临界水冷21926.8100026.6815.41烟煤江苏省120.09662031.9423611.00.6328590.077676
2江苏利港电力有限公司1350.0纯凝式亚临界水冷21261.9306226.4615.18烟煤江苏省120.09662031.9423611.00.6091960.074823
3江苏利港电力有限公司1350.0纯凝式亚临界水冷20840.0000026.4314.55烟煤江苏省120.09662031.9423611.00.6021780.081628
4江苏利港电力有限公司1350.0纯凝式亚临界水冷20706.0000026.4314.96烟煤江苏省120.09662031.9423611.00.5902540.081103
...................................................
5736浙江浙能电力股份有限公司台州发电厂8350.0纯凝式亚临界NaN21973.0000037.4317.12烟煤浙江省121.46584028.70462373.00.6283000.078776
5737浙江浙能电力股份有限公司台州发电厂8350.0纯凝式亚临界NaN21372.0000039.8718.01烟煤浙江省121.46584028.70462373.00.5950190.076622
5738浙江浙能电力股份有限公司台州发电厂8350.0纯凝式亚临界NaN20856.0000039.3219.74烟煤浙江省121.46584028.70462373.00.5657180.074772
5739榆能榆神热电有限公司1350.0供热式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.3043831151.00.6644560.091482
5740榆能榆神热电有限公司2350.0供热式超临界间接空冷25514.0000038.847.28烟煤陕西省109.82026538.3043831151.00.6617590.091483
\n", "

5629 rows × 16 columns

\n", "
" ], "text/plain": [ " 电厂名称 机组编号 铭牌容量 (MW) 机组类型 参数分类 冷凝器型式 入炉煤低位热值(kJ/kg) \\\n", "0 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21602.05000 \n", "1 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21926.81000 \n", "2 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 21261.93062 \n", "3 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 20840.00000 \n", "4 江苏利港电力有限公司 1 350.0 纯凝式 亚临界 水冷 20706.00000 \n", "... ... ... ... ... ... ... ... \n", "5736 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 21973.00000 \n", "5737 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 21372.00000 \n", "5738 浙江浙能电力股份有限公司台州发电厂 8 350.0 纯凝式 亚临界 NaN 20856.00000 \n", "5739 榆能榆神热电有限公司 1 350.0 供热式 超临界 间接空冷 25514.00000 \n", "5740 榆能榆神热电有限公司 2 350.0 供热式 超临界 间接空冷 25514.00000 \n", "\n", " 燃煤挥发份Var(%) 燃煤灰份Aar(%) 煤种 所处地区 longitude latitude altitude \\\n", "0 26.09 16.80 烟煤 江苏省 120.096620 31.942361 1.0 \n", "1 26.68 15.41 烟煤 江苏省 120.096620 31.942361 1.0 \n", "2 26.46 15.18 烟煤 江苏省 120.096620 31.942361 1.0 \n", "3 26.43 14.55 烟煤 江苏省 120.096620 31.942361 1.0 \n", "4 26.43 14.96 烟煤 江苏省 120.096620 31.942361 1.0 \n", "... ... ... .. ... ... ... ... \n", "5736 37.43 17.12 烟煤 浙江省 121.465840 28.704623 73.0 \n", "5737 39.87 18.01 烟煤 浙江省 121.465840 28.704623 73.0 \n", "5738 39.32 19.74 烟煤 浙江省 121.465840 28.704623 73.0 \n", "5739 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n", "5740 38.84 7.28 烟煤 陕西省 109.820265 38.304383 1151.0 \n", "\n", " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", "0 0.586990 0.076843 \n", "1 0.632859 0.077676 \n", "2 0.609196 0.074823 \n", "3 0.602178 0.081628 \n", "4 0.590254 0.081103 \n", "... ... ... 
\n", "5736 0.628300 0.078776 \n", "5737 0.595019 0.076622 \n", "5738 0.565718 0.074772 \n", "5739 0.664456 0.091482 \n", "5740 0.661759 0.091483 \n", "\n", "[5629 rows x 16 columns]" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "use_data" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "D:\\miniconda3\\envs\\py37\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.\n", " \"\"\"Entry point for launching an IPython kernel.\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude煤种发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)
0上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5375740.070992
1上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5455160.072476
2上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5958490.064745
3上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.5844320.068390
4上海市供热式亚临界水冷300.0121.47114031.0651133.0烟煤0.6053690.066996
....................................
3075黑龙江省纯凝式超高压水冷200.0126.57564745.918566118.0褐煤0.5001720.064200
3076黑龙江省纯凝式超高压水冷200.0129.60480344.608202250.0褐煤0.3782980.069663
3077黑龙江省纯凝式超高压水冷210.0131.69586446.58044491.0褐煤0.5183010.063249
3078黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.0褐煤0.2908140.068027
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.0褐煤0.3216350.067798
\n", "

3080 rows × 11 columns

\n", "
" ], "text/plain": [ " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude 煤种 \\\n", "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", "1 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", "2 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", "3 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", "4 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 烟煤 \n", "... ... ... ... ... ... ... ... ... .. \n", "3075 黑龙江省 纯凝式 超高压 水冷 200.0 126.575647 45.918566 118.0 褐煤 \n", "3076 黑龙江省 纯凝式 超高压 水冷 200.0 129.604803 44.608202 250.0 褐煤 \n", "3077 黑龙江省 纯凝式 超高压 水冷 210.0 131.695864 46.580444 91.0 褐煤 \n", "3078 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 褐煤 \n", "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 褐煤 \n", "\n", " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) \n", "0 0.537574 0.070992 \n", "1 0.545516 0.072476 \n", "2 0.595849 0.064745 \n", "3 0.584432 0.068390 \n", "4 0.605369 0.066996 \n", "... ... ... \n", "3075 0.500172 0.064200 \n", "3076 0.378298 0.069663 \n", "3077 0.518301 0.063249 \n", "3078 0.290814 0.068027 \n", "3079 0.321635 0.067798 \n", "\n", "[3080 rows x 11 columns]" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mean of both emission factors for every unique (use_cols, coal type) combination.\n", "# The value columns are selected with a list: tuple-style multi-column indexing on a\n", "# GroupBy is deprecated (FutureWarning) and rejected by pandas 2.x.\n", "factor_means = use_data.groupby(use_cols + ['煤种'])[['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']].mean()\n", "# reset_index() turns the group keys back into columns; the three coal measurement\n", "# columns are then dropped (they are rebuilt per coal type further down the notebook).\n", "new_use_data = factor_means.reset_index().drop(columns=['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)'])\n", "new_use_data" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# Attach the coal_params_dict entry for each row's coal type (None when .get() finds no entry).\n", "new_use_data['coal_params'] = new_use_data['煤种'].apply(lambda x: coal_params_dict.get(x))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# The coal type label is not needed once its parameter list is attached.\n", "# NOTE(review): in-place drop, so re-running the previous cell afterwards fails on the missing column.\n", "new_use_data.drop(columns='煤种', inplace=True)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false, "jupyter": { 
"outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# One output row per element of each coal_params list; index labels repeat after explode.\n", "new_data = new_use_data.explode(column='coal_params')" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)coal_params
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16733.0, 22.53, 27.46]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16740.0, 18.99, 37.0]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16740.0, 27.93, 24.43]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16741.0, 26.69, 25.92]
0上海市供热式亚临界水冷300.0121.47114031.0651133.00.5375740.070992[16741.51, 19.51, 35.62]
....................................
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16723.0, 40.63, 39.94]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16725.0, 26.36, 28.51]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16725.19, 34.59, 37.71]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16725.85, 43.2, 12.0]
3079黑龙江省纯凝式超高压水冷215.0129.60480344.608202250.00.3216350.067798[16729.0, 51.42, 17.33]
\n", "

7151079 rows × 11 columns

\n", "
" ], "text/plain": [ " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.471140 31.065113 3.0 \n", "... ... ... ... ... ... ... ... ... \n", "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", "3079 黑龙江省 纯凝式 超高压 水冷 215.0 129.604803 44.608202 250.0 \n", "\n", " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) coal_params \n", "0 0.537574 0.070992 [16733.0, 22.53, 27.46] \n", "0 0.537574 0.070992 [16740.0, 18.99, 37.0] \n", "0 0.537574 0.070992 [16740.0, 27.93, 24.43] \n", "0 0.537574 0.070992 [16741.0, 26.69, 25.92] \n", "0 0.537574 0.070992 [16741.51, 19.51, 35.62] \n", "... ... ... ... 
\n", "3079 0.321635 0.067798 [16723.0, 40.63, 39.94] \n", "3079 0.321635 0.067798 [16725.0, 26.36, 28.51] \n", "3079 0.321635 0.067798 [16725.19, 34.59, 37.71] \n", "3079 0.321635 0.067798 [16725.85, 43.2, 12.0] \n", "3079 0.321635 0.067798 [16729.0, 51.42, 17.33] \n", "\n", "[7151079 rows x 11 columns]" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_data" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# Unpack each coal_params entry into one column per element, in list order.\n", "# .values hands pandas a plain array so the assignment is positional; the index\n", "# labels of new_data repeat after explode.\n", "coal_param_cols = ['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)']\n", "for position, col_name in enumerate(coal_param_cols):\n", "    new_data[col_name] = new_data.coal_params.apply(lambda triple: triple[position]).values" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# The list column is redundant once its elements live in their own columns.\n", "norm_data = new_data.drop(columns=['coal_params'])" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitude发电碳排放因子(kg/kWh)供热碳排放因子(kg/MJ)入炉煤低位热值(kJ/kg)燃煤挥发份Var(%)燃煤灰份Aar(%)
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216733.0022.5327.46
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216740.0018.9937.00
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216740.0027.9324.43
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216741.0026.6925.92
0上海市供热式亚临界水冷300.0121.4711431.0651133.00.5375740.07099216741.5119.5135.62
\n", "
" ], "text/plain": [ " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", "0 上海市 供热式 亚临界 水冷 300.0 121.47114 31.065113 3.0 \n", "\n", " 发电碳排放因子(kg/kWh) 供热碳排放因子(kg/MJ) 入炉煤低位热值(kJ/kg) 燃煤挥发份Var(%) 燃煤灰份Aar(%) \n", "0 0.537574 0.070992 16733.00 22.53 27.46 \n", "0 0.537574 0.070992 16740.00 18.99 37.00 \n", "0 0.537574 0.070992 16740.00 27.93 24.43 \n", "0 0.537574 0.070992 16741.00 26.69 25.92 \n", "0 0.537574 0.070992 16741.51 19.51 35.62 " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "norm_data.head()" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# log1p every column listed in num_cols, then one-hot encode the categorical columns.\n", "# NOTE(review): norm_data is modified in place, so re-running this cell applies log1p twice.\n", "for col in num_cols:\n", "    norm_data[col] = np.log1p(norm_data[col])\n", "norm_data_dummpy = pd.get_dummies(norm_data, columns=object_cols)" ] }, { "cell_type": "code", "execution_count": 59, "metadata": {}, "outputs": [], "source": [ "# The observed emission factors must not be part of the model input frame.\n", "norm_data_dummpy = norm_data_dummpy.drop(columns=['发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)'])" ] }, { "cell_type": "code", "execution_count": 61, "metadata": { "collapsed": false, "jupyter": { "outputs_hidden": false }, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "# feature_cols comes from an earlier cell; presumably it matches the training feature order (verify).\n", "new_xgb_data = xgb.DMatrix(norm_data_dummpy[feature_cols])" ] }, { "cell_type": "code", "execution_count": 62, "metadata": {}, "outputs": [], "source": [ "# Score every exploded row with both fitted boosters (power and heat).\n", "norm_data['power_co2_factor'] = gb_model.predict(new_xgb_data)\n", "norm_data['heat_co2_factor'] = gb_model_heat.predict(new_xgb_data)" ] }, { "cell_type": "code", "execution_count": 65, "metadata": {}, "outputs": [], "source": [ 
"# Keep only the unit descriptor columns plus the two predicted factors.\n", "coal_and_observed_cols = ['入炉煤低位热值(kJ/kg)', '燃煤挥发份Var(%)', '燃煤灰份Aar(%)', '发电碳排放因子(kg/kWh)', '供热碳排放因子(kg/MJ)']\n", "normaled_data = norm_data.drop(columns=coal_and_observed_cols)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
所处地区机组类型参数分类冷凝器型式铭牌容量 (MW)longitudelatitudealtitudepower_co2_factorheat_co2_factor
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4925580.064411
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4740820.062117
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4896230.063859
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4936150.064382
0上海市供热式亚临界水冷5.7071104.8078753.4677691.3862940.4708830.062354
.................................
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3889120.067787
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3886060.065639
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3809710.068147
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.4019730.065844
3079黑龙江省纯凝式超高压水冷5.3752784.8721763.8200885.5254530.3863690.065845
\n", "

7151079 rows × 10 columns

\n", "
" ], "text/plain": [ " 所处地区 机组类型 参数分类 冷凝器型式 铭牌容量 (MW) longitude latitude altitude \\\n", "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", "0 上海市 供热式 亚临界 水冷 5.707110 4.807875 3.467769 1.386294 \n", "... ... ... ... ... ... ... ... ... \n", "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", "3079 黑龙江省 纯凝式 超高压 水冷 5.375278 4.872176 3.820088 5.525453 \n", "\n", " power_co2_factor heat_co2_factor \n", "0 0.492558 0.064411 \n", "0 0.474082 0.062117 \n", "0 0.489623 0.063859 \n", "0 0.493615 0.064382 \n", "0 0.470883 0.062354 \n", "... ... ... \n", "3079 0.388912 0.067787 \n", "3079 0.388606 0.065639 \n", "3079 0.380971 0.068147 \n", "3079 0.401973 0.065844 \n", "3079 0.386369 0.065845 \n", "\n", "[7151079 rows x 10 columns]" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "normaled_data" ] }, { "cell_type": "code", "execution_count": 67, "metadata": {}, "outputs": [], "source": [ "# Columns holding the model predictions; every other column acts as a grouping key below.\n", "target_cols = ['power_co2_factor', 'heat_co2_factor']" ] }, { "cell_type": "code", "execution_count": 69, "metadata": {}, "outputs": [], "source": [ "# Average the predictions over all rows that share the same non-target columns,\n", "# i.e. collapse the per-coal-parameter rows created by the explode step.\n", "key_cols = [name for name in normaled_data.columns if name not in target_cols]\n", "save_data = normaled_data.groupby(key_cols)[target_cols].mean()" ] }, { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "import os\n", "\n", "# to_csv does not create missing parent directories, so make sure ./results exists first.\n", "os.makedirs('./results', exist_ok=True)\n", "# utf-8-sig writes a BOM so spreadsheet tools detect the encoding of the Chinese headers.\n", "save_data.reset_index().to_csv('./results/去煤种化数据.csv', encoding='utf-8-sig', index=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, 
"language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.13" } }, "nbformat": 4, "nbformat_minor": 4 }