{ "cells": [ { "cell_type": "code", "execution_count": 1, "outputs": [], "source": [ "import datetime as dt\n", "\n", "import lightgbm as lgb\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score\n", "from sklearn.model_selection import train_test_split\n", "\n", "# Render text with the SimHei font so the Chinese column names display in figures.\n", "mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\"]\n", "# Draw negative tick labels with the ASCII hyphen instead of the Unicode minus sign.\n", "mpl.rcParams[\"axes.unicode_minus\"] = False" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 2, "outputs": [], "source": [ "# Load the training data CSV (path is relative to the notebook's working directory).\n", "ori_data = pd.read_csv('data/unit_train_data.csv')" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 3, "outputs": [], "source": [ "# Derive the weekday (Monday=1 ... Sunday=7) from the 'days' date strings (YYYY-MM-DD).\n", "ori_data['day_of_week'] = ori_data['days'].map(\n", "    lambda day: dt.datetime.strptime(day, '%Y-%m-%d').isoweekday()\n", ")" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 4, "outputs": [], "source": [ "# Working frame: drop the raw date string, the day-of-year column and the\n", "# enterprise-name column; ori_data itself is left untouched.\n", "data = ori_data.drop(columns=['days', 'day_of_year', '企业名称'])" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 5, "outputs": [ { "data": { "text/plain": " r_O2 temperature 发电量(万千瓦时) 供热量(吉焦) c_smoke c_NO2 \\\n0 9.900000 51.250000 15.6796 6536.83 2.872405e+04 3.979907e+06 \n1 9.400000 50.679167 13.3984 2484.64 2.261807e+04 2.639425e+06 \n2 8.550000 52.808333 13.4023 3020.83 1.817677e+04 3.231672e+06 \n3 10.202083 48.854167 12.4765 5599.23 9.161746e+04 2.243444e+06 \n4 11.497917 45.783333 13.4414 4702.65 2.995257e+05 3.580802e+06 \n... ... ... ... ... ... ... 
\n1486 6.329167 44.741667 467.1060 0.00 1.020390e+06 1.173919e+07 \n1487 6.183333 45.587500 504.9000 0.00 1.169393e+06 1.393048e+07 \n1488 6.425000 45.545833 462.8220 0.00 1.124243e+06 1.267043e+07 \n1489 6.162500 45.175000 528.9600 0.00 1.296293e+06 1.442430e+07 \n1490 5.570833 46.100000 672.1800 0.00 1.554891e+06 1.758048e+07 \n\n c_SO2 flow 燃料消耗量(吨) 生产设备类型 燃料类型 \\\n0 7.665088e+05 162345.192917 323.0 高温高压循环流化床锅炉 中高挥发分烟煤 \n1 5.183845e+05 140175.330833 218.0 高温高压循环流化床锅炉 中高挥发分烟煤 \n2 9.870800e+05 154686.184167 212.0 高温高压循环流化床锅炉 中高挥发分烟煤 \n3 2.880779e+05 120345.545833 223.0 高温高压循环流化床锅炉 中高挥发分烟煤 \n4 5.500482e+04 162533.103542 243.0 高温高压循环流化床锅炉 中高挥发分烟煤 \n... ... ... ... ... ... \n1486 1.101318e+07 836100.000000 2401.0 煤粉锅炉 一般烟煤 \n1487 1.335698e+07 895515.000000 2611.0 煤粉锅炉 一般烟煤 \n1488 1.129934e+07 837945.000000 2846.0 煤粉锅炉 一般烟煤 \n1489 1.434195e+07 915030.000000 2981.0 煤粉锅炉 一般烟煤 \n1490 1.593381e+07 992220.000000 3560.0 煤粉锅炉 一般烟煤 \n\n 低位发热量(GJ/t) 汽轮机类型 冷却方式 额定蒸发量 压力参数 单机容量 week_of_year \\\n0 20.501 背压式 水冷-闭式循环 230.0 高压 30.0 39 \n1 20.501 背压式 水冷-闭式循环 230.0 高压 30.0 39 \n2 20.501 背压式 水冷-闭式循环 230.0 高压 30.0 39 \n3 20.501 背压式 水冷-闭式循环 230.0 高压 30.0 39 \n4 20.501 背压式 水冷-闭式循环 230.0 高压 30.0 39 \n... ... ... ... ... ... ... ... \n1486 14.682 抽凝式 水冷-闭式循环 1172.0 超临界 350.0 20 \n1487 14.682 抽凝式 水冷-闭式循环 1172.0 超临界 350.0 21 \n1488 14.682 抽凝式 水冷-闭式循环 1172.0 超临界 350.0 21 \n1489 14.682 抽凝式 水冷-闭式循环 1172.0 超临界 350.0 21 \n1490 14.682 抽凝式 水冷-闭式循环 1172.0 超临界 350.0 21 \n\n day_of_week \n0 1 \n1 2 \n2 3 \n3 4 \n4 5 \n... ... \n1486 5 \n1487 6 \n1488 7 \n1489 1 \n1490 2 \n\n[1254 rows x 19 columns]", "text/html": "
\n | r_O2 | \ntemperature | \n发电量(万千瓦时) | \n供热量(吉焦) | \nc_smoke | \nc_NO2 | \nc_SO2 | \nflow | \n燃料消耗量(吨) | \n生产设备类型 | \n燃料类型 | \n低位发热量(GJ/t) | \n汽轮机类型 | \n冷却方式 | \n额定蒸发量 | \n压力参数 | \n单机容量 | \nweek_of_year | \nday_of_week | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n9.900000 | \n51.250000 | \n15.6796 | \n6536.83 | \n2.872405e+04 | \n3.979907e+06 | \n7.665088e+05 | \n162345.192917 | \n323.0 | \n高温高压循环流化床锅炉 | \n中高挥发分烟煤 | \n20.501 | \n背压式 | \n水冷-闭式循环 | \n230.0 | \n高压 | \n30.0 | \n39 | \n1 | \n
1 | \n9.400000 | \n50.679167 | \n13.3984 | \n2484.64 | \n2.261807e+04 | \n2.639425e+06 | \n5.183845e+05 | \n140175.330833 | \n218.0 | \n高温高压循环流化床锅炉 | \n中高挥发分烟煤 | \n20.501 | \n背压式 | \n水冷-闭式循环 | \n230.0 | \n高压 | \n30.0 | \n39 | \n2 | \n
2 | \n8.550000 | \n52.808333 | \n13.4023 | \n3020.83 | \n1.817677e+04 | \n3.231672e+06 | \n9.870800e+05 | \n154686.184167 | \n212.0 | \n高温高压循环流化床锅炉 | \n中高挥发分烟煤 | \n20.501 | \n背压式 | \n水冷-闭式循环 | \n230.0 | \n高压 | \n30.0 | \n39 | \n3 | \n
3 | \n10.202083 | \n48.854167 | \n12.4765 | \n5599.23 | \n9.161746e+04 | \n2.243444e+06 | \n2.880779e+05 | \n120345.545833 | \n223.0 | \n高温高压循环流化床锅炉 | \n中高挥发分烟煤 | \n20.501 | \n背压式 | \n水冷-闭式循环 | \n230.0 | \n高压 | \n30.0 | \n39 | \n4 | \n
4 | \n11.497917 | \n45.783333 | \n13.4414 | \n4702.65 | \n2.995257e+05 | \n3.580802e+06 | \n5.500482e+04 | \n162533.103542 | \n243.0 | \n高温高压循环流化床锅炉 | \n中高挥发分烟煤 | \n20.501 | \n背压式 | \n水冷-闭式循环 | \n230.0 | \n高压 | \n30.0 | \n39 | \n5 | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
1486 | \n6.329167 | \n44.741667 | \n467.1060 | \n0.00 | \n1.020390e+06 | \n1.173919e+07 | \n1.101318e+07 | \n836100.000000 | \n2401.0 | \n煤粉锅炉 | \n一般烟煤 | \n14.682 | \n抽凝式 | \n水冷-闭式循环 | \n1172.0 | \n超临界 | \n350.0 | \n20 | \n5 | \n
1487 | \n6.183333 | \n45.587500 | \n504.9000 | \n0.00 | \n1.169393e+06 | \n1.393048e+07 | \n1.335698e+07 | \n895515.000000 | \n2611.0 | \n煤粉锅炉 | \n一般烟煤 | \n14.682 | \n抽凝式 | \n水冷-闭式循环 | \n1172.0 | \n超临界 | \n350.0 | \n21 | \n6 | \n
1488 | \n6.425000 | \n45.545833 | \n462.8220 | \n0.00 | \n1.124243e+06 | \n1.267043e+07 | \n1.129934e+07 | \n837945.000000 | \n2846.0 | \n煤粉锅炉 | \n一般烟煤 | \n14.682 | \n抽凝式 | \n水冷-闭式循环 | \n1172.0 | \n超临界 | \n350.0 | \n21 | \n7 | \n
1489 | \n6.162500 | \n45.175000 | \n528.9600 | \n0.00 | \n1.296293e+06 | \n1.442430e+07 | \n1.434195e+07 | \n915030.000000 | \n2981.0 | \n煤粉锅炉 | \n一般烟煤 | \n14.682 | \n抽凝式 | \n水冷-闭式循环 | \n1172.0 | \n超临界 | \n350.0 | \n21 | \n1 | \n
1490 | \n5.570833 | \n46.100000 | \n672.1800 | \n0.00 | \n1.554891e+06 | \n1.758048e+07 | \n1.593381e+07 | \n992220.000000 | \n3560.0 | \n煤粉锅炉 | \n一般烟煤 | \n14.682 | \n抽凝式 | \n水冷-闭式循环 | \n1172.0 | \n超临界 | \n350.0 | \n21 | \n2 | \n
1254 rows × 19 columns
\n\n | fea_name | \nfea_imp | \nimp_scale | \n
---|---|---|---|
8 | \nday_of_week_1 | \n309 | \n0.211354 | \n
2 | \nday_of_week_6 | \n292 | \n0.199726 | \n
12 | \nday_of_week_4 | \n277 | \n0.189466 | \n
7 | \nday_of_week_7 | \n215 | \n0.147059 | \n
9 | \nday_of_week_2 | \n147 | \n0.100547 | \n
3 | \nday_of_week_5 | \n139 | \n0.095075 | \n
0 | \nday_of_week_3 | \n83 | \n0.056772 | \n