22-T67/test.ipynb

1436 lines
1.0 MiB
Plaintext
Raw Normal View History

2023-03-30 10:25:44 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Importing plotly failed. Interactive plots will not work.\n"
]
}
],
"source": [
"import pandas as pd\n",
"import os\n",
"import datetime as dt\n",
"from prophet import Prophet"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Yearly data folders (names starting with '城市_'), in sorted order.\n",
"data_folder = sorted(name for name in os.listdir('./data/') if name.startswith('城市_'))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['城市_20150101-20141231',\n",
" '城市_20160101-20161231',\n",
" '城市_20170101-20171231',\n",
" '城市_20180101-20181231',\n",
" '城市_20190101-20191231',\n",
" '城市_20200101-20201231',\n",
" '城市_20210101-20211231']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data_folder"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Read every daily CSV under the yearly folders and stack the PM2.5 / O3 rows\n",
"# into one frame. The per-file frames are collected first and concatenated\n",
"# once: calling pd.concat inside the loop copies everything accumulated so far\n",
"# on every iteration.\n",
"frames = []\n",
"# The loop previously iterated over `data__folder`, a misspelling of data_folder.\n",
"for folder in data_folder:\n",
"    for file in os.listdir(f\"./data/{folder}\"):\n",
"        if file.endswith('csv'):\n",
"            data = pd.read_csv(f'./data/{folder}/{file}')\n",
"            frames.append(data[data['type'].isin(['PM2.5', 'O3'])].copy())\n",
"total_data = pd.concat(frames) if frames else pd.DataFrame()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(119419, 394)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_data.shape"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>hour</th>\n",
" <th>type</th>\n",
" <th>北京</th>\n",
" <th>天津</th>\n",
" <th>石家庄</th>\n",
" <th>唐山</th>\n",
" <th>秦皇岛</th>\n",
" <th>邯郸</th>\n",
" <th>保定</th>\n",
" <th>...</th>\n",
" <th>果洛藏族自治州</th>\n",
" <th>玉树藏族自治州</th>\n",
" <th>海西蒙古族藏族自治州</th>\n",
" <th>博尔塔拉蒙古自治州</th>\n",
" <th>克孜勒苏柯尔克孜自治州</th>\n",
" <th>兰州新区</th>\n",
" <th>赣江新区</th>\n",
" <th>儋州</th>\n",
" <th>雄安新区</th>\n",
" <th>西咸新区</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20150504</td>\n",
" <td>0</td>\n",
" <td>PM2.5</td>\n",
" <td>21.0</td>\n",
" <td>16.0</td>\n",
" <td>36.0</td>\n",
" <td>29.0</td>\n",
" <td>22.0</td>\n",
" <td>105.0</td>\n",
" <td>53.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>20150504</td>\n",
" <td>0</td>\n",
" <td>O3</td>\n",
" <td>70.0</td>\n",
" <td>57.0</td>\n",
" <td>17.0</td>\n",
" <td>68.0</td>\n",
" <td>52.0</td>\n",
" <td>47.0</td>\n",
" <td>61.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>20150504</td>\n",
" <td>1</td>\n",
" <td>PM2.5</td>\n",
" <td>15.0</td>\n",
" <td>16.0</td>\n",
" <td>44.0</td>\n",
" <td>27.0</td>\n",
" <td>16.0</td>\n",
" <td>74.0</td>\n",
" <td>31.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>20150504</td>\n",
" <td>1</td>\n",
" <td>O3</td>\n",
" <td>72.0</td>\n",
" <td>56.0</td>\n",
" <td>6.0</td>\n",
" <td>75.0</td>\n",
" <td>50.0</td>\n",
" <td>53.0</td>\n",
" <td>64.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>20150504</td>\n",
" <td>2</td>\n",
" <td>PM2.5</td>\n",
" <td>15.0</td>\n",
" <td>19.0</td>\n",
" <td>62.0</td>\n",
" <td>25.0</td>\n",
" <td>12.0</td>\n",
" <td>57.0</td>\n",
" <td>22.0</td>\n",
" <td>...</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 394 columns</p>\n",
"</div>"
],
"text/plain": [
" date hour type 北京 天津 石家庄 唐山 秦皇岛 邯郸 保定 ... \\\n",
"1 20150504 0 PM2.5 21.0 16.0 36.0 29.0 22.0 105.0 53.0 ... \n",
"9 20150504 0 O3 70.0 57.0 17.0 68.0 52.0 47.0 61.0 ... \n",
"16 20150504 1 PM2.5 15.0 16.0 44.0 27.0 16.0 74.0 31.0 ... \n",
"24 20150504 1 O3 72.0 56.0 6.0 75.0 50.0 53.0 64.0 ... \n",
"31 20150504 2 PM2.5 15.0 19.0 62.0 25.0 12.0 57.0 22.0 ... \n",
"\n",
" 果洛藏族自治州 玉树藏族自治州 海西蒙古族藏族自治州 博尔塔拉蒙古自治州 克孜勒苏柯尔克孜自治州 兰州新区 赣江新区 儋州 \\\n",
"1 NaN NaN NaN NaN NaN NaN NaN NaN \n",
"9 NaN NaN NaN NaN NaN NaN NaN NaN \n",
"16 NaN NaN NaN NaN NaN NaN NaN NaN \n",
"24 NaN NaN NaN NaN NaN NaN NaN NaN \n",
"31 NaN NaN NaN NaN NaN NaN NaN NaN \n",
"\n",
" 雄安新区 西咸新区 \n",
"1 NaN NaN \n",
"9 NaN NaN \n",
"16 NaN NaN \n",
"24 NaN NaN \n",
"31 NaN NaN \n",
"\n",
"[5 rows x 394 columns]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"total_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"def concat_date(x, y):\n",
"    \"\"\"Combine a YYYYMMDD date and an hour into one datetime.\n",
"\n",
"    Args:\n",
"        x: Date whose text form is YYYYMMDD.\n",
"        y: Hour of the day.\n",
"\n",
"    Returns:\n",
"        datetime.datetime: That date at the given hour, with minute and second 0.\n",
"    \"\"\"\n",
"    stamp = \"{} {}:00:00\".format(x, y)\n",
"    parsed = dt.datetime.strptime(stamp, \"%Y%m%d %H:%M:%S\")\n",
"    return parsed"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"# Build the timestamps straight from the two columns that are needed.\n",
"# DataFrame.apply(axis=1) materialises every full-width row as a Series just to\n",
"# read 'date' and 'hour', which is very slow on a frame this wide.\n",
"total_data['ds'] = pd.to_datetime(\n",
"    [concat_date(day, hour) for day, hour in zip(total_data['date'], total_data['hour'])]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"total_data.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"# One frame per pollutant, each re-indexed from 0.\n",
"PM25_data = total_data.loc[total_data[\"type\"].eq('PM2.5')].reset_index(drop=True)\n",
"O3_data = total_data.loc[total_data[\"type\"].eq('O3')].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"def build_model(city: str, data: pd.DataFrame, dtype:str, split_date=\"2021-01-01 00:00:00\"):\n",
"    \"\"\"Fit a Prophet model on one city's series and forecast the hold-out rows.\n",
"\n",
"    Args:\n",
"        city (str): Name of the city column to model.\n",
"        data (pd.DataFrame): Frame with 'type', 'ds' and one column per city.\n",
"        dtype (str): Value of the 'type' column to keep (the notebook uses 'PM2.5' and 'O3').\n",
"        split_date (str, optional): Rows with ds before this train the model,\n",
"            the remaining rows are predicted. Defaults to \"2021-01-01 00:00:00\".\n",
"\n",
"    Returns:\n",
"        tuple: (model, forecast) - the fitted Prophet model and its prediction\n",
"        for the test rows, matching the later build_model definition in this notebook.\n",
"    \"\"\"\n",
"    use_data = data[(data['type']==dtype)][[\"ds\", city]].copy()\n",
"    # Prophet expects the columns to be called 'ds' and 'y'.\n",
"    use_data.columns = [\"ds\", \"y\"]\n",
"    train_data = use_data[use_data.ds < split_date].copy()\n",
"    test_data = use_data[use_data.ds >= split_date].copy()\n",
"    model=Prophet(growth=\"linear\",\n",
"                  yearly_seasonality=True,\n",
"                  weekly_seasonality=False,\n",
"                  daily_seasonality=False,\n",
"                  seasonality_mode=\"multiplicative\",\n",
"                  seasonality_prior_scale=12,\n",
"                  )\n",
"    # The body used to reference the undefined names `train` / `test` and\n",
"    # returned nothing, although the caller unpacks two values.\n",
"    model.fit(train_data)\n",
"    forecast=model.predict(test_data)\n",
"    return model, forecast"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"train, test = build_model('北京', total_data, dtype='PM2.5')"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt\n",
"from matplotlib import rcParams\n",
"rcParams['figure.figsize'] = (24, 20)"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.dates as mdates"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['2021-12-31T00:00:00.000000000', '2021-12-31T01:00:00.000000000',\n",
" '2021-12-31T02:00:00.000000000', ...,\n",
" '2021-05-09T21:00:00.000000000', '2021-05-09T22:00:00.000000000',\n",
" '2021-05-09T23:00:00.000000000'], dtype='datetime64[ns]')"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test.ds.values"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"from get_holiday_cn.client import getHoliday"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"client = getHoliday()\n",
"r = client.assemble_holiday_data(today='2016-01-01')"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'code': 0,\n",
" 'type': {'type': 2, 'name': '周五', 'week': 5, 'status': 0},\n",
" 'holiday': {'holiday': True, 'name': '元旦', 'date': '2016-01-01'}}"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"r"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"def get_date_type(date, holiday_client):\n",
"    \"\"\"Classify one date through the holiday client's lookup.\n",
"\n",
"    Args:\n",
"        date: Date handed to assemble_holiday_data(today=...).\n",
"        holiday_client: Object providing assemble_holiday_data.\n",
"\n",
"    Returns:\n",
"        'oridinary' when the response has no 'holiday' entry, otherwise that\n",
"        entry's 'name'. None when the response 'code' is not 0.\n",
"    \"\"\"\n",
"    response = holiday_client.assemble_holiday_data(today=date)\n",
"    if response.get('code') != 0:\n",
"        # Lookup did not succeed; the caller receives None.\n",
"        return None\n",
"    holiday_info = response.get('holiday')\n",
"    if holiday_info is None:\n",
"        return 'oridinary'\n",
"    return holiday_info.get('name')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"def build_holiday(start_date:str=\"2015-01-01\", end_date:str=\"2021-12-31\"):\n",
"    \"\"\"Find the domestic holidays between two dates, grouped by holiday name.\n",
"\n",
"    Args:\n",
"        start_date (str): String in \"YYYY-MM-DD\" form, defaults to 2015-01-01.\n",
"        end_date (str): String in \"YYYY-MM-DD\" form, defaults to 2021-12-31.\n",
"\n",
"    Returns:\n",
"        dict: Holiday name -> list of \"YYYY-MM-DD\" date strings.\n",
"    \"\"\"\n",
"    ds_list = pd.DataFrame(pd.date_range(start=start_date, end=end_date, freq='D'), columns=['date'])\n",
"    ds_list['date'] = ds_list['date'].dt.strftime('%Y-%m-%d')\n",
"    client = getHoliday()\n",
"    ds_list['day_type'] = ds_list['date'].apply(lambda x: get_date_type(x, client))\n",
"    # get_date_type labels non-holidays 'oridinary'. The filter used to compare\n",
"    # against 'simple', which never matched, so every ordinary day was kept.\n",
"    special_date = ds_list[ds_list['day_type'] != 'oridinary'].copy()\n",
"    return special_date.groupby('day_type')['date'].apply(list).to_dict()"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"# Try the holiday lookup on one year of daily dates and keep the non-ordinary days.\n",
"ds_list = pd.DataFrame(\n",
"    pd.date_range(start='2018-01-01', end='2019-01-01', freq='D'),\n",
"    columns=['date'],\n",
")\n",
"client = getHoliday()\n",
"ds_list['day_type'] = ds_list['date'].map(lambda day: get_date_type(day, client))\n",
"special_date = ds_list.loc[ds_list['day_type'] != 'oridinary'].copy()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'中秋节': ['2018-09-24'],\n",
" '元旦': ['2018-01-01', '2018-12-30', '2018-12-31', '2019-01-01'],\n",
" '元旦调休': ['2018-12-29'],\n",
" '劳动节': ['2018-04-29', '2018-04-30', '2018-05-01'],\n",
" '劳动节调休': ['2018-04-28'],\n",
" '国庆节': ['2018-10-01',\n",
" '2018-10-02',\n",
" '2018-10-03',\n",
" '2018-10-04',\n",
" '2018-10-05',\n",
" '2018-10-06',\n",
" '2018-10-07'],\n",
" '国庆节调休': ['2018-09-29', '2018-09-30'],\n",
" '春节': ['2018-02-15',\n",
" '2018-02-16',\n",
" '2018-02-17',\n",
" '2018-02-18',\n",
" '2018-02-19',\n",
" '2018-02-20',\n",
" '2018-02-21'],\n",
" '春节调休': ['2018-02-11', '2018-02-24'],\n",
" '清明节': ['2018-04-05', '2018-04-06', '2018-04-07'],\n",
" '清明节调休': ['2018-04-08'],\n",
" '端午节': ['2018-06-18']}"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"special_date.groupby('day_type')['date'].apply(list).to_dict()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"import holidays\n",
"import pandas as pd\n",
"import os\n",
"import numpy as np\n",
"from prophet import Prophet\n",
"import datetime as dt\n",
"from get_holiday_cn.client import getHoliday\n",
"from logzero import logger\n",
"import pickle\n",
"\n",
"\n",
"def concat_date(x:str, y:str):\n",
"    \"\"\"Combine a date and an hour into a single datetime.\n",
"\n",
"    Args:\n",
"        x (str): Year, month and day in YYYYMMDD form.\n",
"        y (str): Hour of the day.\n",
"\n",
"    Returns:\n",
"        datetime.datetime: The combined time, with minute and second 0.\n",
"    \"\"\"\n",
"    time_str = f\"{x} {y}:00:00\"\n",
"    return dt.datetime.strptime(time_str, \"%Y%m%d %H:%M:%S\")\n",
"\n",
"\n",
"def load_data():\n",
"    \"\"\"Load all yearly folders of daily CSV files into one time-ordered frame.\n",
"\n",
"    Only the PM2.5 and O3 rows of each file are kept, and a 'ds' datetime\n",
"    column is built from the 'date' and 'hour' columns.\n",
"\n",
"    Returns:\n",
"        pd.DataFrame: The combined rows sorted by 'ds', with a fresh 0..n-1 index.\n",
"    \"\"\"\n",
"    data_folder = sorted(x for x in os.listdir('./data/') if x.startswith('城市_'))\n",
"    # Collect the per-file frames and concatenate once; calling pd.concat inside\n",
"    # the loop copies everything accumulated so far on every iteration.\n",
"    frames = []\n",
"    for folder in data_folder:\n",
"        for file in os.listdir(f\"./data/{folder}\"):\n",
"            if file.endswith('csv'):\n",
"                data = pd.read_csv(f'./data/{folder}/{file}')\n",
"                frames.append(data[data['type'].isin(['PM2.5', 'O3'])].copy())\n",
"    total_data = pd.concat(frames) if frames else pd.DataFrame(columns=['date', 'hour', 'type'])\n",
"    # Zip the two needed columns instead of a row-wise apply over the full-width frame.\n",
"    total_data['ds'] = pd.to_datetime(\n",
"        [concat_date(day, hour) for day, hour in zip(total_data['date'], total_data['hour'])]\n",
"    )\n",
"    # sort_values returns a new frame; its result used to be discarded, so the\n",
"    # rows stayed in directory-listing order. mergesort keeps ties in input order.\n",
"    total_data = total_data.sort_values(by='ds', ascending=True, kind='mergesort')\n",
"    total_data = total_data.reset_index(drop=True)\n",
"    logger.info(f\"总数据集大小:{total_data.shape}\")\n",
"    return total_data\n",
"\n",
"\n",
"def build_model(city: str, data: pd.DataFrame, dtype:str, holiday_mode:pd.DataFrame, split_date=\"2021-01-01 00:00:00\"):\n",
"    \"\"\"Fit a Prophet model on one city's series and forecast the hold-out rows.\n",
"\n",
"    Args:\n",
"        city (str): City column to model.\n",
"        data (pd.DataFrame): Frame with 'type', 'ds' and one column per city.\n",
"        dtype (str): Pollutant to keep, O3 or PM2.5.\n",
"        holiday_mode (pd.DataFrame): Holiday table passed to Prophet's `holidays`\n",
"            argument; build_holiday() returns it with 'ds' and 'holiday' columns.\n",
"        split_date (str, optional): Date separating training rows (before) from\n",
"            test rows (on or after). Defaults to \"2021-01-01 00:00:00\".\n",
"\n",
"    Returns:\n",
"        model: The fitted Prophet model.\n",
"        forecast: The prediction for the test rows.\n",
"    \"\"\"\n",
"    logger.info(f\"选择了 {city} 的 {dtype} 数据,\")\n",
"    use_data = data[(data['type']==dtype)][[\"ds\", city]].copy()\n",
"    use_data.columns = [\"ds\", \"y\"]\n",
"    # ds is plain text when the frame was read back from the CSV cache; compare\n",
"    # real timestamps instead of relying on string ordering.\n",
"    use_data[\"ds\"] = pd.to_datetime(use_data[\"ds\"])\n",
"    split_ts = pd.Timestamp(split_date)\n",
"    train_data = use_data[use_data.ds < split_ts].copy()\n",
"    test_data = use_data[use_data.ds >= split_ts].copy()\n",
"    model=Prophet(growth=\"linear\",\n",
"                  yearly_seasonality=True,\n",
"                  weekly_seasonality=True,\n",
"                  daily_seasonality=True,\n",
"                  seasonality_mode=\"multiplicative\",\n",
"                  seasonality_prior_scale=12,\n",
"                  holidays=holiday_mode\n",
"                  )\n",
"    model.fit(train_data)\n",
"    forecast=model.predict(test_data)\n",
"    return model, forecast\n",
"\n",
"\n",
"def get_date_type(date:str, holiday_client:getHoliday):\n",
"    \"\"\"Determine which kind of holiday a given date is.\n",
"\n",
"    Args:\n",
"        date (str): \"YYYY-MM-DD\"\n",
"        holiday_client (getHoliday): object of getHoliday class\n",
"\n",
"    Returns:\n",
"        str: 'oridinary' for a regular day, otherwise the holiday name.\n",
"        None is returned implicitly when the response 'code' is not 0.\n",
"    \"\"\"\n",
"    rst = holiday_client.assemble_holiday_data(today=date)\n",
"    if rst.get('code') == 0:\n",
"        if rst.get('holiday') is None:\n",
"            return 'oridinary'\n",
"        else:\n",
"            return rst.get('holiday').get('name')\n",
"\n",
"\n",
"def build_holiday(start_date:str=\"2015-01-01\", end_date:str=\"2021-12-31\"):\n",
"    \"\"\"Find all domestic holidays between two dates.\n",
"\n",
"    Per the original note this includes days that would normally be off but\n",
"    became working days through schedule adjustment.\n",
"\n",
"    Args:\n",
"        start_date (str): String in \"YYYY-MM-DD\" form, defaults to 2015-01-01.\n",
"        end_date (str): String in \"YYYY-MM-DD\" form, defaults to 2021-12-31.\n",
"\n",
"    Returns:\n",
"        pd.DataFrame: One row per special day, with columns 'ds' (the\n",
"        \"YYYY-MM-DD\" string) and 'holiday' (the holiday name).\n",
"    \"\"\"\n",
"    ds_list = pd.DataFrame(pd.date_range(start=start_date, end=end_date, freq='D'), columns=['date'])\n",
"    ds_list['date'] = ds_list['date'].dt.strftime('%Y-%m-%d')\n",
"    client = getHoliday()\n",
"    ds_list['day_type'] = ds_list['date'].apply(lambda x: get_date_type(x, client))\n",
"    # get_date_type labels non-holidays 'oridinary'. The filter used to compare\n",
"    # against 'simple', which never matched, so every ordinary day was returned\n",
"    # as a holiday. Rows without a name (lookup code != 0) are dropped as well.\n",
"    is_special = ds_list['day_type'].notna() & (ds_list['day_type'] != 'oridinary')\n",
"    special_date = ds_list[is_special].copy()\n",
"    special_date.columns = ['ds', 'holiday']\n",
"    return special_date\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"\n",
"data_type = 'O3'\n",
"city_list = ['北京', '天津']\n",
"if os.path.exists('./data/total_data.csv'):\n",
"    # Parse ds on load so it has the same datetime dtype as the frame that\n",
"    # load_data() returns on a cache miss; otherwise it comes back as text.\n",
"    data = pd.read_csv('./data/total_data.csv', parse_dates=['ds'])\n",
"else:\n",
"    data = load_data()\n",
"    data.to_csv('./data/total_data.csv', encoding='utf-8', index=False)\n",
"model_dict = dict()\n",
"predict_dict = dict()\n",
"holiday_data = build_holiday(data.ds.min(), data.ds.max())"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ds</th>\n",
" <th>holiday</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-01-02</td>\n",
" <td>元旦</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-01-03</td>\n",
" <td>元旦</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01-04</td>\n",
" <td>元旦调休</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-01-05</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-01-06</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2551</th>\n",
" <td>2021-12-27</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2552</th>\n",
" <td>2021-12-28</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2553</th>\n",
" <td>2021-12-29</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2554</th>\n",
" <td>2021-12-30</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2555</th>\n",
" <td>2021-12-31</td>\n",
" <td>oridinary</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2556 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" ds holiday\n",
"0 2015-01-02 元旦\n",
"1 2015-01-03 元旦\n",
"2 2015-01-04 元旦调休\n",
"3 2015-01-05 oridinary\n",
"4 2015-01-06 oridinary\n",
"... ... ...\n",
"2551 2021-12-27 oridinary\n",
"2552 2021-12-28 oridinary\n",
"2553 2021-12-29 oridinary\n",
"2554 2021-12-30 oridinary\n",
"2555 2021-12-31 oridinary\n",
"\n",
"[2556 rows x 2 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"holiday_data"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"for city in city_list:\n",
"    # build_model reads the 'type' and 'ds' columns as well as the city column,\n",
"    # so it needs the whole frame; data[city] alone is a single Series.\n",
"    model, pred = build_model(city, data, data_type, holiday_data, '2021-01-01')\n",
"    model_dict[city] = model\n",
"    predict_dict[city] = pred\n",
"    logger.info(f\"{city} 模型构建完成\")\n",
"# makedirs also creates the intermediate ./result/{data_type} levels and does\n",
"# not fail when a directory already exists.\n",
"os.makedirs(f'./result/{data_type}/model/', exist_ok=True)\n",
"os.makedirs(f'./result/{data_type}/data/', exist_ok=True)\n",
"for city, city_pred in predict_dict.items():\n",
"    city_pred.to_csv(f'./result/{data_type}/data/{city}.csv', encoding='utf-8', index=False)\n",
"    logger.info(f\"{city} 预测数据保存完成\")\n",
"for city, city_model in model_dict.items():\n",
"    with open(f'./result/{data_type}/model/{city}.pkl', 'wb') as fwb:\n",
"        pickle.dump(city_model, fwb)\n",
"    logger.info(f\"{city} 模型保存完成\")"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"d1 = pd.read_csv('./data/城市_20150101-20141231/china_cities_20150102.csv')\n",
"d2 = pd.read_csv('./data/城市_20150101-20141231/china_cities_20150103.csv')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the O3 and PM2.5 rows of each daily frame.\n",
"d1 = d1.loc[d1['type'].isin(['O3', 'PM2.5'])].copy()\n",
"d2 = d2.loc[d2['type'].isin(['O3', 'PM2.5'])].copy()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>hour</th>\n",
" <th>type</th>\n",
" <th>北京</th>\n",
" <th>天津</th>\n",
" <th>石家庄</th>\n",
" <th>唐山</th>\n",
" <th>秦皇岛</th>\n",
" <th>邯郸</th>\n",
" <th>保定</th>\n",
" <th>...</th>\n",
" <th>阿克苏地区</th>\n",
" <th>克州</th>\n",
" <th>喀什地区</th>\n",
" <th>和田地区</th>\n",
" <th>伊犁哈萨克州</th>\n",
" <th>塔城地区</th>\n",
" <th>阿勒泰地区</th>\n",
" <th>石河子</th>\n",
" <th>五家渠</th>\n",
" <th>三沙</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20150102</td>\n",
" <td>1</td>\n",
" <td>PM2.5</td>\n",
" <td>90.0</td>\n",
" <td>103.0</td>\n",
" <td>153.0</td>\n",
" <td>107.0</td>\n",
" <td>77.0</td>\n",
" <td>118.0</td>\n",
" <td>214.0</td>\n",
" <td>...</td>\n",
" <td>32.0</td>\n",
" <td>108.0</td>\n",
" <td>160.0</td>\n",
" <td>146.0</td>\n",
" <td>146.0</td>\n",
" <td>40.0</td>\n",
" <td>23.0</td>\n",
" <td>97.0</td>\n",
" <td>19.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>20150102</td>\n",
" <td>1</td>\n",
" <td>O3</td>\n",
" <td>5.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>9.0</td>\n",
" <td>11.0</td>\n",
" <td>6.0</td>\n",
" <td>23.0</td>\n",
" <td>...</td>\n",
" <td>17.0</td>\n",
" <td>53.0</td>\n",
" <td>15.0</td>\n",
" <td>23.0</td>\n",
" <td>8.0</td>\n",
" <td>79.0</td>\n",
" <td>44.0</td>\n",
" <td>6.0</td>\n",
" <td>14.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>20150102</td>\n",
" <td>2</td>\n",
" <td>PM2.5</td>\n",
" <td>83.0</td>\n",
" <td>108.0</td>\n",
" <td>173.0</td>\n",
" <td>112.0</td>\n",
" <td>83.0</td>\n",
" <td>129.0</td>\n",
" <td>242.0</td>\n",
" <td>...</td>\n",
" <td>56.0</td>\n",
" <td>104.0</td>\n",
" <td>290.0</td>\n",
" <td>225.0</td>\n",
" <td>137.0</td>\n",
" <td>48.0</td>\n",
" <td>29.0</td>\n",
" <td>83.0</td>\n",
" <td>23.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24</th>\n",
" <td>20150102</td>\n",
" <td>2</td>\n",
" <td>O3</td>\n",
" <td>9.0</td>\n",
" <td>3.0</td>\n",
" <td>5.0</td>\n",
" <td>11.0</td>\n",
" <td>14.0</td>\n",
" <td>7.0</td>\n",
" <td>24.0</td>\n",
" <td>...</td>\n",
" <td>15.0</td>\n",
" <td>56.0</td>\n",
" <td>17.0</td>\n",
" <td>21.0</td>\n",
" <td>10.0</td>\n",
" <td>70.0</td>\n",
" <td>51.0</td>\n",
" <td>6.0</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>20150102</td>\n",
" <td>3</td>\n",
" <td>PM2.5</td>\n",
" <td>74.0</td>\n",
" <td>127.0</td>\n",
" <td>187.0</td>\n",
" <td>108.0</td>\n",
" <td>74.0</td>\n",
" <td>142.0</td>\n",
" <td>274.0</td>\n",
" <td>...</td>\n",
" <td>41.0</td>\n",
" <td>109.0</td>\n",
" <td>221.0</td>\n",
" <td>176.0</td>\n",
" <td>129.0</td>\n",
" <td>36.0</td>\n",
" <td>15.0</td>\n",
" <td>98.0</td>\n",
" <td>29.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>324</th>\n",
" <td>20150103</td>\n",
" <td>21</td>\n",
" <td>O3</td>\n",
" <td>7.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>8.0</td>\n",
" <td>2.0</td>\n",
" <td>6.0</td>\n",
" <td>19.0</td>\n",
" <td>...</td>\n",
" <td>10.0</td>\n",
" <td>45.0</td>\n",
" <td>20.0</td>\n",
" <td>21.0</td>\n",
" <td>9.0</td>\n",
" <td>85.0</td>\n",
" <td>40.0</td>\n",
" <td>4.0</td>\n",
" <td>12.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>331</th>\n",
" <td>20150103</td>\n",
" <td>22</td>\n",
" <td>PM2.5</td>\n",
" <td>200.0</td>\n",
" <td>357.0</td>\n",
" <td>400.0</td>\n",
" <td>233.0</td>\n",
" <td>202.0</td>\n",
" <td>183.0</td>\n",
" <td>460.0</td>\n",
" <td>...</td>\n",
" <td>61.0</td>\n",
" <td>100.0</td>\n",
" <td>222.0</td>\n",
" <td>109.0</td>\n",
" <td>40.0</td>\n",
" <td>39.0</td>\n",
" <td>28.0</td>\n",
" <td>63.0</td>\n",
" <td>61.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>339</th>\n",
" <td>20150103</td>\n",
" <td>22</td>\n",
" <td>O3</td>\n",
" <td>7.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" <td>4.0</td>\n",
" <td>7.0</td>\n",
" <td>20.0</td>\n",
" <td>...</td>\n",
" <td>16.0</td>\n",
" <td>35.0</td>\n",
" <td>20.0</td>\n",
" <td>22.0</td>\n",
" <td>8.0</td>\n",
" <td>98.0</td>\n",
" <td>31.0</td>\n",
" <td>5.0</td>\n",
" <td>16.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>346</th>\n",
" <td>20150103</td>\n",
" <td>23</td>\n",
" <td>PM2.5</td>\n",
" <td>208.0</td>\n",
" <td>350.0</td>\n",
" <td>415.0</td>\n",
" <td>234.0</td>\n",
" <td>187.0</td>\n",
" <td>180.0</td>\n",
" <td>509.0</td>\n",
" <td>...</td>\n",
" <td>46.0</td>\n",
" <td>83.0</td>\n",
" <td>327.0</td>\n",
" <td>92.0</td>\n",
" <td>34.0</td>\n",
" <td>31.0</td>\n",
" <td>49.0</td>\n",
" <td>48.0</td>\n",
" <td>81.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>354</th>\n",
" <td>20150103</td>\n",
" <td>23</td>\n",
" <td>O3</td>\n",
" <td>7.0</td>\n",
" <td>4.0</td>\n",
" <td>5.0</td>\n",
" <td>9.0</td>\n",
" <td>3.0</td>\n",
" <td>6.0</td>\n",
" <td>25.0</td>\n",
" <td>...</td>\n",
" <td>21.0</td>\n",
" <td>49.0</td>\n",
" <td>18.0</td>\n",
" <td>27.0</td>\n",
" <td>9.0</td>\n",
" <td>92.0</td>\n",
" <td>43.0</td>\n",
" <td>5.0</td>\n",
" <td>7.0</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>94 rows × 370 columns</p>\n",
"</div>"
],
"text/plain": [
" date hour type 北京 天津 石家庄 唐山 秦皇岛 邯郸 保定 \\\n",
"1 20150102 1 PM2.5 90.0 103.0 153.0 107.0 77.0 118.0 214.0 \n",
"9 20150102 1 O3 5.0 3.0 6.0 9.0 11.0 6.0 23.0 \n",
"16 20150102 2 PM2.5 83.0 108.0 173.0 112.0 83.0 129.0 242.0 \n",
"24 20150102 2 O3 9.0 3.0 5.0 11.0 14.0 7.0 24.0 \n",
"31 20150102 3 PM2.5 74.0 127.0 187.0 108.0 74.0 142.0 274.0 \n",
".. ... ... ... ... ... ... ... ... ... ... \n",
"324 20150103 21 O3 7.0 4.0 5.0 8.0 2.0 6.0 19.0 \n",
"331 20150103 22 PM2.5 200.0 357.0 400.0 233.0 202.0 183.0 460.0 \n",
"339 20150103 22 O3 7.0 4.0 5.0 9.0 4.0 7.0 20.0 \n",
"346 20150103 23 PM2.5 208.0 350.0 415.0 234.0 187.0 180.0 509.0 \n",
"354 20150103 23 O3 7.0 4.0 5.0 9.0 3.0 6.0 25.0 \n",
"\n",
" ... 阿克苏地区 克州 喀什地区 和田地区 伊犁哈萨克州 塔城地区 阿勒泰地区 石河子 五家渠 三沙 \n",
"1 ... 32.0 108.0 160.0 146.0 146.0 40.0 23.0 97.0 19.0 NaN \n",
"9 ... 17.0 53.0 15.0 23.0 8.0 79.0 44.0 6.0 14.0 NaN \n",
"16 ... 56.0 104.0 290.0 225.0 137.0 48.0 29.0 83.0 23.0 NaN \n",
"24 ... 15.0 56.0 17.0 21.0 10.0 70.0 51.0 6.0 12.0 NaN \n",
"31 ... 41.0 109.0 221.0 176.0 129.0 36.0 15.0 98.0 29.0 NaN \n",
".. ... ... ... ... ... ... ... ... ... ... .. \n",
"324 ... 10.0 45.0 20.0 21.0 9.0 85.0 40.0 4.0 12.0 NaN \n",
"331 ... 61.0 100.0 222.0 109.0 40.0 39.0 28.0 63.0 61.0 NaN \n",
"339 ... 16.0 35.0 20.0 22.0 8.0 98.0 31.0 5.0 16.0 NaN \n",
"346 ... 46.0 83.0 327.0 92.0 34.0 31.0 49.0 48.0 81.0 NaN \n",
"354 ... 21.0 49.0 18.0 27.0 9.0 92.0 43.0 5.0 7.0 NaN \n",
"\n",
"[94 rows x 370 columns]"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([d1, d2])"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('./data/total_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from statistics import mean\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import explained_variance_score,r2_score, median_absolute_error, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error\n",
"from scipy import stats\n",
"import numpy as np\n",
"from matplotlib import rcParams\n",
"config = {\"font.size\": 16,\"mathtext.fontset\":'stix'}\n",
"rcParams.update(config)\n"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"data = pd.read_csv('./data/eval_data.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"x = data.values[:, 0]\n",
"y = data.values[:, 1]"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"def scatter_out_1(x, y, label, name): ## x,y为两个需要做对比分析的两个量。\n",
" # ==========计算评价指标==========\n",
" BIAS = mean(x - y)\n",
" MSE = mean_squared_error(x, y)\n",
" RMSE = np.power(MSE, 0.5)\n",
" R2 = r2_score(x, y)\n",
" MAE = mean_absolute_error(x, y)\n",
" mape = mean_absolute_percentage_error(x, y) * 100\n",
" EV = explained_variance_score(x, y)\n",
" print('==========算法评价指标==========')\n",
" print('Explained Variance(EV):', '%.3f' % (EV))\n",
" print('Mean Absolute Error(MAE):', '%.3f' % (MAE))\n",
" print('Mean squared error(MSE):', '%.3f' % (MSE))\n",
" print('Root Mean Squard Error(RMSE):', '%.3f' % (RMSE))\n",
" print('mean_absolute_percentage_error(MAPE):', '%.3f' % (mape))\n",
" print('R_squared:', '%.3f' % (R2))\n",
" # ===========Calculate the point density==========\n",
" xy = np.vstack([x, y])\n",
" z = stats.gaussian_kde(xy)(xy)\n",
" # ===========Sort the points by density, so that the densest points are plotted last===========\n",
" idx = z.argsort()\n",
" x, y, z = x[idx], y[idx], z[idx]\n",
" def best_fit_slope_and_intercept(xs, ys):\n",
" m = (((mean(xs) * mean(ys)) - mean(xs * ys)) / ((mean(xs) * mean(xs)) - mean(xs * xs)))\n",
" b = mean(ys) - m * mean(xs)\n",
" return m, b\n",
" m, b = best_fit_slope_and_intercept(x, y)\n",
" regression_line = []\n",
" for a in x:\n",
" regression_line.append((m * a) + b)\n",
" fig,ax=plt.subplots(figsize=(12,9),dpi=400)\n",
" scatter=ax.scatter(x,y,marker='o',c=z*100,s=15,label='LST',cmap='Spectral_r')\n",
" cbar=plt.colorbar(scatter,shrink=1,orientation='vertical',extend='both',pad=0.015,aspect=30,label='Frequency')\n",
" min_value = min(min(x), min(y))\n",
" max_value = max(max(x), max(y))\n",
"\n",
" plt.plot([min_value-5,max_value+5],[min_value-5,max_value+5],'black',lw=1.5) # 画的1:1线线的颜色为black线宽为0.8\n",
" plt.plot(x,regression_line,'red',lw=1.5) # 预测与实测数据之间的回归线\n",
" plt.axis([min_value-5,max_value+5,min_value-5,max_value+5]) # 设置线的范围\n",
" plt.xlabel('Measured %s' % label)\n",
" plt.ylabel('Retrived %s' % label)\n",
" # plt.xticks(fontproperties='Times New Roman')\n",
" # plt.yticks(fontproperties='Times New Roman')\n",
"\n",
"\n",
" plt.text(min_value-5 + (max_value-min_value) * 0.05, int(max_value * 0.95), '$N=%.f$' % len(y)) # text的位置需要根据x,y的大小范围进行调整。\n",
" plt.text(min_value-5 + (max_value-min_value) * 0.05, int(max_value * 0.90), '$R^2=%.2f$' % R2)\n",
" plt.text(min_value-5 + (max_value-min_value) * 0.05, int(max_value * 0.85), '$RMSE=%.2f$' % RMSE)\n",
" plt.text(min_value-5 + (max_value-min_value) * 0.05, int(max_value * 0.80), '$MAE=%.2f$' % MAE)\n",
" plt.text(min_value-5 + (max_value-min_value) * 0.05, int(max_value * 0.75), '$MAPE=%.2f$%%' % mape)\n",
" plt.xlim(min_value-5,max_value+5) # 设置x坐标轴的显示范围\n",
" plt.ylim(min_value-5,max_value+5) # 设置y坐标轴的显示范围\n",
" # file_name = name.split('(')[0].strip()\n",
" plt.savefig(f'./figure/{name}.png',dpi=800,bbox_inches='tight',pad_inches=0)\n",
" plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"==========算法评价指标==========\n",
"Explained Variance(EV): 0.940\n",
"Mean Absolute Error(MAE): 14.998\n",
"Mean squared error(MSE): 736.604\n",
"Root Mean Squard Error(RMSE): 27.140\n",
"mean_absolute_percentage_error(MAPE): 9.027\n",
"R_squared: 0.940\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAED0AAAvZCAYAAAA/RqhMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAD2EAAA9hAHVrK90AAEAAElEQVR4nOzdeXhddZ0/8M9J2lRKsK0KNUorS3HEILbSgqkbCbKVsc4IwYVUx6FuyAgWfuoMVYGqozJjF0dAW0bLKI65Ni3alhYVMK0lVQcdNQZbg+KAQkKRNXThcn5/FCuXpslNcm9ultfrefI8nM93e5+cwtOWnM9N0jQNAAAAAAAAAAAAAAAAAAAAgMFWVuoAAAAAAAAAAAAAAAAAAAAAwOik6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAAAABQEpoeAAAAAAAAAAAAAAAAAAAAACWh6QEAAAAAAAAAAAAAAAAA0K0kSbyHCAAUld9sAAAAAAAAAAAAAAAAAAD7SZKkPCLeX+ocAMDIpukBAAAAAAAAAAAAAAAAANCd10fEhUmSJKUOAgCMXJoeAAAAAAAAAAAAAAAAAADdOTcijo2I6lIHAQBGLk0PAAAAAAAAAAAAAAAAAIAcSZKUR8Rbnr6sL2UWAGBkS9I0LXUGAEa4JEmqIuJvn1W+KyK6ShAHAAAAAAAAAAAAAAAY2sZHxFHPqq1N0/RPpQgzWiVJUhsRtzx92RYR1akXEgGAIhhT6gAAjAp/GxFfKXUIAAAAAAAAAAAAAABg2HpvRCwvdYhR5txn/POxEVEdEb8qURYAYAQrK3UAAAAAAAAAAAAAAAAAAGDoSJKkPCLe8qxyfSmyAAAjn6YHAAAAAAAAAAAAAAAAAFAa6VD8uuWWW56MiMOeGfTYY4/9RJqmJc92gC8AYBjT9AAAAAAAAAAAAAAAAAAA2KexsXG/WltbW7S2tpYgDQAw0o0pdQAARoW7nl348pe/HK94xStKkQUAAAAAAAAAACiAe37/51j9rV/EQw929Wt9RUV5nPamY2PGiVMKnAwAABgO/vCHP8SCBQvij3/8Y75L9ns3geLIZrPR1NTU7Vgmk4njjjtukBMBACNdkqZpqTMAMMIlSVITEVueWduyZUvU1NSUKBEAAAAAAAAAADAQP//JPfEfn/9h7Nnz1ID3elP9cXH2O6ZHkiQFSAYAAAwHW7Zsiblz58aOHTv6smx2mqa3FytTCQ25F/xuvfXWqKur63bs2GOPjdbW1qH4Z7ghFwgAyF
9ZqQMAAAAAAAAAAAAAMHz8pvX++OLnCtPwICLiu5lfxfrVvy7IXgAAwNC3atWqqKur62vDAwZRY2PjAcfa2tqitbV1ENMAAKOBpgcAAAAAAAAAAAAA5KXr8d1x7Rc2x5NPFqbhwV9kvv6zaN/2QEH3BAAAhpY0TWPx4sVRX18fu3btKnUcDiCbzUZTU1OPczKZzCClAQBGC00PAAAAAAAAAAAAAMjLf3/tf+LBHV0F3zd9Ko0Vy7YUvJkCAAAwNGSz2bj44otjwYIFkaZpqePQg+bm5ujo6OhxTiaT8RwBgILS9AAAAAAAAAAAAACAXnXe/1g0/6C9aPv/8Z6HY+um3xdtfwAAoDS6urrinHPOiWXLluW95sgjjyxiInrS2Ni4X+21UZVz3dbWFq2trYMVCQAYBTQ9AAAAAAAAAAAAAKBXt968LdKnivtJnj/Y8Jui7g8AAAyujo6OqKurizVr1uS9pq6uLq699triheKAstlsNDU15dSmxYR4Yxy+39xMJjNYsQCAUUDTAwAAAAAAAAAAAAB61dL8+6Kf0f6bB6LjvkeLfg4AAFB827Zti5qamti6dWvea+bNmxc33XRTHHLIIUVMxoE0NzdHR0dHTm1WHBZTojImx/iceiaTiTQtbmM8AGD00PQAAAAAAAAAAAAAgB499GBX7Oh8fFDOat/2wKCcAwAAFM+WLVti9uzZcdddd+W95uMf/3isXLkyKioqipiMnjQ2Nu5XmxmHRZIkMSsOzam3tbVFa2vrYEUDAEY4TQ8AAAAAAAAAAAAA6NHdd/15EM96cNDOAgAACm/VqlVRV1cXO3bsyGt+eXl5rFixIq688spIkqTI6TiQbDYbTU1NObVpMSEmJeMiYm/zg2fLZDKDkg0AGPk0PQAAAAAAAAAAAACgRw/9uWsQz3pi0M4CAAAKJ03TWLx4cdTX18euXbvyWlNZWRnr1q2L888/v8jp6E1zc3N0dHTk1GY9o9HBlKiMyTE+ZzyTyUSapoOSDwAY2TQ9AAAAAAAAAAAAAKBHTz01eC+xDOZZAABAYWSz2bj44otjwYIFeb8EX1VVFZs2bYrTTz+9yOnIR2Nj4361mc9oepAkScyKQ3PG29raorW1tejZAICRT9MDAAAAAAAAAAAAAHo07jljBu+scYN3FgAAMHBdXV1xzjnnxLJly/JeU11dHS0tLTF9+vTiBSNv2Ww2mpqacmrTYkJMSsbl1J7ZBOEvMplMUbMBAKODpgcAAAAAAAAAAAAA9OhFh08YtLNePGXwzgIAAAamo6Mj6urqYs2aNXmvqauri82bN8fUqVOLF4w+aW5ujo6OjpzarG4aHEyJypgc43NqmUwm0jQtaj4AYOTT9AAAAAAAAAAAAACAHh0+dWKMGTM4P3Z6xNHPH5RzAACAgdm2bVvU1NTE1q1b814zb968uOmmm2LixInFC0afNTY27leb2U3TgyRJYlYcmlNra2uL1tbWomUDAEYHTQ8AAAAAAAAAAAAA6NGYseVx7PEvLPo548ePjaNe+oKinwMAAAzMli1bYvbs2XHXXXflvebjH/94rFy5MioqKoqYjL7KZrPR1NSUU5sWE2JSMq7b+d01Q8hkMkXJBgCMHpoeAAAAAAAAAAAAANCr2tOOKfoZr607Oioqyot+DgAA0H+rVq2Kurq62LFjR17zy8vLY8WKFXHllVdGkiRFTkdfNTc3R0dHR05tVjeNDf5iSlTG5BifU8tkMpGmaVHyAQCjg6YHAAAAAAAAAAAAAPRq+qzD47AXHlK0/cvKkjhlzt8UbX8AAGBg0jSNxYsXR319fezatSuvNZWVlbFu3bo4//zzi5yO/mpsbNyvNrOHpgdJksSsODSn1tbWFq2trQXPBgCMHpoeAAAAAAAAAAAAANCr8vKyeNf7Tyza/medXR0vfNFzi7Y/AADQf9lsNi6++OJYsGBBpGma15qqqqrYtGlTnH766UVOR39ls9loamrKqU2LCTEpGdfjuu
6aImQymYJmAwBGF00PAAAAAAAAAAAAAMjLcdNfFHVnvLTg+045YlK8+dzjC74vAAAwcF1dXXHOOefEsmXL8l5TXV0
"text/plain": [
"<Figure size 4800x3600 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Draw the measured-vs-retrieved scatter figure for coal consumption.\n",
"# The plotting function writes the image to ./figure/<name>.png, i.e. ./figure/coal.png here.\n",
"scatter_out_1(\n",
"    x,\n",
"    y,\n",
"    label='coal cost(t)',\n",
"    name='coal',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.7.13 ('prophet')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "ed1c83680db7a5ebb7db00c91c7d3360fad193026f64add4a22e8b08f0c65865"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}