919 lines
27 KiB
Plaintext
919 lines
27 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"(52583, 54)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"data = pd.read_csv('./data/ori_data.csv')\n",
|
|||
|
"data.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>date</th>\n",
|
|||
|
" <th>PM2.5</th>\n",
|
|||
|
" <th>PM10</th>\n",
|
|||
|
" <th>SO2</th>\n",
|
|||
|
" <th>NO2</th>\n",
|
|||
|
" <th>O3</th>\n",
|
|||
|
" <th>CO</th>\n",
|
|||
|
" <th>Ox</th>\n",
|
|||
|
" <th>wind-U</th>\n",
|
|||
|
" <th>wind-V</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>VOC_power</th>\n",
|
|||
|
" <th>VOC_agricultural</th>\n",
|
|||
|
" <th>PM2.5_industrial</th>\n",
|
|||
|
" <th>PM2.5_transportation</th>\n",
|
|||
|
" <th>PM2.5_resdient</th>\n",
|
|||
|
" <th>PM2.5_power</th>\n",
|
|||
|
" <th>PM2.5_agricultural</th>\n",
|
|||
|
" <th>CO_Bio</th>\n",
|
|||
|
" <th>VOCs_Bio</th>\n",
|
|||
|
" <th>pre_time</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>2015-01-02 01:00:00</td>\n",
|
|||
|
" <td>136.0</td>\n",
|
|||
|
" <td>214.0</td>\n",
|
|||
|
" <td>317.0</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>3.71</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>0.831775</td>\n",
|
|||
|
" <td>-0.555113</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.037724</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.926851</td>\n",
|
|||
|
" <td>0.077715</td>\n",
|
|||
|
" <td>0.827110</td>\n",
|
|||
|
" <td>0.436028</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.081546</td>\n",
|
|||
|
" <td>4.217706</td>\n",
|
|||
|
" <td>2015-01-02 00:00:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2015-01-02 02:00:00</td>\n",
|
|||
|
" <td>114.0</td>\n",
|
|||
|
" <td>176.0</td>\n",
|
|||
|
" <td>305.0</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>3.55</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>-0.695011</td>\n",
|
|||
|
" <td>-0.083426</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.036215</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.926851</td>\n",
|
|||
|
" <td>0.081248</td>\n",
|
|||
|
" <td>0.827110</td>\n",
|
|||
|
" <td>0.418587</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.080031</td>\n",
|
|||
|
" <td>4.119807</td>\n",
|
|||
|
" <td>2015-01-02 01:00:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>2015-01-02 03:00:00</td>\n",
|
|||
|
" <td>97.0</td>\n",
|
|||
|
" <td>154.0</td>\n",
|
|||
|
" <td>306.0</td>\n",
|
|||
|
" <td>37.0</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>3.51</td>\n",
|
|||
|
" <td>44.0</td>\n",
|
|||
|
" <td>-0.173311</td>\n",
|
|||
|
" <td>0.469003</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.035712</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.926851</td>\n",
|
|||
|
" <td>0.088313</td>\n",
|
|||
|
" <td>0.827110</td>\n",
|
|||
|
" <td>0.412773</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.077761</td>\n",
|
|||
|
" <td>3.973464</td>\n",
|
|||
|
" <td>2015-01-02 02:00:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>2015-01-02 04:00:00</td>\n",
|
|||
|
" <td>87.0</td>\n",
|
|||
|
" <td>141.0</td>\n",
|
|||
|
" <td>316.0</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>3.55</td>\n",
|
|||
|
" <td>45.0</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>-0.200000</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.036718</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.926851</td>\n",
|
|||
|
" <td>0.091256</td>\n",
|
|||
|
" <td>0.827110</td>\n",
|
|||
|
" <td>0.424400</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.076766</td>\n",
|
|||
|
" <td>3.909235</td>\n",
|
|||
|
" <td>2015-01-02 03:00:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>2015-01-02 05:00:00</td>\n",
|
|||
|
" <td>85.0</td>\n",
|
|||
|
" <td>139.0</td>\n",
|
|||
|
" <td>292.0</td>\n",
|
|||
|
" <td>37.0</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>3.62</td>\n",
|
|||
|
" <td>44.0</td>\n",
|
|||
|
" <td>1.234518</td>\n",
|
|||
|
" <td>0.660276</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.039736</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.926851</td>\n",
|
|||
|
" <td>0.092434</td>\n",
|
|||
|
" <td>1.746121</td>\n",
|
|||
|
" <td>0.459282</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.077119</td>\n",
|
|||
|
" <td>3.930702</td>\n",
|
|||
|
" <td>2015-01-02 04:00:00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>5 rows × 54 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" date PM2.5 PM10 SO2 NO2 O3 CO Ox wind-U \\\n",
|
|||
|
"0 2015-01-02 01:00:00 136.0 214.0 317.0 38.0 8.0 3.71 46.0 0.831775 \n",
|
|||
|
"1 2015-01-02 02:00:00 114.0 176.0 305.0 38.0 8.0 3.55 46.0 -0.695011 \n",
|
|||
|
"2 2015-01-02 03:00:00 97.0 154.0 306.0 37.0 7.0 3.51 44.0 -0.173311 \n",
|
|||
|
"3 2015-01-02 04:00:00 87.0 141.0 316.0 38.0 7.0 3.55 45.0 0.000000 \n",
|
|||
|
"4 2015-01-02 05:00:00 85.0 139.0 292.0 37.0 7.0 3.62 44.0 1.234518 \n",
|
|||
|
"\n",
|
|||
|
" wind-V ... VOC_power VOC_agricultural PM2.5_industrial \\\n",
|
|||
|
"0 -0.555113 ... 0.037724 0.0 0.926851 \n",
|
|||
|
"1 -0.083426 ... 0.036215 0.0 0.926851 \n",
|
|||
|
"2 0.469003 ... 0.035712 0.0 0.926851 \n",
|
|||
|
"3 -0.200000 ... 0.036718 0.0 0.926851 \n",
|
|||
|
"4 0.660276 ... 0.039736 0.0 0.926851 \n",
|
|||
|
"\n",
|
|||
|
" PM2.5_transportation PM2.5_resdient PM2.5_power PM2.5_agricultural \\\n",
|
|||
|
"0 0.077715 0.827110 0.436028 0.0 \n",
|
|||
|
"1 0.081248 0.827110 0.418587 0.0 \n",
|
|||
|
"2 0.088313 0.827110 0.412773 0.0 \n",
|
|||
|
"3 0.091256 0.827110 0.424400 0.0 \n",
|
|||
|
"4 0.092434 1.746121 0.459282 0.0 \n",
|
|||
|
"\n",
|
|||
|
" CO_Bio VOCs_Bio pre_time \n",
|
|||
|
"0 0.081546 4.217706 2015-01-02 00:00:00 \n",
|
|||
|
"1 0.080031 4.119807 2015-01-02 01:00:00 \n",
|
|||
|
"2 0.077761 3.973464 2015-01-02 02:00:00 \n",
|
|||
|
"3 0.076766 3.909235 2015-01-02 03:00:00 \n",
|
|||
|
"4 0.077119 3.930702 2015-01-02 04:00:00 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 54 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"data.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data.date = pd.to_datetime(data.date)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import datetime as dt"
|
|||
|
]
|
|||
|
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 6,
 "metadata": {},
 "outputs": [],
 "source": [
  "def get_lookdays(x):\n",
  "    \"\"\"Return the 24 hourly timestamps from x-24h to x-1h as '%Y-%m-%d %H:%M:%S' strings.\"\"\"\n",
  "    one_hour = dt.timedelta(hours=1)\n",
  "    window = pd.date_range(x - 24 * one_hour, x - one_hour, freq='H')\n",
  "    stamp_format = '%Y-%m-%d %H:%M:%S'\n",
  "    return [dt.datetime.strftime(stamp, stamp_format) for stamp in window.tolist()]\n"
 ]
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"date_range = pd.date_range(data.date.min(), data.date.max(), freq='H')\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"['PM2.5', 'PM10', 'SO2', 'NO2', 'O3', 'CO']"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"out_cols = data.columns[1:7].tolist()\n",
|
|||
|
"out_cols"
|
|||
|
]
|
|||
|
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 10,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Calendar-day key (string) used to count missing values per day.\n",
  "data['day'] = data.date.dt.strftime('%Y-%m-%d')\n",
  "# Number of missing readings per day for each output pollutant column.\n",
  "na_counts = data.set_index('day')[out_cols].isna().groupby('day').sum()"
 ]
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"drop_days = na_counts[na_counts.SO2>5].index.values"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data = data.set_index('date').interpolate(method='linear')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data = data.reset_index()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 14,
 "metadata": {},
 "outputs": [],
 "source": [
  "# drop_days holds '%Y-%m-%d' day strings (the index of na_counts), so filter on the matching\n",
  "# `day` column; comparing them with the hourly `date` timestamps cannot remove a whole day.\n",
  "data = data[~data.day.isin(drop_days)].copy()"
 ]
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data = data.set_index('date').reindex(date_range)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data.reset_index(inplace=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data.rename(columns={'index':'date'}, inplace=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"data['lookdays'] = data.date.apply(get_lookdays)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 26,
 "metadata": {},
 "outputs": [],
 "source": [
  "# `data` was reindexed onto the full hourly date_range, so rows are consecutive hours and the\n",
  "# 24 hours before row i are rows i-24 .. i-1. Slicing by position yields the same flattened\n",
  "# values as looking the timestamps up with isin, without scanning the whole frame per row.\n",
  "pollutant_values = data[out_cols].values\n",
  "data['features'] = pd.Series(\n",
  "    [pollutant_values[max(i - 24, 0):i].reshape(-1).tolist() for i in range(len(data))],\n",
  "    index=data.index,\n",
  ")"
 ]
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 27,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Number of lagged values collected for each row.\n",
  "data['feature_len'] = data.features.apply(len)"
 ]
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"save_data = data[data.feature_len >=144].copy()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 29,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Names for the lagged columns, ordered from 24 hours back to 1 hour back,\n",
  "# with every output column listed within each lag.\n",
  "pre_cols = [f\"{lag}_{col}\" for lag in range(24, 0, -1) for col in out_cols]"
 ]
},
|
|||
|
{
 "cell_type": "code",
 "execution_count": 30,
 "metadata": {},
 "outputs": [],
 "source": [
  "# Expand each feature list into one column per name in pre_cols with a single constructor\n",
  "# call instead of building one Series per row.\n",
  "previous_out = pd.DataFrame(save_data.features.tolist(), index=save_data.index, columns=pre_cols)\n",
  "previous_out['date'] = save_data.date.values"
 ]
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>date</th>\n",
|
|||
|
" <th>PM2.5</th>\n",
|
|||
|
" <th>PM10</th>\n",
|
|||
|
" <th>SO2</th>\n",
|
|||
|
" <th>NO2</th>\n",
|
|||
|
" <th>O3</th>\n",
|
|||
|
" <th>CO</th>\n",
|
|||
|
" <th>Ox</th>\n",
|
|||
|
" <th>wind-U</th>\n",
|
|||
|
" <th>wind-V</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>2_SO2</th>\n",
|
|||
|
" <th>2_NO2</th>\n",
|
|||
|
" <th>2_O3</th>\n",
|
|||
|
" <th>2_CO</th>\n",
|
|||
|
" <th>1_PM2.5</th>\n",
|
|||
|
" <th>1_PM10</th>\n",
|
|||
|
" <th>1_SO2</th>\n",
|
|||
|
" <th>1_NO2</th>\n",
|
|||
|
" <th>1_O3</th>\n",
|
|||
|
" <th>1_CO</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>2015-01-02 01:00:00</td>\n",
|
|||
|
" <td>136.0</td>\n",
|
|||
|
" <td>214.0</td>\n",
|
|||
|
" <td>317.0</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>3.71</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>0.831775</td>\n",
|
|||
|
" <td>-0.555113</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2015-01-02 02:00:00</td>\n",
|
|||
|
" <td>114.0</td>\n",
|
|||
|
" <td>176.0</td>\n",
|
|||
|
" <td>305.0</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>3.55</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>-0.695011</td>\n",
|
|||
|
" <td>-0.083426</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>2015-01-02 03:00:00</td>\n",
|
|||
|
" <td>97.0</td>\n",
|
|||
|
" <td>154.0</td>\n",
|
|||
|
" <td>306.0</td>\n",
|
|||
|
" <td>37.0</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>3.51</td>\n",
|
|||
|
" <td>44.0</td>\n",
|
|||
|
" <td>-0.173311</td>\n",
|
|||
|
" <td>0.469003</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>2015-01-02 04:00:00</td>\n",
|
|||
|
" <td>87.0</td>\n",
|
|||
|
" <td>141.0</td>\n",
|
|||
|
" <td>316.0</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>3.55</td>\n",
|
|||
|
" <td>45.0</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>-0.200000</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>2015-01-02 05:00:00</td>\n",
|
|||
|
" <td>85.0</td>\n",
|
|||
|
" <td>139.0</td>\n",
|
|||
|
" <td>292.0</td>\n",
|
|||
|
" <td>37.0</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>3.62</td>\n",
|
|||
|
" <td>44.0</td>\n",
|
|||
|
" <td>1.234518</td>\n",
|
|||
|
" <td>0.660276</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>52578</th>\n",
|
|||
|
" <td>2020-12-31 19:00:00</td>\n",
|
|||
|
" <td>27.0</td>\n",
|
|||
|
" <td>51.0</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>75.0</td>\n",
|
|||
|
" <td>1.067581</td>\n",
|
|||
|
" <td>-0.265087</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>34.0</td>\n",
|
|||
|
" <td>43.0</td>\n",
|
|||
|
" <td>0.75</td>\n",
|
|||
|
" <td>31.0</td>\n",
|
|||
|
" <td>59.0</td>\n",
|
|||
|
" <td>21.0</td>\n",
|
|||
|
" <td>47.0</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>0.91</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>52579</th>\n",
|
|||
|
" <td>2020-12-31 20:00:00</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>51.0</td>\n",
|
|||
|
" <td>12.0</td>\n",
|
|||
|
" <td>47.0</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>0.83</td>\n",
|
|||
|
" <td>73.0</td>\n",
|
|||
|
" <td>0.029164</td>\n",
|
|||
|
" <td>0.298579</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>21.0</td>\n",
|
|||
|
" <td>47.0</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>0.91</td>\n",
|
|||
|
" <td>27.0</td>\n",
|
|||
|
" <td>51.0</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>52580</th>\n",
|
|||
|
" <td>2020-12-31 21:00:00</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>48.0</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>1.15</td>\n",
|
|||
|
" <td>73.0</td>\n",
|
|||
|
" <td>-0.079532</td>\n",
|
|||
|
" <td>0.896479</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>46.0</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>0.72</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>51.0</td>\n",
|
|||
|
" <td>12.0</td>\n",
|
|||
|
" <td>47.0</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>0.83</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>52581</th>\n",
|
|||
|
" <td>2020-12-31 22:00:00</td>\n",
|
|||
|
" <td>32.0</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>23.0</td>\n",
|
|||
|
" <td>49.0</td>\n",
|
|||
|
" <td>20.0</td>\n",
|
|||
|
" <td>0.90</td>\n",
|
|||
|
" <td>69.0</td>\n",
|
|||
|
" <td>-1.660193</td>\n",
|
|||
|
" <td>0.365729</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>12.0</td>\n",
|
|||
|
" <td>47.0</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>0.83</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>58.0</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>48.0</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>1.15</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>52582</th>\n",
|
|||
|
" <td>2020-12-31 23:00:00</td>\n",
|
|||
|
" <td>53.0</td>\n",
|
|||
|
" <td>94.0</td>\n",
|
|||
|
" <td>41.0</td>\n",
|
|||
|
" <td>57.0</td>\n",
|
|||
|
" <td>12.0</td>\n",
|
|||
|
" <td>1.18</td>\n",
|
|||
|
" <td>69.0</td>\n",
|
|||
|
" <td>-0.106042</td>\n",
|
|||
|
" <td>1.195305</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>48.0</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>1.15</td>\n",
|
|||
|
" <td>32.0</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>23.0</td>\n",
|
|||
|
" <td>49.0</td>\n",
|
|||
|
" <td>20.0</td>\n",
|
|||
|
" <td>0.90</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>52583 rows × 200 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" date PM2.5 PM10 SO2 NO2 O3 CO Ox \\\n",
|
|||
|
"0 2015-01-02 01:00:00 136.0 214.0 317.0 38.0 8.0 3.71 46.0 \n",
|
|||
|
"1 2015-01-02 02:00:00 114.0 176.0 305.0 38.0 8.0 3.55 46.0 \n",
|
|||
|
"2 2015-01-02 03:00:00 97.0 154.0 306.0 37.0 7.0 3.51 44.0 \n",
|
|||
|
"3 2015-01-02 04:00:00 87.0 141.0 316.0 38.0 7.0 3.55 45.0 \n",
|
|||
|
"4 2015-01-02 05:00:00 85.0 139.0 292.0 37.0 7.0 3.62 44.0 \n",
|
|||
|
"... ... ... ... ... ... ... ... ... \n",
|
|||
|
"52578 2020-12-31 19:00:00 27.0 51.0 16.0 46.0 29.0 0.72 75.0 \n",
|
|||
|
"52579 2020-12-31 20:00:00 26.0 51.0 12.0 47.0 26.0 0.83 73.0 \n",
|
|||
|
"52580 2020-12-31 21:00:00 29.0 58.0 16.0 48.0 25.0 1.15 73.0 \n",
|
|||
|
"52581 2020-12-31 22:00:00 32.0 60.0 23.0 49.0 20.0 0.90 69.0 \n",
|
|||
|
"52582 2020-12-31 23:00:00 53.0 94.0 41.0 57.0 12.0 1.18 69.0 \n",
|
|||
|
"\n",
|
|||
|
" wind-U wind-V ... 2_SO2 2_NO2 2_O3 2_CO 1_PM2.5 1_PM10 \\\n",
|
|||
|
"0 0.831775 -0.555113 ... NaN NaN NaN NaN NaN NaN \n",
|
|||
|
"1 -0.695011 -0.083426 ... NaN NaN NaN NaN NaN NaN \n",
|
|||
|
"2 -0.173311 0.469003 ... NaN NaN NaN NaN NaN NaN \n",
|
|||
|
"3 0.000000 -0.200000 ... NaN NaN NaN NaN NaN NaN \n",
|
|||
|
"4 1.234518 0.660276 ... NaN NaN NaN NaN NaN NaN \n",
|
|||
|
"... ... ... ... ... ... ... ... ... ... \n",
|
|||
|
"52578 1.067581 -0.265087 ... 25.0 34.0 43.0 0.75 31.0 59.0 \n",
|
|||
|
"52579 0.029164 0.298579 ... 21.0 47.0 29.0 0.91 27.0 51.0 \n",
|
|||
|
"52580 -0.079532 0.896479 ... 16.0 46.0 29.0 0.72 26.0 51.0 \n",
|
|||
|
"52581 -1.660193 0.365729 ... 12.0 47.0 26.0 0.83 29.0 58.0 \n",
|
|||
|
"52582 -0.106042 1.195305 ... 16.0 48.0 25.0 1.15 32.0 60.0 \n",
|
|||
|
"\n",
|
|||
|
" 1_SO2 1_NO2 1_O3 1_CO \n",
|
|||
|
"0 NaN NaN NaN NaN \n",
|
|||
|
"1 NaN NaN NaN NaN \n",
|
|||
|
"2 NaN NaN NaN NaN \n",
|
|||
|
"3 NaN NaN NaN NaN \n",
|
|||
|
"4 NaN NaN NaN NaN \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"52578 21.0 47.0 29.0 0.91 \n",
|
|||
|
"52579 16.0 46.0 29.0 0.72 \n",
|
|||
|
"52580 12.0 47.0 26.0 0.83 \n",
|
|||
|
"52581 16.0 48.0 25.0 1.15 \n",
|
|||
|
"52582 23.0 49.0 20.0 0.90 \n",
|
|||
|
"\n",
|
|||
|
"[52583 rows x 200 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"data.drop(columns=['features', 'feature_len']).merge(previous_out, on='date', how='left')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"new_data = previous_out.merge(data.drop(columns=['features', 'feature_len', 'lookdays', 'pre_time']))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"drop_cols = [x for x in new_data.columns if 'agricultural' in x] + ['NH3_power'] + ['CO_Bio', 'VOCs_Bio']\n",
|
|||
|
"drop_cols.remove('NH3_agricultural')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 48,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"new_data.drop(columns=drop_cols, inplace=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"new_data.dropna(inplace=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"new_data.drop(columns=['day'], inplace=True)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 49,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"(49014, 188)"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 49,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"new_data.shape"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 51,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"new_data.set_index('date').to_csv('new_train_data.csv', encoding='utf-8-sig')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": []
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "py37",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.7.13"
|
|||
|
},
|
|||
|
"orig_nbformat": 4
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|