22-T67/数据预处理+特征工程.ipynb

991 lines
1.3 MiB
Plaintext
Raw Permalink Normal View History

2023-03-30 10:25:44 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Prefer SimHei so Chinese text renders; DejaVu Sans is listed as a fallback so that\n",
"# machines without SimHei still resolve the sans-serif family instead of emitting\n",
"# 'findfont: ... not found' warnings on every figure.\n",
"mpl.rcParams[\"font.sans-serif\"] = [\"SimHei\", \"DejaVu Sans\"]\n",
"# Draw the minus sign as an ASCII hyphen (commonly needed when a CJK font is active).\n",
"mpl.rcParams[\"axes.unicode_minus\"] = False"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>date</th>\n",
" <th>PM2.5</th>\n",
" <th>PM10</th>\n",
" <th>SO2</th>\n",
" <th>NO2</th>\n",
" <th>O3</th>\n",
" <th>O3_8h</th>\n",
" <th>CO</th>\n",
" <th>Ox</th>\n",
" <th>wind-U</th>\n",
" <th>...</th>\n",
" <th>VOC_resdient</th>\n",
" <th>VOC_power</th>\n",
" <th>VOC_agricultural</th>\n",
" <th>PM2.5_industrial</th>\n",
" <th>PM2.5_transportation</th>\n",
" <th>PM2.5_resdient</th>\n",
" <th>PM2.5_power</th>\n",
" <th>PM2.5_agricultural</th>\n",
" <th>CO_Bio</th>\n",
" <th>VOCs_Bio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-01-02 01:00:00</td>\n",
" <td>136.0</td>\n",
" <td>214.0</td>\n",
" <td>317.0</td>\n",
" <td>38.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>3.71</td>\n",
" <td>46</td>\n",
" <td>0.831775</td>\n",
" <td>...</td>\n",
" <td>0.937173</td>\n",
" <td>0.037724</td>\n",
" <td>0</td>\n",
" <td>0.926851</td>\n",
" <td>0.077715</td>\n",
" <td>0.827110</td>\n",
" <td>0.436028</td>\n",
" <td>0</td>\n",
" <td>0.081546</td>\n",
" <td>4.217706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-01-02 02:00:00</td>\n",
" <td>114.0</td>\n",
" <td>176.0</td>\n",
" <td>305.0</td>\n",
" <td>38.0</td>\n",
" <td>8.0</td>\n",
" <td>9.0</td>\n",
" <td>3.55</td>\n",
" <td>46</td>\n",
" <td>-0.695011</td>\n",
" <td>...</td>\n",
" <td>0.937173</td>\n",
" <td>0.036215</td>\n",
" <td>0</td>\n",
" <td>0.926851</td>\n",
" <td>0.081248</td>\n",
" <td>0.827110</td>\n",
" <td>0.418587</td>\n",
" <td>0</td>\n",
" <td>0.080031</td>\n",
" <td>4.119807</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01-02 03:00:00</td>\n",
" <td>97.0</td>\n",
" <td>154.0</td>\n",
" <td>306.0</td>\n",
" <td>37.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" <td>3.51</td>\n",
" <td>44</td>\n",
" <td>-0.173311</td>\n",
" <td>...</td>\n",
" <td>0.937173</td>\n",
" <td>0.035712</td>\n",
" <td>0</td>\n",
" <td>0.926851</td>\n",
" <td>0.088313</td>\n",
" <td>0.827110</td>\n",
" <td>0.412773</td>\n",
" <td>0</td>\n",
" <td>0.077761</td>\n",
" <td>3.973464</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-01-02 04:00:00</td>\n",
" <td>87.0</td>\n",
" <td>141.0</td>\n",
" <td>316.0</td>\n",
" <td>38.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" <td>3.55</td>\n",
" <td>45</td>\n",
" <td>0.000000</td>\n",
" <td>...</td>\n",
" <td>0.937173</td>\n",
" <td>0.036718</td>\n",
" <td>0</td>\n",
" <td>0.926851</td>\n",
" <td>0.091256</td>\n",
" <td>0.827110</td>\n",
" <td>0.424400</td>\n",
" <td>0</td>\n",
" <td>0.076766</td>\n",
" <td>3.909235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-01-02 05:00:00</td>\n",
" <td>85.0</td>\n",
" <td>139.0</td>\n",
" <td>292.0</td>\n",
" <td>37.0</td>\n",
" <td>7.0</td>\n",
" <td>8.0</td>\n",
" <td>3.62</td>\n",
" <td>44</td>\n",
" <td>1.234518</td>\n",
" <td>...</td>\n",
" <td>1.978475</td>\n",
" <td>0.039736</td>\n",
" <td>0</td>\n",
" <td>0.926851</td>\n",
" <td>0.092434</td>\n",
" <td>1.746121</td>\n",
" <td>0.459282</td>\n",
" <td>0</td>\n",
" <td>0.077119</td>\n",
" <td>3.930702</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 54 columns</p>\n",
"</div>"
],
"text/plain": [
" date PM2.5 PM10 SO2 NO2 O3 O3_8h CO Ox \\\n",
"0 2015-01-02 01:00:00 136.0 214.0 317.0 38.0 8.0 9.0 3.71 46 \n",
"1 2015-01-02 02:00:00 114.0 176.0 305.0 38.0 8.0 9.0 3.55 46 \n",
"2 2015-01-02 03:00:00 97.0 154.0 306.0 37.0 7.0 8.0 3.51 44 \n",
"3 2015-01-02 04:00:00 87.0 141.0 316.0 38.0 7.0 8.0 3.55 45 \n",
"4 2015-01-02 05:00:00 85.0 139.0 292.0 37.0 7.0 8.0 3.62 44 \n",
"\n",
" wind-U ... VOC_resdient VOC_power VOC_agricultural PM2.5_industrial \\\n",
"0 0.831775 ... 0.937173 0.037724 0 0.926851 \n",
"1 -0.695011 ... 0.937173 0.036215 0 0.926851 \n",
"2 -0.173311 ... 0.937173 0.035712 0 0.926851 \n",
"3 0.000000 ... 0.937173 0.036718 0 0.926851 \n",
"4 1.234518 ... 1.978475 0.039736 0 0.926851 \n",
"\n",
" PM2.5_transportation PM2.5_resdient PM2.5_power PM2.5_agricultural \\\n",
"0 0.077715 0.827110 0.436028 0 \n",
"1 0.081248 0.827110 0.418587 0 \n",
"2 0.088313 0.827110 0.412773 0 \n",
"3 0.091256 0.827110 0.424400 0 \n",
"4 0.092434 1.746121 0.459282 0 \n",
"\n",
" CO_Bio VOCs_Bio \n",
"0 0.081546 4.217706 \n",
"1 0.080031 4.119807 \n",
"2 0.077761 3.973464 \n",
"3 0.076766 3.909235 \n",
"4 0.077119 3.930702 \n",
"\n",
"[5 rows x 54 columns]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data = pd.read_excel('./data/mod_merge_ssr&MEIC&BUGS.xlsx')\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Drop the O3_8h column. errors='ignore' makes the cell safe to re-run after the\n",
"# column is already gone (the in-place version raised KeyError on a second run).\n",
"data = data.drop(columns='O3_8h', errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['PM2.5', 'PM10', 'SO2', 'NO2', 'O3', 'CO']"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Target (output) columns, listed by name instead of sliced by position so the list\n",
"# does not depend on column order or on which earlier cells have already run.\n",
"out_cols = ['PM2.5', 'PM10', 'SO2', 'NO2', 'O3', 'CO']\n",
"missing_out_cols = [col for col in out_cols if col not in data.columns]\n",
"assert not missing_out_cols, f'missing target columns: {missing_out_cols}'\n",
"out_cols\n"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"# Parse timestamps first so min/max below are taken on real datetimes.\n",
"data['date'] = pd.to_datetime(data['date'])\n",
"# Complete hourly grid over the observed span; hours absent from the file become\n",
"# all-NaN rows after the reindex. Naming the range keeps the index called 'date'.\n",
"date_range = pd.date_range(start=data['date'].min(), end=data['date'].max(), freq='H', name='date')\n",
"data = data.set_index('date').reindex(date_range)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"import datetime as dt"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"打印输出列的分布"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.\n",
"findfont: Generic family 'sans-serif' not found because none of the following families were found: SimHei\n",
"findfont: Font family ['sans-serif'] not found. Falling back to DejaVu Sans.\n",
"findfont: Generic family 'sans-serif' not found because none of the following families were found: SimHei\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3oAAAGPCAYAAAAKk6L7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAABJaUlEQVR4nO3dfbhlZXnn+e+vKUUiorwUNFZhimjFDnB1UGoIhnSGtmLElw6ko6acRCqdylTGwY5GO22RzLSmr2EG0q0kplsyRAyFQaEGNTIKKsE4jhkES4PyJqGUaiipUKUSxCQSq7znj/Uc2XVqn1NV55z9cvb5fq5rX3vte6+19r322efZ6157redJVSFJkiRJmhz/ZNQJSJIkSZIWloWeJEmSJE0YCz1JkiRJmjAWepIkSZI0YSz0JEmSJGnCWOhJkiRJ0oSx0JMkSZKkCWOhp4FKsj3JPyT5TpJHkvxJkiOTfDpJJfnxafP/WYuf0x6vT/KFJN9OsiPJ7yVZNsvrVZK/a6/3nSTvGewWSlrMFqCNOi3JJ5J8I8l+A9MmOSbJh1u79N+S/A/D2TJJkybJTyX5/5I8luRbSf4yyX/XnluZ5Jok32ztze1JXtmz7PFJPpDk4bb8Xyb5idFtjYbBQk/D8K+q6kjghcB/B/wvLf7XwAVTMyU5FjgL2N2z7A8BbwKOA34CWAv8uwO83o9X1ZHt9msLsgWSJtl82qjvAVuADTOs+78C/wicAPwScHmSUxc0e0kTL8lRwEeBPwSOAVYAvws8keQY4LN0bc2pdPtMlwHvT/Kqtoojgc8DZ7TlNwMfS3LkMLdDw2Whp6Gpqq8DNwGntdA1wC8mOaw9fi3wYbqGamqZy6vq/62qf2zLXwOcPcS0JS0Rc2yj7quqK4G7p68vydOBXwD+16r6TlV9FrgBeN3gtkLShPpRgKr6QFXtrap/qKpPVtWXgd8EvgNsqKq/ac99ALgYeEeSVNXXquqdVbWzLX8F8FTg+SPbIg2chZ6GJslJwMuBv2qhh4F7gJ9tjy8Arj7Aan6aPjtU03wmyd8k+VCSVXNMV9ISs0BtVK8fBfZW1V/3xL5Ed8Rdkg7FXwN7k2xO8rIkR/c89xLgg1X1/WnLbAGeQysSeyU5na7Q2zagfDUGLPQ0DH+W5G/pTiv4f4D/vee5q4ELkjwfeFZV3TrTSpL8G2AN8J9nea3/HlgF/DO6nbSPznZNnySxQG1UH0cCj02LPQY8Yx65SlqCqurbwE8BBfwxsDvJDUlOoDtVc2efxaZix/UG22mg7wN+t6qmt1GaIO4AaxjOr6o/7w0kmZr8EPAO4Jt0jU5fSc4HLgF+pqq+MdN8VfWZNvmPSd4IfBv4MeDOuSYvaeLNu42awXeAo6bFjgIen0OOkpa4qroX+BWAJP8M+FPg94FvACf2WWQq9oP9piRHAP838Lmq+j8GmK7GgL/oaaSq6u/prol5PTPsRCU5l+7o1b+qqkMt2ArIAeeSpD4Opo2axV8Dy5Ks7on9OAc+/VySZlVVXwGuorum+M+BX0gyfb/+NcBDdG0RSQ4H/gz4OvDrw8pVo2Ohp3Hw28B/X1Xbpz+R5MV0HSL8QlXdPttKkpya5PQkh7VepN5B15jdO4CcJS0ds7VRSfI0umtdSPK0tjNFVf0d3S+C/zHJ05OcDZzHoReMkpa4JP8syVuSrGyPT6LrIOpzdD1sHgVcmeSftnbotcDvAL9VVZXkKcD1wD8AF/S5nk8TyEJPI1dVD7fe6Pr5X4FnAjf2jI1309STSW5K8tvt4QnAdXSna36N7lq9V1bV9waXvaRJd4A26ofpdpymfqX7B+C+nuf/Z+AIYBfwAeD1VeUvepIO1eN0w0zdluTv6Aq8u4C3VNU36a7fexpdB1LfBN4MvK6qrmvL/yTwSrrOpf62Z5/qXwx5OzREqdpvfFdJkiRJ0iLmL3qSJEmSNGEs9CRJkiRpwljoSZIkSdKEsdCTJE
mSpAljoSdJkiRJE2bZqBOYq+OOO65WrVo16jQkLaAvfOEL36iq5aPOYz5sm6TJZPskaRzN1jYt2kJv1apVbN26ddRpSFpASf7bqHOYL9smaTIdavuU5DBgK/D1qnplkmPoxnpdBWwHXlNVj7Z5LwI2AHuB36iqT7T4GcBVdGMx3gi8sQ1+fThwNXAG3Zhpv1hV2w+Uk+2TNHlma5s8dVOSJGnhvRG4t+fxJuCWqloN3NIek+QUYB1wKnAu8O5WJAJcDmwEVrfbuS2+AXi0qp4HXAZcOthNkbQYWehJkiQtoCQrgVcA7+kJnwdsbtObgfN74tdW1RNV9QCwDTgzyYnAUVV1a1UV3S945/dZ1/XA2iQZ0OZIWqQs9CRJkhbW7wP/Hvh+T+yEqtoJ0O6Pb/EVwEM98+1osRVtenp8n2Wqag/wGHDsgm6BpEXPQk+SJGmBJHklsKuqvnCwi/SJ1Szx2Zbpl8/GJFuTbN29e/dBpiRpEljoSZIkLZyzgZ9Lsh24Fnhxkj8FHmmnY9Lud7X5dwAn9Sy/Eni4xVf2ie+zTJJlwDOBb/VLpqquqKo1VbVm+fJF3WmopENkoSdJkrRAquqiqlpZVavoOln5VFX9MnADsL7Nth74SJu+AViX5PAkJ9N1unJ7O73z8SRntevvLpi2zNS6XtVeo+8vepKWrkU7vMKhWLXpYwNb9/ZLXjGwdUuafINqn2ybpLFzCbAlyQbgQeDVAFV1d5ItwD3AHuDCqtrblnk9Tw6vcFO7AVwJvC/JNrpf8tYtdLK2TdLityQKPUmSpGGrqk8Dn27T3wTWzjDfxcDFfeJbgdP6xL9LKxQlaSYHPHUzyUlJ/iLJvUnuTvLGFn97kq8nuaPdXt6zzEVJtiW5L8lLe+JnJLmzPfeuqa6A2+kK17X4bUlWDWBbJUmSJGlJOJhr9PYAb6mqHwPOAi5sg3sCXFZVp7fbjeDAn5IkSZI0agcs9KpqZ1V9sU0/DtzLk+O49OPAn5IkSZI0QofU62Y7pfIFwG0t9IYkX07y3iRHt5gDf0qSJEnSCB10oZfkSOCDwJuq6tt0p2E+Fzgd2Am8Y2rWPosvyMCfDvopSZIkSQd2UIVekqfQFXnXVNWHAKrqkaraW1XfB/4YOLPNPrCBPx30U5IkSZIO7GB63QzdeC33VtU7e+In9sz288BdbdqBPyVJkiRphA7mF72zgdcBL542lMLvtaESvgz8S+A3oRv4E5ga+PPj7D/w53voOmj5KvsO/HlsG/jzzcCmBdk6SRMrydOS3J7kS23ol99t8WOS3Jzk/nZ/dM8yDv0iSZKWhAMOmF5Vn6X/NXQ3zrKMA39KGrQngBdX1Xfa6eWfTXIT8K+BW6rqkiSb6A4cvXXa0C/PBv48yY+2A1FTQ798jq5tO5fuQNQPhn5Jso5u6JdfHO5mSpIkHbpD6nVTksZFdb7THj6l3Yp9h2vZzL7DuDj0iyRJWhIs9CQtWkkOS3IHsAu4uapuA05o1wTT7o9vszv0iyRJWjIs9CQtWq3n39PpevE9M8l+p4b3cOgXSZK0ZFjoSVr0qupvgU/TXVv3yFSvwO1+V5vNoV8kSdKSYaEnaVFKsjzJs9r0EcDPAF9h3+Fa1rPvMC4O/SJJkpaEA/a6KUlj6kRgc5LD6A5abamqjya5FdiSZAPwIK1H36q6O8nU0C972H/ol6uAI+h62+wd+uV9beiXb9H12ilJkjT2LPQkLUpV9WXgBX3i3wTWzrCMQ79IkqQlwVM3JUmSJGnCWOhJkiRJ0oSx0JMkSZKkCWOhJ0mSJEkTxkJPkiRJkiaMhZ4kSZIkTRgLPUmSJEmaMBZ6kiRJkjRhLPQkSZIkacJY6EmSJEnShLHQkyRJkqQJY6EnSZIkSRPGQk+SJEmSJswBC70kJyX5iyT3Jrk7yRtb/JgkNye5v90f3bPMRUm2JbkvyUt74mckubM9964kafHDk1zX4rclWTWAbZUkSZKkJeFgftHbA7ylqn
4MOAu4MMkpwCbglqpaDdzSHtOeWwecCpwLvDvJYW1dlwMbgdXtdm6LbwAerarnAZcBly7AtkmaYLMchHp7kq8nuaP
"text/plain": [
"<Figure size 1080x720 with 6 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# One histogram per target column on a 3x3 grid (only the first six slots are used).\n",
"fig = plt.figure(figsize=(15, 10))\n",
"for index, col in enumerate(out_cols):\n",
"    plt.subplot(3, 3, index + 1)\n",
"    plt.title(col)\n",
"    try:\n",
"        # Missing values carry no information for a histogram, so drop them explicitly.\n",
"        plt.hist(data[col].dropna())\n",
"    except (TypeError, ValueError) as err:\n",
"        # Best effort: keep plotting the other columns, but say why this one failed\n",
"        # instead of swallowing every exception with a bare except.\n",
"        print(f'{col}: {err}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"打印特征列的分布"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# Feature columns: every column of `data` that is not one of the targets.\n",
"fea_cols = [name for name in data.columns if name not in out_cols]"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"VOCs_Bio\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABXIAAAYhCAYAAAAXQGZcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOz9e7xkVX3n/7/e0ogoglxaAg3YJKAJ8IsoHUJCoigaUZxAMl5womBChsRgxEsSwUxGnIQZzCTelQmKAbwBwRujoCIGHb/hYqPIRSS00kpLC60goFGU9vP7Y68D1afrXLq7Tp06dV7Px6MeZ9fae+1auy7r7LX22p+VqkKSJEmSJEmSNLoeNt8FkCRJkiRJkiRNz45cSZIkSZIkSRpxduRKkiRJkiRJ0oizI1eSJEmSJEmSRpwduZIkSZIkSZI04uzIlSRJkiRJkqQRZ0euJGlBSPLDJL+4mXkvT/LHgy6TpIXNekXSbCX5gySfme9ySJIWNztyNVRJXprk+iT/keS7Sc5I8pj5Lpek0VdV21XVNwe93yTLk1SSJZPSz07yd4N+PUmjYy7qlST/lOTcPum/muT+JDsN8vUkDUdVfaCqfmdz8rbzjH0GXSZJi0OS1Ul+3C5Af7e1U7Zr6zZqs0zVvtF4sCNXQ5PkNcAbgb8EdgAOAR4HXJrk4fNZNkmSpAE5G/j9JI+alH4s8Imqumv4RZI0l+wskTQE/6mqtgMOBJ4EnDK/xdF8sSNXQ5Fke+ANwJ9X1aeq6mdVtRp4AV1n7ouTXJzkH3vynJ/kvfNTYknDkuQPk/zfnuerklzQ8/y2JAf2jmZpV57fmeSTSe5LclWSX+rJ88wkX09yT5J3ABnqQUmaV/NZr1TVFcB3gP/ck3cr4L8A5wz8YCUNVJKTk3yj1QNfS/J7Lf2lSb7Ys10lOTHJLcAt0+zvC23xq2003Qtb+nOTXJvkB0n+Lcmv9uRZneQvk1yX5EdJzkqya5JLWrk+m2THtu3EyLsTktyeZG0bQCNpDFXVd4FP03XoahGyI1fD8pvAI4CP9CZW1Q+BS4BnAn8EvCTJ05P8AfBrwEnDLqikofs88NtJHpZkN2Br4FCAFrtyO+C6PvleRHeBaEdgFXBay7ML8GHgvwG7AN+Y2J+kRWO+65Vz6UbgTnhGK8Mlm39IkobkG8Bv091B+Abg/a0e6edo4NeB/abaWVU9pS0+sYVzOT/Jk4H3An8C7Az8E3BRkm16sv5nujbS44H/RFd/vI6uDnoY8IpJL/U0YF/gd4CTkzxjVkcraUFJsgfwbLrzFC1CduRqWHYBvldVD/RZtxbYpV1Z+lO60SpvBY6tqvuGWEZJ86DFp7yP7qryU+muMH8nyS+35/+vqn7eJ+tHqurqVq98gIeuSj8H+FpVXVhVPwPeAnx3Tg9C0kgZgXrlfcBTW2MLuk7dD7a8kkZYVf1LVd1eVT+vqvPpRtsePMXm/6uq7qqqH2/iy/xX4J+q6qqqWl9V5wD304Wem/D2qrqjqr4D/D/gqqr6SlXdD3yU7tbqXm+oqh9V1fXAP9NdmJI0Pj6W5D7gNuBO4PU96/6ije7/QZIf0P9itcaEHbkalu8Bu0wRP2q3th7gE8BWwM1V9cU+20oaT58HDgOe0pYvp+tseWp73k9vJ8p/0I2wA9id7gQHgKqq3udJbmy3Nv4wyW8DExeYtp60/60BO12khWve6pWq+jbwBbrQUdvRjdozrIK0ACQ5tifkwQ+AA+gGpfRz2xTpM3kc8JpJHS970tU1E+7oWf5xn+fbsaHesnxr0r4kLXxHV9Wj6c5tfpkN66V/qKrHTDyAX+2TX2PCjlwNyxV0V5l/vzexTQTybOCylnQacBOwWxKvIkuLx0SHy2+35c8zc4fLVNbSNYYASJLe51W1f7u1cbuq+n9t+58ByyftZ2+6hpCkhWk+6xXoOm6Ppbs9+taq+vJmHoekIUnyOODdwMuBnVuHyA1MHW
u/NvOlbgNO6+14qapHVtWHNnN/0FMnAXsBt2/BviSNqKr6PN3Eqv8wz0XRPLEjV0NRVffQxZh6e5IjkmydZDnwL8Aa4H1JngL8IV2j59i27bL5KrOkofo8XWy3batqDd0thEfQxY37yibu65PA/kl+v90F8ArgF6bauKrW08W+PC3Jzq1+ehFdvDvjWUoL17zVK82H6TpW3oCjcaWF4lF0nbProJs4kW5E7pa6A/jFnufvBv40ya+n86gkRyZ59Ba8xt8keWSS/enaVOdvSYEljbS3AM9McuA8l0PzwI5cDU1V/T1dgP5/AO4FrqK7Gn04sA3dxCAvr6rvtLAKZwH/3Ea9SBpjVfXvwA/pOlqoqnuBbwL/X+to3ZR9fQ94PnA68H26iT/+vxmy/RlwF108qTvpRuIcWVV3TJtL0sia73qlqn7EQ525H9jU8ksavqr6GvCPdHcT3gH8/5j5HGI2TgXOaWEUXlBVK+ni5L4DuJtu0qKXbuFrfL7t5zK626w/s4X7kzSiqmodXf/J38x3WTR86UJ8SZIkSZKkhaTd5XgrsPUUE0tLksaII3IlSZIkSZIkacQtme8CSJIkSZK0UCX5baaIq19V2w25OJKkMWZoBUmSJEmSJEkacYZWkCRJkiRJkqQRZ0euJEmSJEmSJI24BRsjd5dddqnly5fPdzEkTXLNNdd8r6qWznc5NpV1ijSarFMkDZJ1iqRBsk6RNEizqVMWbEfu8uXLWbly5XwXQ9IkSb4132XYHNYp0miaqU5JsidwLvALwM+BM6vqrUlOBf4rsK5t+rqqurjlOQU4HlgPvKKqPt3SDwLOBrYFLgZOqqpKsk17jYOA7wMvrKrV05XLOkUaTZ6nSBok6xRJgzSbOsXQCpIkaSF7AHhNVf0KcAhwYpL92ro3V9WB7THRibsfcAywP3AE8K4kW7XtzwBOAPZtjyNa+vHA3VW1D/Bm4I1DOC5JkiRJ2oAduZIkacGqqrVV9eW2fB9wE7BsmixHAedV1f1VdSuwCjg4yW7A9lV1RVUV3Qjco3vynNOWLwQOT5LBH40kSZIkTc2OXEmSNBaSLAeeBFzVkl6e5Lok702yY0tbBtzWk21NS1vWlienb5Cnqh4A7gF2notjkCRJkqSp2JErSZIWvCTbAR8GXllV99KFSfgl4EBgLfCPE5v2yV7TpE+XZ3IZTkiyMsnKdevW9ckiSZL0kCSPSXJhkq8nuSnJbyTZKcmlSW5pf3fs2f6UJKuS3JzkWT3pByW5vq17m3cOSePLjlxJkrSgJdmarhP3A1X1EYCquqOq1lfVz4F3Awe3zdcAe/Zk3wO4vaXv0Sd9gzxJlgA7AHdNLkdVnVlVK6pqxdKlC24Ca0mSNHxvBT5VVb8MPJEuRNTJwGVVtS9wWXu+uXH+JY2ZJfNdAD1k+cmfHPg+V59+5MD3KUmDZN2nLdFGnJwF3FRVb+pJ362q1ranvwfc0JYvAj6Y5E3A7nSNnauran2S+5IcQhea4Vjg7T15jgOuAJ4HfK7F0dUiYT0ladRZTy08SbYHngK8FKCqfgr8NMlRwGFts3OAy4HX0hPnH7g1yUSc/9W0OP9tvxNx/i8Z0qFIY2XU69MZR+Qm2TPJv7Zh/jcmOamlD2y4f5Jtkpzf0q9qMe4kSZJmcijwEuDpSa5tj+cAf9/OOa4Dnga8CqCqbgQuAL4GfAo4sarWt329DHgP3QRo3+ChBtBZwM6twfRq2sgYSZKkLfCLwDrgn5N8Jcl7kjwK2HXiYnT7+9i2/ebE+Zc0ZmYzIvcB4DVV9eUkjwauSXIp3VWjy6rq9CQn0zVqXjtpuP/uwGeTPL41kiaG+18JXEw33P8S4Hjg7qraJ8kxwBuBFw7yQCVJ0vipqi/SP4btxdPkOQ04rU/6SuCAPuk/AZ6/BcWUNAaSvAr4Y7oY2dcDfwg8EjgfWA6sBl5QVXe37U+ha+esB15RVZ9u6QcBZwPb0tVVJznKX1qUlgBPBv68qq
5K8lamv1i8OXH+H8qcnEDXH8Nee+216aWVNBJmHJFbVWur6stt+T66mC3L6Ib1n9M2O4du6D70DPevqlvpRrUcnGQ
"text/plain": [
"<Figure size 1728x2016 with 45 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Size the grid from the number of features. The previous fixed 9x5 grid has only 45\n",
"# slots; the saved output shows 45 axes and 'VOCs_Bio' printed by the bare except,\n",
"# i.e. the last feature was silently never plotted.\n",
"n_grid_cols = 5\n",
"n_grid_rows = (len(fea_cols) + n_grid_cols - 1) // n_grid_cols  # ceiling division\n",
"fig = plt.figure(figsize=(24, 28))\n",
"for index, col in enumerate(fea_cols):\n",
"    plt.subplot(n_grid_rows, n_grid_cols, index + 1)\n",
"    plt.title(col)\n",
"    try:\n",
"        # Missing values carry no information for a histogram, so drop them explicitly.\n",
"        plt.hist(data[col].dropna())\n",
"    except (TypeError, ValueError) as err:\n",
"        # Best effort: keep plotting the other columns, but report the reason.\n",
"        print(f'{col}: {err}')\n",
"fig.savefig('fea.png')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"分别对每个输出找到相关性最大的特征"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"import seaborn as sns"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/matrix.py:198: RuntimeWarning: All-NaN slice encountered\n",
" vmin = np.nanmin(calc_data)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/matrix.py:203: RuntimeWarning: All-NaN slice encountered\n",
" vmax = np.nanmax(calc_data)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABw8AAASFCAYAAABZiZNkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzde7ztU73/8dcml4pSLtFVB31UijhddkdREkqiIn4qKknlfg0pFaewE6GLUjicIpRbhE6kcj92nPDJpU2ube22a/bGWr8/xphM01prz7X2nHPNtdfr+XisxzK/3/H9fsfcu0f7/fiOMT5jyuDgIJIkSZIkSZIkSZK00Hh3QJIkSZIkSZIkSVJ/cPBQkiRJkiRJkiRJEuDgoSRJkiRJkiRJkqTKwUNJkiRJkiRJkiRJgIOHkiRJkiRJkiRJkioHDyVJkiRJkiRJkiQBDh5KkiRJkiRJkiRJqp4z3h2QJEkaDxHxBWAvYHlgOrBTZl41TNvXA18D1gJeVdse3dJmCvBV4DPAUsAfgB0y85amNi8GjgI+ADwJnA7skpmPdPK7SZIkdZrZSZIkqX0TPTu58lCSJE06EfFR4HBK6FoTuA74dUQsM8wlzwNuA74I3DtMm72BnYHPAW8FHgHOj4hFm9qcDLweWJ8S5NYBvjdfX0aSJKnLzE6SJEntWxCy05TBwcGxXCdJkjRhRcQVwJWZuVP9vBDwN+DbmTltHtfOAKY1zwCrs7/urse/VY+9ELgP+FhmnhYRrwVuAP49M6+pbTYEzgVempn3dfhrSpIkdYTZSZIkqX0LQnZy5aEkSZpU6oystYALGscycwC4CJg6xtu+mlKGovmeDwBXNN1zKjCrEeCqi4BB4C1jfK4kSVJXmZ0kSZLat6BkJ/c8lCRJC4SIWIpS873V7Myc3fR5GWBhyuysZvcBK4/x8cs33aP1nss3tXnG+cx8IiJmNbWRJEnqulHkJjA7SZKkSW4yZicHDyVJUk88fv9t3a6V/lXgK8McP3CI4639mTLEsdGa1z2Hun8nnitJkhYgfZibwOwkSZL6lNlp2PNjfq6Dh5IkqTcGnuz2E44Ajh/i+OyWz/cDT/LsWVfL8ewZXO1qbGa9PDCz5Z5XN7V5SfNFEfEc4EXz8VxJkrQg6p/cBGYnSZLU78xOjTYdy04OHkqSpAVCLRMxu412cyPiGmB94Gx4auPq9ShhcCz+Sglp6wPX13u+AHgrcFRtcxmwdESsmZn/W4+9mzID7MoxPleSJGnU2s1Nta3ZSZIkTWqTMTtNGRy00oMkSeq+x+/LroaORV4SU9ptGxEfBU4EtqcEqF2BzYHXZOb9EXEicFdm7lvbLwq8rl7+K+AE4BTKRtR31Db7AF8EtqGEuq8DqwGvz8w5tc15lFlgOwCLAD8BrsjMj4/9m0uSpAVNP+UmMDtJkqT+ZnbqfHZy5aEkSZp0MvOUiFgW+Bql5MN0YMPMvL82eSUw0HTJS4Frmz5/sf6cAGxbjx0KPB84lrKJ9u+BjRoBrtoaOBr4Tb3/acDOHfpakiRJXWF2kiRJat+CkJ1ceShJknri8Xtu7O4ssBVeO6pZYJIkSf3K3CRJktQ+s1PnLTTeHZAkSZIkSZIkSZLUHyxbKkmSemJwcGDejSRJkmRukiRJGgWzU+e58lCSJEmSJEmSJEkS4MpDSZLUKwPOApMkSWqLuUmSJKl9ZqeOc+WhJEmSJEmSJEmSJMCVh5IkqVesPy9JktQec5MkSVL7zE4d58pDSZIkSZIkSZIkSYArDyVJUq8MPDnePZAkSZoYzE2SJEntMzt1nCsPJUmSJEmSJEmSJAGuPJQkSb1i/XlJkqT2mJskSZLaZ3bqOAcPJUlSbwwY5CRJktpibpIkSWqf2anjLFsqSZIkSZIkSZIkCXDloSRJ6pFBS0hIkiS1xdwkSZLUPrNT57nyUJIkSZIkSZIkSRLgykNJktQr1p+XJElqj7lJkiSpfWanjn
PloSRJkiRJkiRJkiTAlYeSJKlXrD8vSZLUHnOTJElS+8xOHefKQ0mSJEmSJEmSJEmAKw8lSVKvDDw53j2QJEmaGMxNkiRJ7TM7dZyDh5L6XkQcCHyl6dA9wGXA3pl5a9P5WzJzlSGuvwVYCfhqZh5Yj20OfBxYC3ghkMC0zPzpPPqyIvDXIU6dkplbjuqLSZIkdVGXMtTKwF7A24DVgEszc90hrp0C7At8DlgGuArYOTOnd+bbSZIkdV9EbAvsBLwGeAKYAfw2M3dvafdKSq7akJJ97gHOBL6emfc3tQtgZ+DdwKuAe4Gzga9k5uzufhtJap9lSyVNFA8AU+vPnsAawG8i4vn1/GPAqyPi35sviog3U8LYYy332x14GNgN2AT4LfDfEbFTm/3Zs6k/U4EvjfL7SJPP4EB3fyRJQ+l0hno98D7gL/VnOF8EDgAOAT5AyV0XRcTy8/NlpEnD3CRJ4y4i9gV+BPwa+BDwCcqA4CYt7V4PXEOZXLU/8F7gG8BmwBUR8dKm5usD/wF8j5KpDgI2By6ICN/VS2Nlduo4Vx5KmiieyMzL639fHhF3AJdSghbAI8D/AlsCVzddtyXwP5QVhs0+0DzzC/ifGuZ2B45qoz/Z1B9JkqR+1ekMdXZmngkQEadRZtY/Q0QsThk8/EZmHl2PXUaZqb8jTrqSJEkTw47ADzJzv6ZjZ0fEVxsfarWFk4B/AlMz88F66pKIOAe4DvgusGk9/lPgmMwcrJ8vjog7KQOU7wAu6daXkaTRcDaDpInqmvp7xaZjPwO2qMGtEeC2qMefoWXgsOFaYLnOdlPSUwYGuvsjSWrH/Gaodv4P9+3AC4BTm657hFKSa6Mx9VqabMxNktQPlqKUFX2GpoE/gHdSKjsc1DRw2Gh3F/AdYJO6DQ6Z+Y+W66G8jwLfSUljZ3bqOAcPJU1UK9bfzSHuDOAlwNr18zuAZYFftHnPtwM3tNn2JxHxZETcExGHR8Rz27xOkiRpPK1Yf3cyQ7VaFXgSuLnl+I31nCRJ0kTwv8BOEbFNRCw9TJt31t9nDnP+l8AUns5ZQ3l7/d3uOylJ6joHDyVNGBHxnPrzGkrJh4eAixrn68bS51PKbFF/n9/OhtMRsR7wQeCYeTSdU9t8GlgP+AHwOYaYmS+phfXnJWlcdDNDDeNFwMOZ+WTL8X8Cz4uIRcd4X2nyMDdJUj/4AmXf5uOBmRHx54j4WkS8oKnNy4DZmfnAMPe4vands0TE84BvApdk5p87021pEjI7dZyDh5ImiqWBx+tPAv8GfDQz72lp9zPgIxGxGPAR2hjUq6Uj/hs4MzOPH6ltZt6TmTtm5lmZeXFmHkjZJ3GTiFhjVN9ImmwsISFJ46FrGWoeWstxQZl1P9w5Sc3MTZI07jLzOuC1wCaUCVhTgAOAqyNiiTZvM2zuqaXij6OUK/3U/PVWmuTMTh3n4KGkieIB4M3AvwMvB1bMzPOGaHcWsARwMPB8yt46w4qIFwPnAXcAHxtj306rv9cc4/WSJEnd0pUMNQ//BJaMiIVbji8FPJqZj8/HvSVJknomM+dk5tl1IvnrgO2AVSgVqQDuApZqWY3YbMWmdq0OATYDNs3M2zrYbUmab88Z7w5IUpueyMyr59UoMx+JiHOA3YCfZ+Yjw7WtpSHOARYF3j9S23kYbPktaQiDg63V6yRJPdDxDNWGm4CFgZUpqx0bVq3nJM2DuUmS+lNmHhcRh/L0Ps6/q783AU4a4pJNKO+LLm0+GBG7AXsCW2bmpUNcJ2kUzE6d58pDSQui71Fmy39/uAYR8Rzg55TZYhtl5t/n43kfqb+vmY97SJIkjbd5Zqg2/RF4ENi8caBO2voApeKDJElS34uI5YY4tizwQuC+euh3wHTggIhYsqXtCsAulG1ybm86/v+AbwF7ZOap3em9JM0fVx5KWuBk5sXAxfNo9l3gfZQQ9+KIeFvTuWszcw5ARNxC2bT60/XzgcCSwB8oL8XeCewFnFFr4U
saziTdYFqSJop2MlQdBHxf/fgy4AUR0ZhI9avMfDQzH4uIb1Jeov2Tstpwd8rk1aO60XdpgWNukqR+cH1EnAlcAPw
"text/plain": [
"<Figure size 2160x2160 with 12 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# For each target, show the correlation matrix of the k columns most correlated with it.\n",
"k = 10\n",
"fig = plt.figure(figsize=(30, 30))\n",
"sns.set(font_scale=1.25)  # loop-invariant, so set once instead of on every iteration\n",
"for i, u_col in enumerate(out_cols):\n",
"    use_cols = fea_cols + [u_col]\n",
"    corrmat = data[use_cols].corr()\n",
"    cols = corrmat.nlargest(k, u_col)[u_col].index\n",
"    # Slice the pandas matrix (computed pairwise, NaN-aware) instead of calling\n",
"    # np.corrcoef on the raw values: `data` has NaN rows after the hourly reindex, which\n",
"    # made np.corrcoef return all-NaN (the 'All-NaN slice encountered' warnings).\n",
"    cm = corrmat.loc[cols, cols].values\n",
"    plt.subplot(3, 3, i + 1)\n",
"    plt.title(u_col)\n",
"    hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10},\n",
"                     yticklabels=cols.values, xticklabels=cols.values)\n",
"fig.savefig('./cm.png')"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"直接看相关性太低了,将过去一个时刻的指标扔进来"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"data.index.name = 'date'"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>pre_time</th>\n",
" <th>pre_PM2.5</th>\n",
" <th>pre_PM10</th>\n",
" <th>pre_SO2</th>\n",
" <th>pre_NO2</th>\n",
" <th>pre_O3</th>\n",
" <th>pre_CO</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2015-01-02 01:00:00</td>\n",
" <td>136.0</td>\n",
" <td>214.0</td>\n",
" <td>317.0</td>\n",
" <td>38.0</td>\n",
" <td>8.0</td>\n",
" <td>3.71</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2015-01-02 02:00:00</td>\n",
" <td>114.0</td>\n",
" <td>176.0</td>\n",
" <td>305.0</td>\n",
" <td>38.0</td>\n",
" <td>8.0</td>\n",
" <td>3.55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2015-01-02 03:00:00</td>\n",
" <td>97.0</td>\n",
" <td>154.0</td>\n",
" <td>306.0</td>\n",
" <td>37.0</td>\n",
" <td>7.0</td>\n",
" <td>3.51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2015-01-02 04:00:00</td>\n",
" <td>87.0</td>\n",
" <td>141.0</td>\n",
" <td>316.0</td>\n",
" <td>38.0</td>\n",
" <td>7.0</td>\n",
" <td>3.55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2015-01-02 05:00:00</td>\n",
" <td>85.0</td>\n",
" <td>139.0</td>\n",
" <td>292.0</td>\n",
" <td>37.0</td>\n",
" <td>7.0</td>\n",
" <td>3.62</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" pre_time pre_PM2.5 pre_PM10 pre_SO2 pre_NO2 pre_O3 pre_CO\n",
"0 2015-01-02 01:00:00 136.0 214.0 317.0 38.0 8.0 3.71\n",
"1 2015-01-02 02:00:00 114.0 176.0 305.0 38.0 8.0 3.55\n",
"2 2015-01-02 03:00:00 97.0 154.0 306.0 37.0 7.0 3.51\n",
"3 2015-01-02 04:00:00 87.0 141.0 316.0 38.0 7.0 3.55\n",
"4 2015-01-02 05:00:00 85.0 139.0 292.0 37.0 7.0 3.62"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Move the timestamp index back into a 'date' column; skipped when the column already\n",
"# exists so that re-running the cell does not add a stray 'index' column.\n",
"if 'date' not in data.columns:\n",
"    data = data.reset_index()\n",
"data['date'] = pd.to_datetime(data['date'])\n",
"# Timestamp of the previous hour, computed vectorised instead of with a per-row apply.\n",
"data['pre_time'] = data['date'] - pd.Timedelta(hours=1)\n",
"# Copy of the targets keyed by their own timestamp (renamed to 'pre_time'); merging it on\n",
"# data['pre_time'] attaches the previous hour's target values to each row.\n",
"pre_out = data[['date'] + out_cols].copy()\n",
"pre_out.columns = ['pre_time'] + [f'pre_{x}' for x in out_cols]\n",
"pre_out.head()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(46455, 60)"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach the previous-hour targets, then keep only rows with no missing value in any column.\n",
"merged_with_lag = data.merge(pre_out, how='left', on='pre_time')\n",
"use_data = merged_with_lag.dropna()\n",
"use_data.shape"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"定义新的特征列"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Feature columns of the merged frame: everything except the current-hour targets.\n",
"# NOTE(review): this still includes the 'date' and 'pre_time' timestamp columns - confirm\n",
"# that downstream code expects them in the feature list.\n",
"new_fea_cols = [name for name in use_data.columns if name not in out_cols]"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABvgAAASFCAYAAABql9tEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3gU1dvG8e/uppMESKdLkdB7L9Kk+orYEEWwoYCKyA9BmgVEKSIiICKigGBFQKV3EJAqIEiRXkMaIaSQkG3vH4sJSxIMsEBi7s915dLMnJm9d5PNPpwz54zBbrfbEREREREREREREREREZE8wXi3A4iIiIiIiIiIiIiIiIhIzmmAT0RERERERERERERERCQP0QCfiIiIiIiIiIiIiIiISB6iAT4RERERERERERERERGRPEQDfCIiIiIiIiIiIiIiIiJ5iAb4RERERERERERERERERPIQDfCJiIiIiIiIiIiIiIiI5CEa4BMREZF8Z/v27fTq1YsmTZoQHh7O2rVr//WYpUuX0q5dO6pWrcqDDz7Ib7/9dgeSioiIiNxdqptEREREcu5O1k4a4BMREZF859KlS4SHh/P222/nqP2uXbvo378/jz32GD///DP3338/L7/8MkePHr3NSUVERETuLtVNIiIiIjl3J2sng91ut99qYBEREZG8Kjw8nKlTp9KiRYts27z++uukpqYyderU9G2dO3emSpUqOS7YRERERPI61U0iIiIiOXe7ayfN4BMRERH5F7t376Zx48ZO25o0acLu3bvvTiARERGRXEp1k4iIiEjO3Urt5HabMomIiIjcUQkJCSQkJGTa7u/vj7+//y2dOzY2lsDAQKdtgYGBxMTE3NJ5RURERO4G1U0iIiIiOZdbaycN8ImIiMgdYY49dlvPP+u7xUyePDnT9ldffZU+ffrc8vkNBkOOtomIiIjcKtVNIiIiIjmXX2snDfCJiIjInWGz3tbTP/PMMzz88MOZtt/qlVQAQUFBxMbGOm07f/48QUFBt3xuERERkUxUN4mIiIjkXD6tnTTAJyIiIv8JrlgWITs1atRg06ZNdOvWLX3b77//To0aNW7L44mIiIjcTqqbRERERHIut9ZOxtuSSERERORadtvt/boBycnJHDhwgAMHDgBw5swZDhw4kL6++cCBA/noo4/S23fv3p3ffvuNr776iqNHjzJp0iT++usvunbt6rrXR0REROQfqptEREREci6f1k6awSciIiL5zl9//UX37t3Tvx85ciSQsXb6uXPnMBozroOqVasWH330ERMmTGD8+PHcc889fPrpp5QtW/aOZxcRERG5k1Q3iYiIiOTcnaydDHa73e76pyAiIiLizHzuwG09v3uRirf1/CIiIiJ3iuomERERkZzLr7WTlugUERERERERERERERERyUO0RKeIiIjcEfYbXLNcREREJL9S3SQiIiKSc/m1dtIMPhEREREREREREREREZE8RDP4RERE5M6w5c+rqURERERumOomERERkZzLp7WTZvCJiIiIiIiIiIiIiIiI5CGawSciIiJ3Rj5dD11ERETkhqluEhEREcm5fFo7aQafiIiIiIiIiIiIiIiISB6iGXwiIiJyZ9isdzuBiIiISN6guklEREQk5/Jp7aQZfCIiIiIiIiIiIiIiIiJ5iGbwiYiIyJ2RT9dDFxEREblhqptEREREci6f1k4a4BMREZE7w5Y/iy0RERGRG6a6SURERCTn8mntpCU6RURERERERERERERERPIQzeATERGRO8KeT5dLEBEREblRqptEREREci6/1k6awSciIiIiIiIiIiIiIiKSh2gGn4iIiNwZ+XQ9dBEREZEbprpJREREJOfyae2kGXwiIiIiIiIiIiIiIiIieYhm8ImIiMidkU/XQxcRERG5YaqbRERERHIun9ZOmsEnIiIiIiIiIiIiIiIikodoBp+IiIjcGTbr3U4gIiIikjeobhIRERHJuX
xaO2kGn4jkepMmTSI8PDz9q0mTJvTp04dTp0457W/Tpk2Wx7du3Zrw8HAmTZqUvm3p0qX06tWLpk2bUrNmTR555BEWLVr0r1nOnDnjlOWfr379+rnmyYqIiIi4yO2ooU6ePMnbb79Nx44dqVixIt26dcvyWLvdztSpU2nWrBnVqlWja9euHDhwwPVPUkREROQ2mj9/Po888gg1a9akbt26dOrUiVGjRmVqFxERwZAhQ2jatClVqlShZcuWjBw5kri4OKd2x44dY/jw4bRv357q1avTqlUrRo4cSUJCwp16SiLyH6IZfCKSJ/j5+TF9+nQATp8+zSeffMKzzz6bPijn6enJmTNn2Lt3L1WrVk0/bs+ePURERODp6el0vpkzZ1K8eHEGDx5M4cKF+e233+jfvz8XLlzItqPqam+++Sa1atVK/75w4cKueJoi/235dD10EZG7ydU11OHDh1m/fj3Vq1fHbDZn+7jTpk1jypQpDBw4kDJlyjBjxoz0xw0ODr4Nz1TkP0Z1k4jIXff555/zySef0KNHD/r378/ly5fZt28fv/76K4MHD05vd/jwYbp160ZgYCD9+vWjePHiHDt2jKlTp7Ju3Tq++eYbQkNDAfj999/ZuXMnTz75JOHh4Zw+fZoJEyawe/dufvzxR4xGzccRuSn5tHbSAJ+I5Akmk4kaNWoAUKNGDYoUKULXrl1Zv349AN7e3lSuXJklS5Y4dU4tWbKEBg0a8Ndffzmd77PPPiMgICD9+4YNGxIdHc2MGTNyNMBXunTp9DwiIiIiuZWra6iWLVty//33A/Daa69x4cKFTI95+fJlpk2bxksvvcTTTz+d/tgtW7Zkzpw5WvlARERE8oQ5c+bwxBNP8L///S99W8uWLXn11VfTv7fb7QwYMICCBQvyww8/4OvrC0C9evVo0aIFHTt2ZPjw4UyZMgWABx54gK5du2IwGACoX78+YWFhvPDCC+zYsYN69erdwWcoInmdLgkQkTypSpUqAJw9ezZ9W4cOHVi6dCl2ux1wFFlLly6lQ4cOmY6/enDvHxUrVsy0dIKIuJDNdnu/RETkX91qDZWTq8p37txJUlIS7du3T9/m4+NDixYt2LBhw60+BZH8QXWTiMhdl5iYSFBQUKbt/wzOAWzfvp0DBw7Qu3fv9MG9f4SGhtKtWzfWrFnDmTNnAMcKUFcfD1CpUiUAzp8/7+qnIJJ/5NPaSQN8IpIn/dMpdXWh1aZNG2JjY/njjz8A2LFjB3FxcbRu3TpH59y1axdly5bNUdvBgwdTsWJFmjRpwqhRo0hNTb3BZyAiIiJy592OGupax44dw2Qycc899zhtL1u2LMeOHbu54CIiIiJ3WKVKlZgzZw4LFizIctUCcAzwAbRq1SrL/ffffz92uz29zsrKzp07AShXrtwtJhaR/EYDfCKSZ1gsFiwWC8ePH+fdd9+lQIECNGrUKH2/v78/TZs2ZfHixQAsXryYpk2b4u/v/6/n3rx5M6tXr6Zr167Xbefh4UHXrl15//33mTlzJk888QTfffedlpoSyQm77fZ+iYhIlm5nDZWVhIQEfHx8MJlMTtsLFixISkoKaWlpN/9kRPIL1U0iInfd22+/jY+PD4MGDaJhw4Y88MADfPLJJyQlJaW3iYqKwt/fHz8/vyzPUbRo0fR2WUlJSeGjjz6iXr163Hvvva5/EiL5RT6tnXQPPhHJE+Lj46lcuXL690WLFuXjjz8mJCTEqd0DDzzABx98wODBg1m+fDnDhg3713OfOXOG/v3706pVKx555JHrtg0JCeHtt99O/75+/foEBgYyfPhwDhw4QMWKFW/wmYnkI7l4SQMRkf+q21lDXc+1S08B6UuAZrVPRK6huklE5K6rUKECS5cuZePGjWzcuJEtW7YwZcoUlixZwvz58ylQoMC/nuN6dY/dbmfo0KHExcUxbdo0V0YXyX/yae2kAT4RyRP8/PyYMWMGBoOB4OBgQkJCsiySWrZsybBhw/
j4449JSUmhRYsW1z1vfHw8L774IkWKFOHDDz+8qWzt2rVj+PDh7Nu3TwN8IiIikqvcrhrqevz9/UlOTsZqtTrN4kt
"text/plain": [
"<Figure size 2160x2160 with 12 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Same top-k correlation heatmaps as before, now including the previous-hour targets.\n",
"k = 10\n",
"fig = plt.figure(figsize=(30, 30))\n",
"sns.set(font_scale=1.25)  # loop-invariant, so set once instead of on every iteration\n",
"for i, u_col in enumerate(out_cols):\n",
"    use_cols = new_fea_cols + [u_col]\n",
"    corrmat = use_data[use_cols].corr()\n",
"    cols = corrmat.nlargest(k, u_col)[u_col].index\n",
"    # Reuse the matrix already computed above instead of recomputing it with np.corrcoef.\n",
"    cm = corrmat.loc[cols, cols].values\n",
"    plt.subplot(3, 3, i + 1)\n",
"    plt.title(u_col)\n",
"    hm = sns.heatmap(cm, cbar=True, annot=True, square=True, fmt='.2f', annot_kws={'size': 10},\n",
"                     yticklabels=cols.values, xticklabels=cols.values)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"果然和上一时刻是强相关的,这就尴尬了。先做特征工程试试"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"对输出列取对数化"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# log1p-transform every target column in one vectorised assignment.\n",
"# Note: this overwrites use_data in place, so running the cell twice applies the log twice.\n",
"use_data[out_cols] = np.log1p(use_data[out_cols])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import norm\n",
"import scipy.stats as stats"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5cAAAGqCAYAAACf7rC/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3gUVdvA4d/uZnfTewESaoAQAiQgRboEVMAKqIggimDDgsorvqgfwiuKYgUUERSk2VDABjYQkKIgEJDeS4CE9L59vz+WLIR0sslukue+rlywM2dmzgzZw3nmNIXVarUihBBCCCGEEEJUgdLZGRBCCCGEEEIIUftJcCmEEEIIIYQQosokuBRCCCGEEEIIUWUSXAohhBBCCCGEqDIJLoUQQgghhBBCVJkEl0IIIYQQQgghqkyCS1HrzZkzh6ioKPtPr169eOqppzhz5kyR/TfddFOJx994441ERUUxZ84c+7a1a9fy2GOP0bt3bzp27MjQoUP58ccfy81LYmJikbwU/jz77LOOuVkhRK1UHeXU6dOnmTJlCrfffjvR0dHcf//9JR5rtVqZN28effv2pUOHDowcOZKDBw86/iaFEHXGypUrGTp0KB07dqRLly7ceeedzJgxo1i68+fP8+KLL9K7d2/atWtHfHw806dPJz09vUi6EydOMG3aNAYNGkRsbCz9+/dn+vTpZGdn19QtiRri5uwMCOEIPj4+fPLJJwCcPXuWWbNm8eCDD9oDQq1WS2JiIv/++y/t27e3H7d3717Onz+PVqstcr7PPvuMiIgIJk+eTEBAAJs2bWLixIlkZGSUWoG70gsvvECnTp3snwMCAhxxm0KIWszR5dTRo0fZuHEjsbGxGI3GUq87f/585s6dy6RJk2jRogWLFi2yXzckJKQa7lQIUZt9/PHHzJo1i3HjxjFx4kT0ej379+/n+++/Z/LkyfZ0R48e5f777ycoKIhnn32WiIgITpw4wbx589iwYQPLly8nLCwMgK1bt7Jr1y5GjBhBVFQUZ8+e5f333ychIYGvv/4apVLau+oKCS5FnaBSqYiLiwMgLi6Ohg0bMnLkSDZu3AiAh4cHMTExrFmzpkilbc2aNVx//fXs27evyPk++ugjAgMD7Z+7d+/OxYsXWbRoUYWCy+bNm9vzI4QQ4PhyKj4+ngEDBgDw9NNPk5GRUeyaer2e+fPn88gjjzBq1Cj7tePj41m2bJn0qhBCFLNs2TKGDx/Oc889Z98WHx/Pk08+af9stVp5/vnn8fPz46uvvsLb2xuArl270q9fP26//XamTZvG3LlzAbjlllsYOXIkCoUCgG7dutGgQQPGjh3LP//8Q9euXWvwDkV1ktcEok5q164dAOfOnbNvGzx4MGvXrsVqtQK2gnHt2rUMHjy42PFXBpaFoqOji3XzEEKIa1XVcqoib/p37dpFbm4ugwYNsm/z9PSkX79+/Pnnn1W9BSFEHZSTk0NwcHCx7YWBIcCOHTs4ePAgjz/+uD2wLBQWFsb999/P+vXrSUxMBGw9uK48HqBt27YApKWlOfoWhBNJcCnqpMLK2pWF40033URqaio7d+4E4J9//iE9PZ0bb7yxQufcvXs3kZGRFUo7efJkoqOj6dWrFzNmzECn01XyDoQQdV11lFNXO3HiBCqVimbNmhXZHhkZyYkTJ64t40KIOq1t27YsW7aMVatWldgjAmzBJUD//v1L3D9gwACsVqu9LCvJrl27AGjZsmUVcyxciQSXos4wmUyYTCZOnjzJ1KlT8fLyokePHvb9vr6+9O7dm59++gmAn376id69e+Pr61vuubdt28a6desYOXJkmek0Gg0jR47ktdde47PPPmP48OF88cUX0vVMCAFUbzlVkuzsbDw9PVGpVEW2+/n5UVBQgMFguPabEULUSVOmTMHT05P//ve/dO/enVtuuYVZs2aRm5trT5OcnIyvry8+Pj4lnqNRo0b2dCUpKCjgnXfeoW
vXrrRq1crxNyGcRoJLUSdkZmYSExNDTEwMAwcOJDExkffee4/Q0NAi6W655RZ++eUXDAYDv/zyC7fccku5505MTGTixIn079+foUOHlpk2NDSUKVOm0L9/f7p168ZTTz3Ff//7X9avXy+zMwpRz1VnOVWWq7uiAfZutyXtE0LUb23atGHt2rV89NFH3HfffVitVubOncuwYcPIy8ur0DnKKlusVisvvfQS6enpvP76647KtnARMqGPqBN8fHxYtGgRCoWCkJAQQkNDSyzY4uPjefnll3nvvfcoKCigX79+ZZ43MzOThx9+mIYNG/LWW29dU94GDhzItGnT2L9/P9HR0dd0DiFE7Vdd5VRZfH19ycvLw2w2F2m9zM7OxsPDA7Vafc3nFkLUXRqNhvj4eOLj4wFYsWIFL7/8Mt988w0PPPAAYWFhZGdnk5ubW2zMJVzu9l84W+yV3nrrLX777TcWLVpE48aNq/dGRI2TlktRJ6hUKtq3b0+7du0ICwsr9Y2Zp6cnN9xwA5999hn9+vXD09Oz1HMWFBTw2GOPYTQamT9/fplpK0JaCISo36qjnCpPixYtMJvNnD59usj2EydO0KJFi2s+rxCifrn77rvx9/e3j9Xu0qULAOvWrSsx/bp161AoFHTu3LnI9s8++4yFCxfy5ptvFtsn6gYJLkW9M2LECPr168e9995bahqTycSECRM4deoUCxYsICgo6Jqv98svvwAQExNzzecQQtQvFSmnKqJTp054e3vz888/27cVFBTwxx9/0Lt376pmUwhRB5U0e2t6enqRWWS7dOlCdHQ0c+fOLTIWE+DixYssWbKE/v37Ex4ebt/+ww8/8MYbb/Df//63xBmwRd0g3WJFvdOtWze6detWZppp06axceNGXnrpJbKyskhISLDva9u2LRqNBoAbb7yRLl262McMzJkzh7y8PHuFbseOHXz66afcdNNNtGnTptruSQhRt1SknCooKLCvkZmcnExubq49iOzbty8eHh5otVoeeeQR5s6di5+fHy1atGDRokVYLJYKrdkrhKh/brvtNvr370/Pnj0JCgri3LlzLFy4EHd3d+68807A1htr5syZjB49mnvvvZdx48YRHh7OiRMnmDdvHj4+PkyZMsV+zu3btzN58mR69uxJXFxckXpVgwYNaNCgQQ3fpaguElwKUYItW7YA8NprrxXbt27dOiIiIgAwm81YLBb7vhYtWvDpp5+yYsUK9Ho9DRs2ZOzYsTz++OM1k3EhRL2RlpbGhAkTimwr/HxlOfXII49gsVj4+OOPyczMpF27dixatKjEdeyEEOKJJ55g3bp1TJ8+naysLEJCQujYsSPvvfdekTGSrVu3ZuXKlXzwwQe8/fbbZGZmEhISQv/+/Rk/fnyRNcP//vtvjEYjmzdvZvPmzUWu9+STT/LUU0/V2P2J6qWwFk4ZJ4QQQgghhBBCXCMZcymEEEIIIYQQosokuBRCCCGEEEIIUWUSXAohhBBCCCGEqDIJLoUQQgghhBBCVJkEl0IIIYQQQgghqkyCSyGEEEIIIYQQVSbrXFZSRkYeFkvVVm8JCvImLS3XQTkSV5JnWz3q6nNVKhUEBHg5OxsOca1lU139t61p8hwdQ57jZVI+XSa/F0XJ8yhKnkdR1f08yiubJLisJIvFWuXgsvA8onrIs60e8lxdW1XKJvm3dQx5jo4hz7HucUTdSX4vipLnUZQ8j6Kc+TykW6wQQgghhBBCiCqT4FIIIYQQQgghRJVJcCmEEEIIIYQQosokuHRh51JyWbczEatV+pELIZwvJ9/Aj1tPYTSZnZ0VIYQQQrggmdDHRaVn63j7ywSy8gwYTRYGdmvi7CwJIeq577ecYt3ORABu7dHMuZkRQtQrJgvojSb7Z63aDTdpIhHC5cjX0kV9u/EEOqOZ6KYBfLPhOKlZBc7OkhCiHkvL1rMp4TwqpYIft50iNVvv7CwJIeoRvdHEjoPJ9p8rA00hhOuQ4NJFHT
mbQYcWQTwwMAqL1cquwynOzpIQoh7bvPc8RrOF3rENMRgt7D5y0dlZEkIIIYSLkeDSBaVl6UjL1tMqwo/QAE8iQrz
"text/plain": [
"<Figure size 1080x720 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure(figsize=(15, 10))\n",
"for index, col in enumerate(out_cols):\n",
" try:\n",
" plt.subplot(3,3,index+1)\n",
" plt.title(col)\n",
" sns.distplot(data[col], fit=norm)\n",
" except:\n",
" print(col)\n",
"fig.savefig('no-log.png')"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n",
"/home/zhaojh/miniconda3/envs/py37/lib/python3.7/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).\n",
" warnings.warn(msg, FutureWarning)\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4UAAAGqCAYAAAC8teFqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd1wU19rA8d/u0ntHQBQrIhZEBXvBEkvsSYwlxWiKpidv2k1uomkm98bkJprEGGvUWKOm2XtDRRELoCIo0qT3vuX9w0iCYkGBZZfnez98bnbmzM4z6/Iw58wpCp1Op0MIIYQQQgghRIOk1HcAQgghhBBCCCH0RyqFQgghhBBCCNGASaVQCCGEEEIIIRowqRQKIYQQQgghRAMmlUIhhBBCCCGEaMCkUiiEEEIIIYQQDZhUCoXezJ07F19f34qfXr168eKLL3LlypVK+wcPHlzl8YMGDcLX15e5c+dWbNuyZQvPPfccvXv3plOnTowdO5Y//vjjjrEkJiZWiuX6z6uvvlozFyuEMEi1kafi4+N5//33GTlyJH5+fjz22GNVHqvT6Zg/fz59+/alQ4cOTJo0iejo6Jq/SCGE0diwYQNjx46lU6dOdO3aldGjRzN79uybyiUnJ/Ovf/2L3r17065dO0JCQvj444/JysqqVC4uLo5Zs2YxdOhQOnbsyIABA/j444/Jy8urq0sSdcRE3wGIhs3W1paFCxcCkJCQwNdff82TTz5ZUZEzNzcnMTGRM2fO0L59+4rjTp8+TXJyMubm5pXeb+nSpTRu3Jh33nkHR0dH9u/fz+uvv052dvYtb7z+6a233iIwMLDitaOjY01cphDCgNV0noqJiWHfvn107NiR8vLyW553wYIFfPfdd7z55ps0b96cJUuWVJzX1dW1Fq5UCGHIfvjhB77++mumTZvG66+/TmlpKZGRkfz222+88847FeViYmJ47LHHcHZ25tVXX6Vx48bExcUxf/589u7dy8qVK3F3dwfg8OHDhIeHM2HCBHx9fUlISOB///sfERERrF27FqVSni8ZC6kUCr1SqVQEBAQAEBAQgIeHB5MmTWLfvn0AWFpa4u/vz+bNmyvdbG3evJlu3bpx9uzZSu/3/fff4+TkVPG6e/fupKWlsWTJkruqFDZr1qwiHiGEgJrPUyEhIQwcOBCAl156iezs7JvOWVpayoIFC3jmmWeYPHlyxblDQkJYsWKF9GIQQtxkxYoVjB8/ntdee61iW0hICC+88ELFa51OxxtvvIG9vT1r1qzBxsYGgKCgIPr378/IkSOZNWsW3333HQDDhw9n0qRJKBQKAIKDg2nUqBFTp07l+PHjBAUF1eEVitok1XtRr7Rr1w6ApKSkim3Dhg1jy5Yt6HQ64FpC27JlC8OGDbvp+H9WCK/z8/O7qTuEEELcq/vNU3fTsh4eHk5BQQFDhw6t2GZlZUX//v05cODA/V6CEMII5efn4+LictP26xU6gLCwMKKjo5k+fXpFhfA6d3d3HnvsMXbv3k1iYiJwrcfUP48HaNu2LQCZmZk1fQlCj6RSKOqV6zdZ/0xqgwcPJiMjgxMnTgBw/PhxsrKyGDRo0F2958mTJ2nRosVdlX3nnXfw8/OjV69ezJ49m5KSkmpegRDC2NVGnrpRXFwcKpUKHx+fSttbtGhBXFzcvQUuhDBqbdu2ZcWKFWzcuLHKHghwrVIIMGDAgCr3Dxw4EJ1OV5HLqhIeHg5Ay5Yt7zNiUZ9IpVDonVqtRq1Wc+nSJWbOnIm1tTU9evSo2G9nZ0fv3r35888/Afjzzz/p3bs3dnZ2d3zv0NBQdu3axaRJk25bzszMjEmTJvHJJ5+wdOlSxo8fz6pVq6SLlhACqN08VZW8vDysrKxQqVSVttvb21NcXExZWdm9X4wQwii9//77WFlZ8fbbb9O9e3eGDx/O119/TUFBQUWZ1NRU7OzssLW1rfI9PD09K8pVpbi4mDlz5hAUFESrVq
1q/iKE3kilUOhVTk4O/v7++Pv7M2TIEBITE/nqq69wc3OrVG748OFs27aNsrIytm3bxvDhw+/43omJibz++usMGDCAsWPH3rasm5sb77//PgMGDCA4OJgXX3yRt99+m927d8tsf0I0cLWZp27nxi5bQEX31Kr2CSEatjZt2rBlyxa+//57Jk6ciE6n47vvvmPcuHEUFhbe1XvcLrfodDreffddsrKy+PTTT2sqbFFPyEQzQq9sbW1ZsmQJCoUCV1dX3NzcqkxIISEhvPfee3z11VcUFxfTv3//275vTk4OTz/9NB4eHvz3v/+9p9iGDBnCrFmziIyMxM/P757eQwhh+GorT92OnZ0dhYWFaDSaSk8L8/LysLS0xNTU9J7fWwhhvMzMzAgJCSEkJASAdevW8d5777F+/XqeeOIJ3N3dycvLo6Cg4KYxhfB39/jrs4/+03//+1927NjBkiVL8Pb2rt0LEXVOnhQKvVKpVLRv35527drh7u5+yxYqKysr+vXrx9KlS+nfvz9WVla3fM/i4mKee+45ysvLWbBgwW3L3g1pkReiYauNPHUnzZs3R6PREB8fX2l7XFwczZs3v+f3FUI0LA8//DAODg4VY5G7du0KwK5du6osv2vXLhQKBV26dKm0fenSpSxevJjPP//8pn3COEilUBiMCRMm0L9/fx599NFbllGr1bz88stcvnyZH3/8EWdn53s+37Zt2wDw9/e/5/cQQjQsd5On7kZgYCA2NjZs3bq1YltxcTF79uyhd+/e9xumEMIIVTUbaFZWVqVZSbt27Yqfnx/fffddpbGGAGlpafz0008MGDAALy+viu2///47n332GW+//XaVMyoL4yDdR4XBCA4OJjg4+LZlZs2axb59+3j33XfJzc0lIiKiYl/btm0xMzMDYNCgQXTt2rWiT/zcuXMpLCysuBELCwtj0aJFDB48mDZt2tTaNQkhjMvd5Kni4uKKNQ5TU1MpKCioqPz17dsXS0tLzM3NeeaZZ/juu++wt7evWLxeq9Xe1ZqrQoiGZ8SIEQwYMICePXvi7OxMUlISixcvxsLCgtGjRwPXej/95z//4fHHH+fRRx9l2rRpeHl5VSxeb2try/vvv1/xnseOHeOdd96hZ8+eBAQEVLqvatSoEY0aNarjqxS1RSqFwqgcOnQIgE8++eSmfbt27aJx48YAaDQatFptxb7mzZuzaNEi1q1bR2lpKR4eHkydOpXp06fXTeBCiAYjMzOTl19+udK266//maeeeeYZtFotP/zwAzk5ObRr144lS5ZUuQ6ZEEI8//zz7Nq1i48//pjc3FxcXV3p1KkTX331VaUxgK1bt2bDhg3MmzePL774gpycHFxdXRkwYAAzZsyotObz0aNHKS8v5+DBgxw8eLDS+V544QVefPHFOrs+UbsUuutTmQkhhBBCCCGEaHBkTKEQQgghhBBCNGBSKRRCCCGEEEKIBkwqhUIIIYQQQgjRgEmlUAghhBBCCCEaMKkUCiGEEEIIIUQDJpVCIYQQQgghhGjAGtQ6hdnZhWi1xrsCh7OzDZmZBfoOw2DJ53d/6vrzUyoVODpa19n5alNd5ib5nlePfF7V19A/M8lNhqWhf1/vlnxOd6e+f063y096qRSuXLmSRYsWkZ6ejp+fH++99x4dOnS4Zfnc3Fy+/PJLduzYQUFBAU2aNOH9998nKCioWufVanVGn9yM/fpqm3x+90c+v3tT17lJ/p2qRz6v6pPPzDg0hPsmkO/r3ZLP6e4Y6udU55XCzZs3M3v2bGbNmkXHjh1ZtmwZ06ZNY+vWrTg5Od1UvqysjKeeegoXFxfmzZuHm5sbCQkJODs713XoQgghhBBCCGF06rxSuGTJEsaPH8+4ceMAmDVrFnv37mXjxo1MnTr1pvK//PILubm5rF69GlNTUwAaN25cpzELIYQQQgghhLGq04lmysrKiIyMpGfPnn8HoFTSo0cPIiIiqjxm9+7dBAQEMHPmTHr06M
GIESNYunQpOp1hPpoVQgghhBBCiPqkTp8UZmdno9FocHFxqbTd2dmZ+Pj4Ko9JSEggNDSUMWPG8OOPP3Lx4kU+/PB
"text/plain": [
"<Figure size 1080x720 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure(figsize=(15, 10))\n",
"for index, col in enumerate(out_cols):\n",
" try:\n",
" plt.subplot(3,3,index+1)\n",
" plt.title(col)\n",
" sns.distplot(use_data[col], fit=norm)\n",
" except:\n",
" print(col)\n",
"fig.savefig('logify.png')"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3gAAAJaCAYAAAB9feXHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3xT5f4H8E/SvWnTXZaMllW2QKGsQgERkCkgIgjiAPEi259XQVFBQECWooBwGWUJMmQUKENkyd57tulM926T8/ujt7lNm7Zpm93P+/XydW1yTvJ9Gvu5z/fknOeIBEEQQERERERERCZPbOgCiIiIiIiISDvY4BEREREREZkJNnhERERERERmgg0eERERERGRmWCDR0REREREZCbY4BEREREREZkJNngmbsWKFQgICFD+ExwcjMmTJ+PFixdaef3Ro0fjk08+0cprhYSE4Pvvvy9zmwsXLiAgIAAPHjxQPhYQEIDNmzeXWtOZM2ewYcMGrdRYWGfh77NZs2bo06cPVq1ahdzc3FJr0oS26yQyZswmZhORsWI+MZ/MnaWhC6Cqc3Jywtq1awEAL1++xI8//oixY8fiwIEDsLe3N3B1FdO0aVNs374dtWvXLnWbOXPmwNLyf//p/v333zhy5AjGjh2rtTr69euH0aNHIzc3FxcuXMCqVauQnp6OWbNmVfo1dVEnkTFjNjGbiIwV84n5ZM7Y4JkBCwsLtGzZEgDQsmVL+Pj4YNSoUTh16hRee+21EttnZ2fD1tZWz1VqxtHRUTmW0jRo0EDndXh6eirraNeuHWJiYrBt2zbMnDkTIpFI5+9PZA6YTdrHbCLSDuaT9jGfjAdP0TRDzZo1AwBERUUBKPjafMGCBVi1ahW6dOmCNm3aAACysrLwzTffoFOnTggMDMSQIUNw5swZta+5fft2hISEoHnz5nj//fcRGxur8vzixYvRv39/tGrVCl26dMG0adMQHx+v9rVWrVqFTp06oVWrVpg2bRrS0tKUz6k7zaC4oqcZrFixAuvXr0dUVJTy1IDZs2fj5MmTaNSoEV6+fKmy78uXL9GoUSMcP368rF9hCU2bNkVmZiaSkpJK3Wbz5s3o1asXmjVrhtDQUJVTCkqrk6g6YTYxm4iMFfOJ+WRO+A2eGSoMJ3d3d+VjBw4cQIMGDTBnzhzI5XIAwL///W9ERERg6tSpqF27Nnbu3IkPPvgAGzduRNu2bZX7Xr16FU+fPsXs2bORk5ODxYsXY+LEifj999+V28hkMnzwwQfw9PREYmIifvvtN4wZMwb79++HhYWFSh116tTBvHnzEB8fj0WLFuHzzz/H8uXLKzXWYcOG4dmzZ7hw4QJWrlwJAHBzc4Ofnx88PT3xxx9/YPLkycrt9+zZAzc3N3Tt2rVC7xMVFQUrKyu4uLiofX7Hjh2YN28e3n33XQQHB+PChQtYsGABcnNz8f7775daJ1F1wmxiNhEZK+YT88mcsMEzE/n5+QAKjrLMnTsXDg4O6Nixo8o2a9asgY2NDQDg8ePH+PPPPzF//nwMGjQIANC5c2cMGDAAP/30E9atW6fcLzExEdu2bYOfnx8AwNfXF2+99RZOnz6NLl26AADmz5+v3F4ulyuPRl25cgWvvvqq8rmcnBysWbMGDg4OAAA7OzvMnDkTjx8/Rv369Ss8bm9vb3h6esLa2rrE6QmDBg3Cnj178PHHH0MkEkEQBPzxxx944403VM5DV0cQBOTn5yMvLw/nz5/Htm3bEBISohK4hRQKBVasWIHBgwcrjywFBwcjLS0Na9aswZgxY8qsk8icMZuYTUTGivnEfDJXPEXTDCQnJ6Np06Zo2rQp+vTpg8jISCxduhSenp7KbTp06KAMKAC4efMmBEFAnz59lI+JxWL06dMHly9fVnn9Jk2aKAMKANq0aQOJRIIbN24oHzt16hRGjBiBNm3aoEmTJsrwevbsmcprdezYURlQANCrVy8IgoCbN29W7Z
egxtChQyGVSnHhwgUAwPnz5xEVFYXBgweXu+9vv/2Gpk2bomXLlvjwww/x6quv4ssvv1S7bUxMDOLi4lR+lwDQt29fpKen4/79+1UfDJEJYjapx2wiMjzmk3rMJ/PAb/DMgJOTE3777TeIRCJ4eHjA09OzxMWsRU85AIC4uDjY29vDzs5O5XGJRIKsrCzk5ubC2tpa+VhxEolEeZ74jRs3MHHiRPTs2RMTJkyARCKBSCTCm2++iZycnBL7FWVrawt7e3vExcVVbvBlqFWrFtq1a4fdu3ejQ4cO2L17N5o3b46GDRuWu++AAQPwzjvvwNraGn5+fnB0dCx128LfQ/GxFf6ckpJShVEQmS5mk3rMJiLDYz6px3wyD2zwzICFhQUCAwPL3KZ4aHl6eiIzMxNZWVkqQSWTyWBnZ6cMqMLHipPJZPDw8AAAHDt2DK6urli2bJnyfQrPZVe3X1HZ2dnIzMxUOWKmTcOGDcMXX3yBadOm4ejRoxov1evu7l7u77RQ4e+h+NgKfy7t3HMic8dsKh2ziciwmE+lYz6ZPp6iWU0FBgZCJBLhyJEjyscEQcCRI0eUK0UVunPnDqRSqfLny5cvQyaToXnz5gAKgsbKykolCPfv36/2fc+ePYuMjAzlz+Hh4RCJRMrVqyrDysqqxNGuQr169YKVlRU+/fRTKBQKvP7665V+n9IUniN++PBhlccPHToER0dHBAQElFsnERVgNmkPs4lIu5hP2sN80i1+g1dN1a9fH6+//jq+/vprpKenK1eCevLkCebMmaOyrZubGz744ANMnjxZuRJU06ZNleeKd+rUCRs3bsS3336LkJAQXLlyBfv27VP7vjY2Nvjggw8wfvx4xMfHY+HChQgNDa3S/Vnq1auHhIQE7N69Gw0bNoSrqytq1qypfL/+/ftjy5Yt6NevH5ydnSv9PqURi8WYPHkyvvzyS9SoUQOdOnXCP//8g7CwMEydOlV5/n5ZdRJRAWaT9jCbiLSL+aQ9zCfdYoNXjX3zzTdYvHgxVq9ejdTUVPj7++Pnn39WWeYXAFq1aoWgoCB89913SExMRLt27TBv3jzl8127dsX06dOxefNm7Ny5Ey1btsSaNWvQu3fvEu/5+uuvw8HBAZ9//jkyMzMREhKCuXPnVmkcr732Gi5cuIBFixYhMTERgwYNwoIFC5TP9+zZE1u2bMGQIUOq9D5lefPNN5Gbm4uNGzdi06ZN8PLywuzZszF27FiN6ySiAswm7WE2EWkX80l7mE+6IxIEQTB0EUS6tHDhQhw6dAjHjx+HWMyzkonIODCbiMhYMZ9MG7/BI7P15MkTPH78GGFhYfj4448ZUERkFJhNRGSsmE/mgd/gkdkaPXo0rl+/jpCQECxcuFBldSsiIkNhNhGRsWI+mQc2eERERERERGaC37sSERERERGZCTZ4REREREREZoINHhERERERkZkwyVU0k5IyoFBU7tJBicQRMlm6lisyPI7LdJjjmICqjUssFsHV1UHLFRkG80mVOY4J4LhMCbOpALOpJHMclzmOCeC4iisvm0yywVMohEqHVOH+5ojjMh3mOCbAfMdVEcynksxxTADHZUrMcUwVxWxSzxzHZY5jAjiuiuApmkRERERERGaCDR4REREREZGZ0OspmiEhIYiKiirx+FtvvYU5c+bosxQiIiIiIiKzo9cGb9euXZDL5cqfHz58iHfffRd9+vTRZxlERERERERmSa8Nnpubm8rPv/zyC2rXro127drpswwiIiIiIiKzZLBr8HJzc7Fv3z4MGTIEIpHIUGUQERERERGZDYPdJuHYsWNIS0vDoEGDKryvROJYpff28HCq0v7GiuMyHeY4JkB1XPKsLFjY2RmwGiIiVedux2DviftISMuDq4sdBnetj6Cm3oYui4gIgiBAyMmB2Na2yq9lsAbv999/R5cuXeDl5VXhfWWy9ErfM8LDwwnx8WmV2teYcVymwxzHBPxvXIIgIGHndiRHHEO9JT
/Cwr78mwSLxaIqH7ghIirLudsxCN99GiNfHsNZ1+b4R9QEGw/dAwA2eURkUIq8XET/vBq5Uilemb+wyq9nkFM0o6K
"text/plain": [
"<Figure size 1080x1080 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure(figsize=(15, 15))\n",
"for index, col in enumerate(out_cols):\n",
" try:\n",
" plt.subplot(3,3,index+1)\n",
" plt.title(col)\n",
" rest = stats.probplot(use_data[col], plot=plt)\n",
" except:\n",
" print(col)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"如果做minmax是否还保持高斯分布"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Per-column maximum and minimum of the target columns, used below for min-max scaling.\n",
"target_frame = use_data[out_cols]\n",
"maxs = target_frame.max()\n",
"mins = target_frame.min()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PM2.5 PM10 SO2 NO2 O3 CO "
]
}
],
"source": [
"out_data = list()\n",
"for col in out_cols:\n",
" print(col, end=' ')\n",
" d = (use_data[col] - mins[col]) / (maxs[col] - mins[col])\n",
" out_data.append(d)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4UAAAJaCAYAAACYz+LuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAEAAElEQVR4nOzdd3gU1frA8e9uei+bnlCkCoHQBOmBGDoovSgiIl4VRRQEufdn46oXBIOAgCDtIsaAKGCjd4UIgvQmgpQ0SO999/cHN2vKJtkkm2yyeT/P4yM7c2b2PQm+nnfmzBmFRqPRIIQQQgghhBCiXlIaOwAhhBBCCCGEEMYjRaEQQgghhBBC1GNSFAohhBBCCCFEPSZFoRBCCCGEEELUY1IUCiGEEEIIIUQ9JkWhEEIIIYQQQtRjUhTWQ59++iktW7bU/tOzZ0+mT5/OnTt3DHL+p59+mldffdUg5woKCuKjjz4qs82JEydo2bIlf/zxh3Zby5Yt+fLLL0uN6ZdffuG///2vQWIsiLPg59mmTRsGDhzIihUryMnJKTUmfRg6TiFqO8lPkp+EqI0kN0luMnXmxg5AGIeDgwNr164F4O7duyxdupTJkyfz448/Ymtra+ToKsbf358tW7bQsGHDUtu8++67mJv//df92LFj7Nmzh8mTJxssjqFDh/L000+Tk5PDiRMnWLFiBWlpabz55puVPmd1xClEbSf5SfKTELWR5CbJTaZMisJ6yszMjPbt2wPQvn17vL29eeqppzhy5AiDBg0q0T4rKwtra+sajlI/9vb22r6UplmzZtUeh4eHhzaOLl26EBMTw+bNm5kzZw4KhaLav18IUyH5yfAkPwlRdZKbDE9yU+0h00cFAG3atAEgMjISeHBLf8GCBaxYsYLevXvTqVMnADIzM/nggw/o0aMHbdu2ZdSoUfzyyy86z7llyxaCgoIICAjgH//4B/fu3Suy/+OPP2bYsGF06NCB3r17M2vWLGJjY3Wea8WKFfTo0YMOHTowa9YsUlNTtft0TYEorvAUiE8//ZT169cTGRmpnbYwd+5cDh8+zMMPP8zdu3eLHHv37l0efvhhDhw4UNaPsAR/f38yMjJITEwstc2XX35J//79adOmDf369Ssy3aG0OIWobyQ/SX4SojaS3CS5yZTInUIB/J3Q3NzctNt+/PFHmjVrxrvvvkt+fj4Ab731FgcPHmTmzJk0bNiQrVu38sILL7Bx40YeeeQR7bFnzpzhr7/+Yu7cuWRnZ/Pxxx8zbdo0vv32W22b+Ph4XnjhBTw8PEhISGDDhg0888wz/PDDD5iZmRWJo1GjRrz//vvExsayaNEi/u///o9ly5ZVqq9jxozh1q1bnDhxguXLlwPg6uqKr68vHh4e7Nixg+nTp2vbb9++HVdXVwIDAyv0PZGRkVhYWODk5KRz/9dff83777/Ps88+S8+ePTlx4gQLFiwgJyeHf/zjH6XGKUR9I/lJ8pMQtZHkJslNpkSKwnosLy8PeHA157333sPOzo7u3bsXabN69WqsrKwAuHHjBj/99BPz589nxIgRAPTq1YvHH3+czz77jHXr1mmPS0hIYPPmzfj6+gLg4+PDk08+ydGjR+nduzcA8+fP17bPz8/XXvX6/fff6dy5s3ZfdnY2q1evxs7ODgAbGxvmzJnDjRs3aNq0aYX77eXlhYeHB5aWliWmTowYMYLt27fzyiuvoFAo0Gg07NixgyeeeKLIvHpdNBoNeXl55Obm8uuvv7J582aCgoKKJOkCarWaTz/9lJEjR2qvYPXs2ZPU1FRWr17NM888U2acQpg6yU+Sn4SojSQ3SW4yVTJ9tJ5KSkrC398ff39/Bg4cSEREBJ988gkeHh7aNl27dtUmNYALFy6g0WgYOHCgdptSqWTgwIGcPn26yPlbt26tTWoAnTp1QqVScf78ee22I0eOMH78eDp16kTr1q21Ce/WrVtFztW9e3dtUgPo378/Go
2GCxcuVO2HoMPo0aOJiorixIkTAPz6669ERkYycuTIco/dsGED/v7+tG/fnhdffJHOnTvzzjvv6GwbExPD/fv3i/wsAQYPHkxaWhrXrl2remeEqKMkP+km+UkI45LcpJvkJtMgdwrrKQcHBzZs2IBCocDd3R0PD48SD/QWng4BcP/+fWxtbbGxsSmyXaVSkZmZSU5ODpaWltptxalUKu289/PnzzNt2jSCg4N5/vnnUalUKBQKxo4dS3Z2donjCrO2tsbW1pb79+9XrvNlaNCgAV26dGHbtm107dqVbdu2ERAQQPPmzcs99vHHH2fSpElYWlri6+uLvb19qW0Lfg7F+1bwOTk5uQq9EKJuk/ykm+QnIYxLcpNukptMgxSF9ZSZmRlt27Yts03xROfh4UFGRgaZmZlFklt8fDw2NjbapFawrbj4+Hjc3d0B2L9/Py4uLixZskT7PQVz83UdV1hWVhYZGRlFrswZ0pgxY3j77beZNWsW+/bt03tZZDc3t3J/pgUKfg7F+1bwubS59ELUB5KfSif5SQjjkdxUOslNdZ9MHxV6a9u2LQqFgj179mi3aTQa9uzZo11hq8Dly5eJiorSfj59+jTx8fEEBAQAD5KThYVFkeT5ww8/6Pze48ePk56erv28d+9eFAqFdtWvyrCwsChxVa1A//79sbCw4PXXX0etVjNkyJBKf09pCua87969u8j2Xbt2YW9vT8uWLcuNUwjxN8lPhiP5SQjDkdxkOJKbqpfcKRR6a9q0KUOGDOHf//43aWlp2hW0bt68ybvvvlukraurKy+88ALTp0/XrqDl7++vnfveo0cPNm7cyIcffkhQUBC///4733//vc7vtbKy4oUXXuC5554jNjaWhQsX0q9fvyq9P6dJkybExcWxbds2mjdvjouLC35+ftrvGzZsGKGhoQwdOhRHR8dKf09plEol06dP55133sHZ2ZkePXrw22+/ERYWxsyZM7XPI5QVpxDib5KfDEfykxCGI7nJcCQ3VS8pCkWFfPDBB3z88cesXLmSlJQUWrRowapVq4osqQzQoUMHunXrxn/+8x8SEhLo0qUL77//vnZ/YGAgb7zxBl9++SVbt26lffv2rF69mgEDBpT4ziFDhmBnZ8f//d//kZGRQVBQEO+9916V+jFo0CBOnDjBokWLSEhIYMSIESxYsEC7Pzg4mNDQUEaNGlWl7ynL2LFjycnJYePGjWzatAlPT0/mzp3L5MmT9Y5TCPE3yU+GI/lJCMOR3GQ4kpuqj0Kj0WiMHYQQtc3ChQvZtWsXBw4cQKmUWdZCiNpD8pMQojaS3FS3yZ1CIQq5efMmN27cICwsjFdeeUWSmhCi1pD8JISojSQ3mQa5UyhEIU8//TTnzp0jKCiIhQsXFlkVTAghjEnyk2n67bffWLduHRcvXiQ2NpZVq1bRt2/fUtt//fXX7Nixg+vXr2sXDnn99deLrOA4d+5ctm/fXuS4nj17FnlRuhCGIrnJNMidQiEK2bRpk7FDEEIInSQ/maaMjAxatmzJyJEjmT59erntT5w4wZAhQ+jYsSOWlpasXbuWKVOm8NNPPxV53UDfvn2LPI8mA3VRXSQ3mYYaLwqr44qYEEIIIURdFBgYSGBgoN7tQ0JCinz+4IMP2LNnDydOnGDYsGHa7ZaWltr3ugkhRHlqfNJvwRWxd955R6/2BVfEvvjiC8LCwvD09GTKlCncv3+/miMVQgghhKjdMjMzycvLK/Hi7vDwcLp168aAAQOYN28eSUlJxglQCFEn1Pidwuq6IiaEEEIIUd+EhITg7e1N165dtdt69+7NoEGD8Pb25s6dOyxevJgXXniBsLAwWQRECKFTnXumsLQrYvpITExHra74ujoqlT3x8WkVPq62k37VHabYJ6hav5RKBS4udgaOyDgqm5vANP9umGKfQPpVl9SV3LRmzRp27tzJpk2bijwzOHjwYO2fW7RoQcuWLQkODubUqVN06dJF7/NLbirJFP
tlin0C6ZcuZeWnOlcU6roipq+qJGmVyr7Sx9Zm0q+6wxT7BKbbr4pQqzWVHngVHG9qTLFPIP2qS2p7n9atW8fq1av
"text/plain": [
"<Figure size 1080x1080 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure(figsize=(15, 15))\n",
"for index, col in enumerate(out_data):\n",
" try:\n",
" plt.subplot(3,3,index+1)\n",
" rest = stats.probplot(col, plot=plt)\n",
" except:\n",
" print(col)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"下面这几列数据有问题直接drop掉"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# Every feature whose name contains 'agricultural' except NH3_agricultural,\n",
"# plus three explicitly named columns.\n",
"agricultural_cols = [name for name in new_fea_cols if 'agricultural' in name]\n",
"agricultural_cols.remove('NH3_agricultural')  # raises ValueError if that column is absent\n",
"drop_cols = agricultural_cols + ['NH3_power', 'CO_Bio', 'VOCs_Bio']"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA5MAAANnCAYAAACs7AyGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAADPlElEQVR4nOzdeVhU5d8/8DcgIAjIMihphIEGyK4oguCCouby9YuUWGhqLrgEpgYqWooaIKYSmLmGKD5hJupjLpVlmqKmKSFki1JupLImKMs4zO8PfpzHkW0ODMjyfl2XV3HOPfc5c8+858znbKMml8vlICIiIiIiIhJB/XmvABEREREREbU8LCaJiIiIiIhINBaTREREREREJBqLSSIiIiIiIhKNxSQRERERERGJxmKSiIiIiIiIRGMx2Qji4uJgbW0t/PP09ERQUBBu3bqlMH/YsGHVPt7HxwfW1taIi4sTpt28eRMffPAB/vOf/8DW1haTJk2q9rFyuRybN2/GwIED4ejoiICAAFy7dk31T7KBrK2tkZiY2Oj97t27FydOnFD5cpTh7e2NNWvWPJdlt2XJyckYN24cXFxc0KdPH/z3v/9FZGRklXZZWVkICwuDl5cX7O3t4e3tjdWrVyMvL0+hXWZmJsLDw/Hqq6/CyckJQ4YMwerVq/Hw4cOmekpKuXPnDqytrXHy5MlG73fbtm24cOGCSpejrMb67CBxKrdj06ZNqzIvODi4yjbq559/xsyZM9G3b184OjpizJgxiI+Ph1QqbapVViluw+h5++abb/DWW2/B1dUV9vb2GD58ODZs2KCwDVN2O9ccLF68GOPGjWv0ftPS0hS+XzeluLg4uLm5PZdlNyYWk41EX18fe/fuxd69e7Fo0SJcu3YNU6ZMwePHjwEA2trauHPnDq5evarwuLS0NGRlZUFbW1th+p9//olTp06hW7du6NatW43L3bp1KzZt2oQZM2Zg8+bN0NXVxZQpU5Cdna3y59gQe/fuxYgRI5pkOc9rQ0xNb8uWLVi2bBk8PT2xceNGrFmzBkOGDMH333+v0O7PP//EuHHj8Msvv2D+/Pn47LPPMHPmTJw4cQLjx4/H/fv3hbYpKSm4fPky3njjDWzduhWzZ8/G8ePH8fbbb6O8vLypn2KNOnXqhL1796J3796Nvqzt27fjp59+avTlUPN35swZpKWl1drm8OHDQnEZERGBrVu3wsfHBzExMXjnnXcgk8maYlVVitswep6ioqIwb948mJubIzo6Gp999hkmT56MkydP4v333wcgbjvXHMyZMwdRUVGNvpy0tDRs3Lix0ZfTlrR73ivQWmloaMDZ2RkA4OzsjBdeeAEBAQE4deoUAEBHRwd2dnY4evQoHBwchMcdPXoU/fr1Q3p6ukJ/3t7eGDp0KICKvb75+flVlllaWoqtW7di5syZmDhxorBsb29vJCYmYv78+Y3xVEUpKSlB+/bthbFpaUpLS6sU+tR8JCYmwt/fHwsWLBCmeXt745133hH+lsvlCAkJQceOHbF3717o6ekBAPr27YvBgwfjP//5D8LDw7Fp0yYAwKhRoxAQEAA1NTUAgJubG8zMzDBt2jRcunQJffv2bcJnWL3K92VLzJVcLkdZWRlz1QIZGhqic+fO2Lx5s5CXZ92/fx8ffPCBcNSkUr9+/eDk5ISZM2di9+7dmDJlShOtdcNwG0bP2/fff4/4+Hh8+OGHeO2114Tpffv2hb+/P86cOSN6O/c8VWbqpZdeet6rUi+V69+W8chkE7G3twcA3L17V5g2cuRIHDt2DHK5HEDFl6pjx45h5MiRVR6vrl73S3X58mUUFRXh1VdfFabp6upi8ODB+PHHH5Ve148++ghjxoyBi4sLBgwYgIULF1Y5sllWVobly5fD1dUVbm5uWLNmDXbu3Alra2uhzYULF2BtbY0ff/wRs2bNgouLC1auXAmg+lOEvv32W7z22mtwdHSEm5sbZsyYIY
xXdac/1HVa36RJk5CRkYEDBw4IpxwnJyfXuPxnTz9ITk6GtbU10tLSMGnSJDg6OmL79u1KjxE1vcLCQkgkkirTKwtBALh48SKuXbuG2bNnCxvYSp07d8akSZPw/fff486dOwAAIyMjhccDQM+ePQEAubm5Sq3X48ePsXLlSgwfPhxOTk7w9vZGeHg4ioqKFNr9+++/mD9/PpydneHp6YmtW7dizZo18Pb2FtrU9L6sKQ9ffPEFxowZAwcHB3h4eCA4OBiFhYUAKjISHBys0L4yt3/88Ue1z8Xb2xsFBQXYuHGjkKsLFy7UuPxns1uZs0uXLsHPzw8ODg44duyY0mNEzcusWbPw/fff4/fff692/r59+1BaWqqwg6fSwIED0bdvX+zevRsAIJVK8d///heTJk0StosAsGrVKri5uSn9GcttWAVuw1qnnTt3ws7OTqGQrKShoYGBAweK3s7V5YcffsDUqVPh7u6OXr16Yfz48Thz5kyVdseOHcOwYcPg6OiISZMm4ddff1V43wIV25CoqCh88sknGDBggHA2TXUZuXv3LhYsWAA3Nzc4OTlhzJgxOHz4MICat1XVbdcqJScnY9WqVQAgZKryrAllM2ptbS0U8/369cOYMWNEjVFrxCOTTaRygyKRSHD79m0AwLBhw7BixQr8/PPPcHV1xaVLl5CXlwcfHx9ER0eLXkZmZiY0NDSqnAZrZWWFY8eOKd1Pbm4uAgMD0alTJ+Tl5SE+Ph6TJ0/G4cOHoaGhAQCIjo7GgQMHsGDBAlhaWiI5ORlHjx6ttr+lS5di3LhxmDx5co17RA8ePIhFixZh1KhRmDNnDuRyOc6fP4+8vDx07dpV6XV/2vLlyxEUFARzc3PMmTMHAOq152vBggV44403MHfuXBgYGABQboyo6fXs2ROJiYno0qULBg0aBCMjoyptLl68CAAYMmRItX0MHToUcXFx+Pnnn/Hiiy9W2+by5csAgO7duyu1XiUlJZDJZJg/fz6MjY3xzz//YPPmzZg3bx527NghtFu8eDEuX76MpUuXQiKRYOfOnfj777+rfU9V97581qZNmxAbG4s333wTISEhKCkpwQ8//IDHjx9DX19fqXV/1saNG/HWW29h+PDheP311wFUjENBQYHSfZSUlGDx4sWYPn06unXrhk6dOik9RtS8jBgxArGxsdi8ebPCkcdKFy9ehLW1NczNzat9/NChQxEREYF79+7BzMwMa9asgZ+fHxISEjBlyhScP38ee/bswfr162FqaqrUOnEbpojbsNZDKpXiypUrePvtt2ttp4rt3NPu3LmDwYMH4+2334a6ujpOnz6NGTNmIDExUSgGr169igULFmD48OFYtmwZMjMzazwj7quvvkL37t2xfPnyGk9zz83Nhb+/P3R0dLBo0SK88MIL+OOPP/DPP//Uub41GTRoEN5++2189tln2Lt3LwBUKbaVsWPHDri6uiI6OlrY8aXMGLVWLCYb0ZMnTwAAt2/fxooVK9ChQwd4eHgIb2ADAwN4eXnhyJEjcHV1xZEjR+Dl5VXjF8O6PHz4ELq6ulU2BB07dkRxcTHKysqgpaVVZz9P36xEJpMJey4vX76MPn36ID8/H1988QWCg4OFU5O8vLwwevToavsbMWIE3n333RqXV15ejnXr1sHHxwfr168Xptf0Iais7t27Q0dHB8bGxg06JWnSpEmYPHmywrS6xoiejw8++ABz587F4sWLoaamBisrKwwbNgzTpk0TNhj379+HgYFBjcVUly5dhHbVKS4uxrp169C3b1/06NFDqfUyNjZGeHi48PeTJ0/w4osv4s0330RWVha6dOmCP/74A99//z1iYmKEswvc3d0xcOBAdOjQoUqfz74vn93D/PDhQ2zZsgWTJ0/GkiVLhOk13fhLWT179oSGhgbMzMwUclWfYrLy1P1KdY0RNT/q6uqYOXMmli5diuDgYLz88ssK8+/fvw8rK6saH19ZaN2/fx9mZmawtr
ZGUFAQNmzYgN69eyMsLAwjRoyo9oydmnAbpojbsNajoKAAZWVleOGFF2pt19Dt3LMqL50CKt7vbm5uuH79Or788ku
"text/plain": [
"<Figure size 1080x1080 with 9 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"fig = plt.figure(figsize=(15, 15))\n",
"for index, col in enumerate(drop_cols):\n",
" try:\n",
" plt.subplot(3,4,index+1)\n",
" plt.title(col)\n",
" plt.hist(data[col])\n",
" except:\n",
" print(col)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"# Drop the rejected feature columns and the two time columns in a single call.\n",
"# errors='ignore' keeps the cell re-runnable: columns already removed by an earlier\n",
"# run are skipped instead of raising KeyError. The frame is still modified in place.\n",
"use_data.drop(columns=drop_cols + ['date', 'pre_time'], inplace=True, errors='ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Persist the processed training frame. reset_index() turns the current index into\n",
"# a leading column, which is why index=False is passed to to_csv.\n",
"use_data.reset_index().to_csv('./data/train_data_mod.csv', encoding='utf-8-sig', index=False)"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [],
"source": [
"# Save the `data` frame with the drop_cols columns removed.\n",
"ori_data = data.drop(columns=drop_cols)\n",
"ori_data.to_csv('./data/ori_data.csv', encoding='utf-8-sig', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "py37",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.13"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "993bd31d5df1020fab369d79a34ff0a2a159e1798f3e25d3ad4b7751d38184c9"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}