{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Importing plotly failed. Interactive plots will not work.\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import os\n",
    "import datetime as dt\n",
    "from prophet import Prophet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sub-folders of ./data/ whose names start with '城市_', sorted by name.\n",
    "data_folder = sorted(x for x in os.listdir('./data/') if x.startswith('城市_'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['城市_20150101-20141231',\n",
       " '城市_20160101-20161231',\n",
       " '城市_20170101-20171231',\n",
       " '城市_20180101-20181231',\n",
       " '城市_20190101-20191231',\n",
       " '城市_20200101-20201231',\n",
       " '城市_20210101-20211231']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data_folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read every CSV in each data folder and merge the PM2.5 / O3 rows\n",
    "# into one large DataFrame.\n",
    "# The per-file frames are collected in a list and concatenated once;\n",
    "# calling pd.concat inside the loop re-copies the accumulated frame\n",
    "# for every file.\n",
    "pollutant_frames = []\n",
    "for folder in data_folder:\n",
    "    for file in os.listdir(f\"./data/{folder}\"):\n",
    "        if file.endswith('csv'):\n",
    "            data = pd.read_csv(f'./data/{folder}/{file}')\n",
    "            # Keep only the rows whose 'type' is PM2.5 or O3.\n",
    "            use_data = data[(data['type']=='PM2.5')|(data['type']=='O3')].copy()\n",
    "            pollutant_frames.append(use_data)\n",
    "# pd.concat raises ValueError on an empty list, so fall back to an\n",
    "# empty DataFrame when no CSV file was found.\n",
    "total_data = pd.concat(pollutant_frames) if pollutant_frames else pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(119419, 394)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "total_data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "
\n", " | date | \n", "hour | \n", "type | \n", "北京 | \n", "天津 | \n", "石家庄 | \n", "唐山 | \n", "秦皇岛 | \n", "邯郸 | \n", "保定 | \n", "... | \n", "果洛藏族自治州 | \n", "玉树藏族自治州 | \n", "海西蒙古族藏族自治州 | \n", "博尔塔拉蒙古自治州 | \n", "克孜勒苏柯尔克孜自治州 | \n", "兰州新区 | \n", "赣江新区 | \n", "儋州 | \n", "雄安新区 | \n", "西咸新区 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "20150504 | \n", "0 | \n", "PM2.5 | \n", "21.0 | \n", "16.0 | \n", "36.0 | \n", "29.0 | \n", "22.0 | \n", "105.0 | \n", "53.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
9 | \n", "20150504 | \n", "0 | \n", "O3 | \n", "70.0 | \n", "57.0 | \n", "17.0 | \n", "68.0 | \n", "52.0 | \n", "47.0 | \n", "61.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
16 | \n", "20150504 | \n", "1 | \n", "PM2.5 | \n", "15.0 | \n", "16.0 | \n", "44.0 | \n", "27.0 | \n", "16.0 | \n", "74.0 | \n", "31.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
24 | \n", "20150504 | \n", "1 | \n", "O3 | \n", "72.0 | \n", "56.0 | \n", "6.0 | \n", "75.0 | \n", "50.0 | \n", "53.0 | \n", "64.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
31 | \n", "20150504 | \n", "2 | \n", "PM2.5 | \n", "15.0 | \n", "19.0 | \n", "62.0 | \n", "25.0 | \n", "12.0 | \n", "57.0 | \n", "22.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "
5 rows × 394 columns
\n", "\n", " | ds | \n", "holiday | \n", "
---|---|---|
0 | \n", "2015-01-02 | \n", "元旦 | \n", "
1 | \n", "2015-01-03 | \n", "元旦 | \n", "
2 | \n", "2015-01-04 | \n", "元旦调休 | \n", "
3 | \n", "2015-01-05 | \n", "oridinary | \n", "
4 | \n", "2015-01-06 | \n", "oridinary | \n", "
... | \n", "... | \n", "... | \n", "
2551 | \n", "2021-12-27 | \n", "oridinary | \n", "
2552 | \n", "2021-12-28 | \n", "oridinary | \n", "
2553 | \n", "2021-12-29 | \n", "oridinary | \n", "
2554 | \n", "2021-12-30 | \n", "oridinary | \n", "
2555 | \n", "2021-12-31 | \n", "oridinary | \n", "
2556 rows × 2 columns
\n", "\n", " | date | \n", "hour | \n", "type | \n", "北京 | \n", "天津 | \n", "石家庄 | \n", "唐山 | \n", "秦皇岛 | \n", "邯郸 | \n", "保定 | \n", "... | \n", "阿克苏地区 | \n", "克州 | \n", "喀什地区 | \n", "和田地区 | \n", "伊犁哈萨克州 | \n", "塔城地区 | \n", "阿勒泰地区 | \n", "石河子 | \n", "五家渠 | \n", "三沙 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "20150102 | \n", "1 | \n", "PM2.5 | \n", "90.0 | \n", "103.0 | \n", "153.0 | \n", "107.0 | \n", "77.0 | \n", "118.0 | \n", "214.0 | \n", "... | \n", "32.0 | \n", "108.0 | \n", "160.0 | \n", "146.0 | \n", "146.0 | \n", "40.0 | \n", "23.0 | \n", "97.0 | \n", "19.0 | \n", "NaN | \n", "
9 | \n", "20150102 | \n", "1 | \n", "O3 | \n", "5.0 | \n", "3.0 | \n", "6.0 | \n", "9.0 | \n", "11.0 | \n", "6.0 | \n", "23.0 | \n", "... | \n", "17.0 | \n", "53.0 | \n", "15.0 | \n", "23.0 | \n", "8.0 | \n", "79.0 | \n", "44.0 | \n", "6.0 | \n", "14.0 | \n", "NaN | \n", "
16 | \n", "20150102 | \n", "2 | \n", "PM2.5 | \n", "83.0 | \n", "108.0 | \n", "173.0 | \n", "112.0 | \n", "83.0 | \n", "129.0 | \n", "242.0 | \n", "... | \n", "56.0 | \n", "104.0 | \n", "290.0 | \n", "225.0 | \n", "137.0 | \n", "48.0 | \n", "29.0 | \n", "83.0 | \n", "23.0 | \n", "NaN | \n", "
24 | \n", "20150102 | \n", "2 | \n", "O3 | \n", "9.0 | \n", "3.0 | \n", "5.0 | \n", "11.0 | \n", "14.0 | \n", "7.0 | \n", "24.0 | \n", "... | \n", "15.0 | \n", "56.0 | \n", "17.0 | \n", "21.0 | \n", "10.0 | \n", "70.0 | \n", "51.0 | \n", "6.0 | \n", "12.0 | \n", "NaN | \n", "
31 | \n", "20150102 | \n", "3 | \n", "PM2.5 | \n", "74.0 | \n", "127.0 | \n", "187.0 | \n", "108.0 | \n", "74.0 | \n", "142.0 | \n", "274.0 | \n", "... | \n", "41.0 | \n", "109.0 | \n", "221.0 | \n", "176.0 | \n", "129.0 | \n", "36.0 | \n", "15.0 | \n", "98.0 | \n", "29.0 | \n", "NaN | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
324 | \n", "20150103 | \n", "21 | \n", "O3 | \n", "7.0 | \n", "4.0 | \n", "5.0 | \n", "8.0 | \n", "2.0 | \n", "6.0 | \n", "19.0 | \n", "... | \n", "10.0 | \n", "45.0 | \n", "20.0 | \n", "21.0 | \n", "9.0 | \n", "85.0 | \n", "40.0 | \n", "4.0 | \n", "12.0 | \n", "NaN | \n", "
331 | \n", "20150103 | \n", "22 | \n", "PM2.5 | \n", "200.0 | \n", "357.0 | \n", "400.0 | \n", "233.0 | \n", "202.0 | \n", "183.0 | \n", "460.0 | \n", "... | \n", "61.0 | \n", "100.0 | \n", "222.0 | \n", "109.0 | \n", "40.0 | \n", "39.0 | \n", "28.0 | \n", "63.0 | \n", "61.0 | \n", "NaN | \n", "
339 | \n", "20150103 | \n", "22 | \n", "O3 | \n", "7.0 | \n", "4.0 | \n", "5.0 | \n", "9.0 | \n", "4.0 | \n", "7.0 | \n", "20.0 | \n", "... | \n", "16.0 | \n", "35.0 | \n", "20.0 | \n", "22.0 | \n", "8.0 | \n", "98.0 | \n", "31.0 | \n", "5.0 | \n", "16.0 | \n", "NaN | \n", "
346 | \n", "20150103 | \n", "23 | \n", "PM2.5 | \n", "208.0 | \n", "350.0 | \n", "415.0 | \n", "234.0 | \n", "187.0 | \n", "180.0 | \n", "509.0 | \n", "... | \n", "46.0 | \n", "83.0 | \n", "327.0 | \n", "92.0 | \n", "34.0 | \n", "31.0 | \n", "49.0 | \n", "48.0 | \n", "81.0 | \n", "NaN | \n", "
354 | \n", "20150103 | \n", "23 | \n", "O3 | \n", "7.0 | \n", "4.0 | \n", "5.0 | \n", "9.0 | \n", "3.0 | \n", "6.0 | \n", "25.0 | \n", "... | \n", "21.0 | \n", "49.0 | \n", "18.0 | \n", "27.0 | \n", "9.0 | \n", "92.0 | \n", "43.0 | \n", "5.0 | \n", "7.0 | \n", "NaN | \n", "
94 rows × 370 columns
\n", "