wgz_forecast/load/matrix2series.ipynb

423 lines
71 KiB
Plaintext
Raw Normal View History

2025-02-12 16:59:05 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 198,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.412010Z",
"end_time": "2025-02-09T19:59:53.421172Z"
}
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 199,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"原始数据:\n",
" zone_id year month day h1 h2 h3 h4 h5 h6 \\\n",
"0 1 2004 1 1 16,853 16,450 16,517 16,873 17,064 17,727 \n",
"1 1 2004 1 2 14,155 14,038 14,019 14,489 14,920 16,072 \n",
"2 1 2004 1 3 14,439 14,272 14,109 14,081 14,775 15,491 \n",
"3 1 2004 1 4 11,273 10,415 9,943 9,859 9,881 10,248 \n",
"4 1 2004 1 5 10,750 10,321 10,107 10,065 10,419 12,101 \n",
"\n",
" ... h15 h16 h17 h18 h19 h20 h21 h22 \\\n",
"0 ... 13,518 13,138 14,130 16,809 18,150 18,235 17,925 16,904 \n",
"1 ... 16,127 15,448 15,839 17,727 18,895 18,650 18,443 17,580 \n",
"2 ... 13,507 13,414 13,826 15,825 16,996 16,394 15,406 14,278 \n",
"3 ... 14,207 13,614 14,162 16,237 17,430 17,218 16,633 15,238 \n",
"4 ... 13,845 14,350 15,501 17,307 18,786 19,089 19,192 18,416 \n",
"\n",
" h23 h24 \n",
"0 16,162 14,750 \n",
"1 16,467 15,258 \n",
"2 13,315 12,424 \n",
"3 13,580 11,727 \n",
"4 17,006 16,018 \n",
"\n",
"[5 rows x 28 columns]\n",
"\n",
"清理后的数据:\n",
" zone_id year month day h1 h2 h3 h4 h5 h6 \\\n",
"0 1 2004 1 1 16.853 16.450 16.517 16.873 17.064 17.727 \n",
"1 1 2004 1 2 14.155 14.038 14.019 14.489 14.920 16.072 \n",
"2 1 2004 1 3 14.439 14.272 14.109 14.081 14.775 15.491 \n",
"3 1 2004 1 4 11.273 10.415 9.943 9.859 9.881 10.248 \n",
"4 1 2004 1 5 10.750 10.321 10.107 10.065 10.419 12.101 \n",
"\n",
" ... h15 h16 h17 h18 h19 h20 h21 h22 \\\n",
"0 ... 13.518 13.138 14.130 16.809 18.150 18.235 17.925 16.904 \n",
"1 ... 16.127 15.448 15.839 17.727 18.895 18.650 18.443 17.580 \n",
"2 ... 13.507 13.414 13.826 15.825 16.996 16.394 15.406 14.278 \n",
"3 ... 14.207 13.614 14.162 16.237 17.430 17.218 16.633 15.238 \n",
"4 ... 13.845 14.350 15.501 17.307 18.786 19.089 19.192 18.416 \n",
"\n",
" h23 h24 \n",
"0 16.162 14.750 \n",
"1 16.467 15.258 \n",
"2 13.315 12.424 \n",
"3 13.580 11.727 \n",
"4 17.006 16.018 \n",
"\n",
"[5 rows x 28 columns]\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# 读取原始 CSV 文件\n",
"data = pd.read_csv('./data/load_original.csv')\n",
"\n",
"# 打印原始数据的前几行以检查格式\n",
"print(\"原始数据:\")\n",
"print(data.head())\n",
"\n",
"# 定义一个函数来清理数值列\n",
"def clean_numeric_column(column):\n",
" # 将逗号替换为小数点,并转换为浮点数\n",
" return column.apply(lambda x: float(str(x).replace(',', '.')) if isinstance(x, str) else x)\n",
"\n",
"# 获取所有需要清理的数值列(假设从 'h1' 到 'h24'\n",
"numeric_columns = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7', 'h8', 'h9', 'h10',\n",
" 'h11', 'h12', 'h13', 'h14', 'h15', 'h16', 'h17', 'h18', 'h19',\n",
" 'h20', 'h21', 'h22', 'h23', 'h24']\n",
"\n",
"# 对每个数值列应用清理函数\n",
"for col in numeric_columns:\n",
" data[col] = clean_numeric_column(data[col])\n",
"\n",
"# 打印清理后的数据\n",
"print(\"\\n清理后的数据\")\n",
"print(data.head())"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.416168Z",
"end_time": "2025-02-09T19:59:53.802570Z"
}
}
},
{
"cell_type": "code",
"execution_count": 200,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.802570Z",
"end_time": "2025-02-09T19:59:53.806323Z"
}
},
"outputs": [],
"source": [
"use_data = data[data['zone_id']==1].drop(columns=data.columns[:4])"
]
},
{
"cell_type": "code",
"execution_count": 201,
"outputs": [],
"source": [
"user_data_flatten=use_data.values.flatten()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.807321Z",
"end_time": "2025-02-09T19:59:53.812420Z"
}
}
},
{
"cell_type": "code",
"execution_count": 202,
"outputs": [
{
"data": {
"text/plain": "array([16.853, 16.45 , 16.517, ..., nan, nan, nan])"
},
"execution_count": 202,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"user_data_flatten"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.813420Z",
"end_time": "2025-02-09T19:59:53.818353Z"
}
}
},
{
"cell_type": "code",
"execution_count": 203,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"39600\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\liuhao\\AppData\\Local\\Temp\\ipykernel_18300\\3270028511.py:6: FutureWarning: 'T' is deprecated and will be removed in a future version, please use 'min' instead.\n",
" time_index = pd.date_range(start=start_date, periods=len(use_data.values.flatten()), freq='15T')\n"
]
}
],
"source": [
"import pandas as pd\n",
"\n",
"# 定义起始日期和时间\n",
"start_date = '2004-01-01 00:00:00'\n",
"# 使用 pd.date_range 生成时间索引\n",
"time_index = pd.date_range(start=start_date, periods=len(use_data.values.flatten()), freq='15T')\n",
"\n",
"# 打印生成的时间索引\n",
"print(len(time_index))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.819353Z",
"end_time": "2025-02-09T19:59:53.822932Z"
}
}
},
{
"cell_type": "code",
"execution_count": 204,
"outputs": [
{
"data": {
"text/plain": "DatetimeIndex(['2004-01-01 00:00:00', '2004-01-01 00:15:00',\n '2004-01-01 00:30:00', '2004-01-01 00:45:00',\n '2004-01-01 01:00:00', '2004-01-01 01:15:00',\n '2004-01-01 01:30:00', '2004-01-01 01:45:00',\n '2004-01-01 02:00:00', '2004-01-01 02:15:00',\n ...\n '2005-02-16 09:30:00', '2005-02-16 09:45:00',\n '2005-02-16 10:00:00', '2005-02-16 10:15:00',\n '2005-02-16 10:30:00', '2005-02-16 10:45:00',\n '2005-02-16 11:00:00', '2005-02-16 11:15:00',\n '2005-02-16 11:30:00', '2005-02-16 11:45:00'],\n dtype='datetime64[ns]', length=39600, freq='15min')"
},
"execution_count": 204,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"time_index"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.822932Z",
"end_time": "2025-02-09T19:59:53.827720Z"
}
}
},
{
"cell_type": "code",
"execution_count": 205,
"outputs": [
{
"data": {
"text/plain": "array([16.853, 16.45 , 16.517, ..., nan, nan, nan])"
},
"execution_count": 205,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"use_data.values.flatten()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.828720Z",
"end_time": "2025-02-09T19:59:53.832633Z"
}
}
},
{
"cell_type": "code",
"execution_count": 206,
"outputs": [],
"source": [
"# 展平数据并创建Series\n",
"data_series = pd.Series(use_data.values.flatten(), index=time_index)\n"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.832633Z",
"end_time": "2025-02-09T19:59:53.880694Z"
}
}
},
{
"cell_type": "code",
"execution_count": 207,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.837441Z",
"end_time": "2025-02-09T19:59:53.888760Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" index 0\n",
"0 2004-01-01 00:00:00 66.693\n",
"1 2004-01-01 01:00:00 72.720\n",
"2 2004-01-01 02:00:00 72.185\n",
"3 2004-01-01 03:00:00 56.217\n",
"4 2004-01-01 04:00:00 67.324\n",
"... ... ...\n",
"9895 2005-02-16 07:00:00 0.000\n",
"9896 2005-02-16 08:00:00 0.000\n",
"9897 2005-02-16 09:00:00 0.000\n",
"9898 2005-02-16 10:00:00 0.000\n",
"9899 2005-02-16 11:00:00 0.000\n",
"\n",
"[9900 rows x 2 columns]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\liuhao\\AppData\\Local\\Temp\\ipykernel_18300\\3846816627.py:2: FutureWarning: 'H' is deprecated and will be removed in a future version, please use 'h' instead.\n",
" hourly_data = data_series.resample('H').sum().to_frame().reset_index()\n"
]
}
],
"source": [
"# 重采样为每小时,并对每小时的数据进行求和\n",
"hourly_data = data_series.resample('H').sum().to_frame().reset_index()\n",
"\n",
"# 打印结果\n",
"print(hourly_data)"
]
},
{
"cell_type": "code",
"execution_count": 208,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.847029Z",
"end_time": "2025-02-09T19:59:53.888760Z"
}
},
"outputs": [],
"source": [
"hourly_data.columns = ['time', 'power']"
]
},
{
"cell_type": "code",
"execution_count": 209,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.850505Z",
"end_time": "2025-02-09T19:59:53.888760Z"
}
},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 210,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.853687Z",
"end_time": "2025-02-09T19:59:53.949802Z"
}
},
"outputs": [
{
"data": {
"text/plain": "[<matplotlib.lines.Line2D at 0x2d37c1168d0>]"
},
"execution_count": 210,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": "<Figure size 640x480 with 1 Axes>",
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/TGe4hAAAACXBIWXMAAA9hAAAPYQGoP6dpAACv2ElEQVR4nO2dd5xU1fn/P9N3F9hdlrasgqKioAIWBFETCxjFEk1MMSGJMX41RY3GVL+xJMYE0/wpxug3zZJYUjWJRo2CsUREqooigtIElpW2y/Yp9/fH7Llzzrnn3DJzZ+bOzvN+vXzJ3rlz75nbznM/TwsZhmGAIAiCIAgiQITLPQCCIAiCIAgZMlAIgiAIgggcZKAQBEEQBBE4yEAhCIIgCCJwkIFCEARBEETgIAOFIAiCIIjAQQYKQRAEQRCBgwwUgiAIgiACR7TcA8iHTCaDbdu2YdiwYQiFQuUeDkEQBEEQLjAMA/v27UNLSwvCYXuNpCINlG3btmHcuHHlHgZBEARBEHmwZcsW7L///rbrVKSBMmzYMADZH1hfX1/m0RAEQRAE4YaOjg6MGzfOnMftqEgDhbl16uvryUAhCIIgiArDTXgGBckSBEEQBBE4yEAhCIIgCCJwkIFCEARBEETgIAOFIAiCIIjAQQYKQRAEQRCBgwwUgiAIgiACBxkoBEEQBEEEDjJQCIIgCIIIHGSgEARBEAQROMhAIQiCIAgicJCBQhAEQRBE4CADhSAIgiCIwEEGSpWQzhj43Ysb8Pp77eUeCkEQBEE4UpHdjAnv/G3Fe7jpsTcBABtvObvMoyEIgiAIe0hBqRLWbN9X7iEQBEEQhGvIQKkSQqFyj4AgCIIg3EMGCkEQBEEQgYMMlCqBBBSCIAiikiADpUogFw9BEARRSZCBUiWEyEIhCIIgKggyUAiCIAiCCBxkoBAEQRAEETjIQKkSyMFDEARBVBJkoFQLZKEQBEEQFQQZKARBEARBBA4yUKqEECehpDNGGUdCEARBEM6QgVIl8FnGqUymfAMhCIIgCBeQgVIl8CEopKAQBEEQQYcMlCokmSYDhSAIggg2ZKBUIaSgVAbLNu7Gb154F4ZB54sgiOojWu4BEKWBt0koBqUy+NjdiwEAYxtqcfbUsWUeDUEQRGkhBaVKyHBv4Sly8VQUb+/YV+4hEARBlBwyUKoE3ighF09lQYoXQRDViGcD5fnnn8e5556LlpYWhEIhPProo9p1v/SlLyEUCuG2224Tlu/evRvz5s1DfX09Ghsbcckll6Czs9PrUAgPpLlJLpmmCa+SIMWLIIhqxLOB0tXVhWnTpuHOO++0Xe+RRx7Byy+/jJaWFstn8+bNwxtvvIGnn34ajz32GJ5//nlcdtllXodCeCCVIQWlUqGsK4IgqhHPQbJz587F3LlzbdfZunUrrrzySjz11FM4++yzhc/WrFmDJ598EkuXLsX06dMBAHfccQfOOuss/OxnP1MaNEThCDEoZKBUFOTiIQiiGvE9BiWTyeCzn/0svvnNb+KII46wfL548WI0NjaaxgkAzJkzB+FwGEuWLFFus6+vDx0dHcJ/hDd4NwG5DCoLUlAIgqhGfDdQfvzjHyMajeKrX/2q8vPW1laMHj1aWBaNRtHU1ITW1lbld+bPn4+Ghgbzv3Hjxvk97EEP79ahN/LKIkUxQwRBVCG+GijLly/H7bffjnvvvRchvvlLgVx77bVob283/9uyZYtv264WUhly8eTLu+93YsHCddjXmyzL/ul8EQRRjfhaqO2FF15AW1sbxo8fby5Lp9P4+te/jttuuw0bN25Ec3Mz2trahO+lUins3r0bzc3Nyu0mEgkkEgk/h1p1pKkOSt6ccdvzSKYNbG/vxfyPTin5/inriiCIasRXA+Wzn/0s5syZIyw744wz8NnPfhYXX3wxAGDWrFnYu3cvli9fjmOPPRYAsGjRImQyGcycOdPP4RAcaaqDkjcsBmTFpj1l2T8ZlARBVCOeDZTOzk6sX7/e/HvDhg1YtWoVmpqaMH78eIwYMUJYPxaLobm5GYcddhgAYPLkyTjzzDNx6aWX4u6770YymcQVV1yBCy+8kDJ4igjvJkhSDEpeRCNZtyXrjeOnG9MOcvEQBFGNeI5BWbZsGY4++mgcffTRAIBrrrkGRx99NG644QbX23jggQcwadIkzJ49G2eddRZOOukk/OpXv/I6FMIDfKG2NL2R50U0HIJhGLjwVy9j3m+WFLWJn2FQUDNBENWNZwXllFNO8fRg3rhxo2VZU1MTHnzwQa+7JgogTc0CCyYSDqG1oxdLNuwGAHT0ptBQGyvKvoTmjmRQEgRRhVAvniqBV1DIZZAf0XBYNBaKeBj5OKF+CpIlCKIKIQOlSqBmgYUTCYcEY6GYhoPYfZoMFIIgqg8yUKoE3iihyqT5EY2E0JtMm38XM/03Tb2TCIKocshAqRL4OihpikFxTX8qd6wi4RIaKAYZlARBVDdkoFQJpKDkR09/ziCJhkPo6c8ZJUU1UNKUxUMQRHVDBkqVQDEo+dGdTJn/zhhAD6eg9KeKdxxJQSEIotohA6VKSFMvnrzo6uMNkoxgoBRTQcnwWTwpUlAIgqg+yECpEtKUFZIXvIunP5VBb3/pY1D6UmmbNQmCIAYnZKBUCaSg5Ed3f87F05fOCH8XM82YP199STIoCYKoPshAqRL4QEuKQXFPN6eY9CXT6EnyQbJFjEHhzlEvKSgEQVQhZKBUCUJWCLl4XMMbKP1pKQaliLEhctYVGZUEQVQbZKBUCUIMCk12rhFcOqlMyeqgZKR+VxQoSxBEtUEGSpVAMSj50S0FyfZIikqxkDdNgbIEQVQbZKBUCbxRQt1x3WPr4ilRDAoA9JGCQhBElUEGSpWQFgq10WTnlh7JxVOqOiiygcK7lgiCIKoBMlCqBKEyKbl4XNMVgDooAGVeEQRRfZCBUiXwLp40uXhcs7c7af47lTHQ2ScqKsVCNkjIPiEIotogA6VKENJWycXjmt1dfcLf7T05g6WYMShyFo9hkIVCEER1QQZKFWAYYh0Nche4Z3dXv/B3h2CglE5BoTNGEES1QQZKFSDbI5Rm7J6dnaKBsrdMBoqsqBAEQQx2yEAJIGtb92HZxt2+bS8luXQqtZJsd38Kf1n+HvZIqkYxkRUUOe24WFgMlMo8ZQRBEHlDBkoAOeO25/Gxuxejtb3Xl+3Jk12lunhu/Psb+MafX8Xn711akv319KfNtOJwyPp5MlXEOigGKSgEQVQ3ZKAEmC17un3ZjuzSKWZwZzH5x6vbAACvbtlbkv3tGgiQjUfDaBoSt3xe1FL3FWpEEgRB+EW03AMg9Pg1ScnboanPHbsG4k9GDIlDIaBYXGd+QjEoBEFUO6SgBAzemPDrJVpWUChl1R0s/qRpSBzxqPVW6S+mi4fqoBAEUeWQghIw+NgDwyetw5KySpOdK3ZxBoqqf1EpK8mSgkIQRLVBCkrAEIyJIikoNNm5g2ULjdAoKCWtg0LnjCCIKoMMlIDBGw9+yfpyDAoZKO7oS2UzeGpikZIbKNZKskXbFUEQRCAhAyVg8G/Ofrl4rAqKL5sd9LDjFgmHUBNTxKAUMRtKtn3onBEEUW2QgRIwBAPFp0kpLWWbkLvAHRnOQKmNRSyfJ4vaLFDcNqleBEFUG2SgBAyhZ45Pk5I1i8eXzQ562PEPh0KoURkoRY1BEf8mA4UgiGqDDJSAwRslftVB6UvS23g+MCNBq6CUMIuHThlBENUGGSgBg1dQ/Kr42tWXEv6meAZ3MDdLJBxCbdxqoBQzBsVSXI/OGUEQVQYZKAGDN1D8qlTaKRkolRqDElKVcy0iZVVQKPOKIIgqhwyUgMHbJKriYPnQ1U8KSj4woyAixaAkBlKOyUAhCIIoHmSgBAw+9kAObs2Xzr5sPQ+mAviVvjzYYUZCWHLxDKvJFmAuahYPxaAQBFHlkIESMPj00pRPb+gsBmXowMRaxB53gwqzDkoohBquUNvQRPY4bmvvxZ3Pri/KvklBIQii2iEDJWDwNknSJwWFGSjszZ8mO3ewQNVoRFZQYua/731pY1H3zaBTRhBEtUE
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.plot(hourly_data.power[:500])"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.922540Z",
"end_time": "2025-02-09T19:59:53.950868Z"
}
},
"outputs": [],
"source": [
"hourly_data.to_csv('data/load_data_hourly.csv', index=False, encoding='utf-8-sig')"
]
},
{
"cell_type": "code",
"execution_count": 211,
"metadata": {
"ExecuteTime": {
"start_time": "2025-02-09T19:59:53.947223Z",
"end_time": "2025-02-09T19:59:53.950868Z"
}
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "py39",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.21"
}
},
"nbformat": 4,
"nbformat_minor": 2
}