# pv_forecasting/download.py
# Downloads GFS 0.25-degree surface forecast files (DSWRF + temperature)
# from NOMADS and converts them to arrays.
import datetime as dt
import os
import wget
import pygrib as pg
import numpy as np
import pandas as pd
from logzero import logger
import shutil
import sys
def check_newest_date()->str:
    """Check the newest available file under GFS and return its time string.

    Not implemented yet (stub).

    Returns:
        str: timestamp "YYYY-mm-DD HH:00:00" (GFS updates on the hour).
    """
    pass
def _download_cycle(date: str, cycle: str, hours: int = 120) -> None:
    """Download forecast hours f001..f{hours} of one GFS cycle into ./data/{date}/.

    Args:
        date (str): folder / GFS directory date, "YYYYmmdd".
        cycle (str): GFS cycle hour, e.g. "00" or "18".
        hours (int): number of hourly forecast steps to fetch (default 5 days).
    """
    for h in range(1, hours + 1):
        h_str = f"{h:03d}"
        url = (
            "https://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_0p25_1hr.pl"
            f"?dir=%2Fgfs.{date}%2F{cycle}%2Fatmos"
            f"&file=gfs.t{cycle}z.pgrb2.0p25.f{h_str}"
            "&var_DSWRF=on&var_TMP=on&lev_surface=on"
        )
        logger.info(url)
        out_path = f"./data/{date}/gfs.t{cycle}z.pgrb2.0p25.f{h_str}"
        # skip files already downloaded (resume-friendly)
        if not os.path.exists(out_path):
            wget.download(url, out=out_path)
            logger.info(f"download file {out_path} finished")
def fetch_files(now:str):
    """Pick the freshest usable GFS cycle and download its 5-day hourly forecast.

    Args:
        now (str): timestamp "YYYY-mm-dd HH:MM:SS"; None means the current time.

    Returns:
        tuple[list[str], str]: (hourly UTC+8 time range covered by the forecast
        as "YYYY-mm-dd HH:MM:SS" strings, date folder name "YYYYmmdd").
    """
    if now is None:
        now = dt.datetime.now()
    else:
        # BUG FIX: original format string used "$m" instead of the "%m"
        # month directive, so any explicit `now` raised ValueError.
        now = dt.datetime.strptime(now, "%Y-%m-%d %H:%M:%S")
    logger.info(now)
    hour = now.hour - 8  # GFS是UTC+0datetime的时间是UTC+8, 所以要取一下值
    logger.info(f"对应的UTC+0 时间:{hour}")
    # NOTE(review): dates come from the wall clock even when `now` is supplied
    # explicitly — confirm that is intended (original behavior preserved).
    today = dt.date.today()
    yesterday = today - dt.timedelta(days=1)
    today_str = dt.datetime.strftime(today, "%Y%m%d")
    yesterday_str = dt.datetime.strftime(yesterday, "%Y%m%d")
    if hour <= 6:
        logger.info("UTC+0 7点以前取前一天最后一次更新的结果")
        # yesterday's 18z cycle; forecast hour f001 is 19:00 UTC
        start_date = dt.datetime(yesterday.year, yesterday.month, yesterday.day, 19, 0, 0)
        date = yesterday_str
        cycle = "18"
    else:
        logger.info("UTC+0 7点以后调用, 则取今天00点的预测结果")
        # today's 00z cycle; forecast hour f001 is 01:00 UTC
        start_date = dt.datetime(today.year, today.month, today.day, 1, 0, 0)
        date = today_str
        # BUG FIX: the original saved t00z downloads under a "gfs.t18z..."
        # filename, breaking the already-downloaded check and the naming.
        cycle = "00"
    if not os.path.exists(f'./data/{date}'):
        os.mkdir(f'./data/{date}')
    _download_cycle(date, cycle)
    # 120 hourly steps cover start_date .. start_date+119h inclusive
    end_date = start_date + dt.timedelta(hours=119)
    # shift the returned range to UTC+8 local time
    time_range = pd.date_range(start_date + dt.timedelta(hours=8),
                               end_date + dt.timedelta(hours=8),
                               freq='H').astype(str).to_list()
    return time_range, date
def trans_data(date: str) -> np.ndarray:
    """Read downloaded GRIB files for `date` and stack radiation/temperature grids.

    Args:
        date (str): date folder name "YYYYmmdd" under ./data/.

    Returns:
        tuple[np.ndarray, np.ndarray]: (downward short-wave radiation flux,
        temperature), each stacked along axis 0 in forecast-hour order.
        Unreadable files contribute a (721, 1440) grid of NaN.
    """
    file_path = f"./data/{date}/"
    # BUG FIX: os.listdir order is arbitrary; sort so frames are in
    # forecast-hour order (names end in f001..f120, so lexical sort works).
    files = sorted(f"{file_path}/{x}" for x in os.listdir(file_path)
                   if x.startswith('gfs'))
    rad_list = list()
    tmp_list = list()
    for file in files:
        data = pg.open(file)
        try:
            try:
                grb = data.select(name="Downward short-wave radiation flux")[0]
                values = grb.values
            except Exception as e:
                logger.info(f"error occurs when read {file}, {e}")
                # placeholder keeps the time axis aligned when a file is bad
                values = np.full([721, 1440], np.nan)
            finally:
                rad_list.append(values)
            try:
                grb_tmp = data.select(name="Temperature")[0]
                values_tmp = grb_tmp.values
            except Exception as e:
                logger.info(f"error occurs when read {file}, {e}")
                values_tmp = np.full([721, 1440], np.nan)
            finally:
                tmp_list.append(values_tmp)
        finally:
            # BUG FIX: close the GRIB handle even if select() raises
            data.close()
    # BUG FIX: return ndarrays (the caller uses .shape; the original
    # returned .tolist() lists, which have no .shape attribute).
    rst_rad = np.asarray(rad_list)
    rst_tmp = np.asarray(tmp_list)
    # np.save(f"./data/{date}.npy", rst)
    # TODO: 在这里添加写入数据库的代码
    # shutil.rmtree(f"./data/{date}/")
    return rst_rad, rst_tmp
2023-08-23 16:00:06 +08:00
if __name__ == '__main__':
    # Smoke-test entry point: download the latest cycle and report array shapes.
    time_range, date = fetch_files(None)
    rad, tmp = trans_data(date)
    # BUG FIX: coerce to ndarray before .shape — trans_data historically
    # returned plain lists, which raised AttributeError here.
    rad = np.asarray(rad)
    tmp = np.asarray(tmp)
    print(rad.shape, tmp.shape)