# pv_forecasting/download.py
#
# Download GFS 0.25-degree hourly forecast files (surface DSWRF and
# temperature) from the NOMADS grib filter and convert them to arrays
# for PV power forecasting.

import datetime as dt
import os
import wget
import pygrib as pg
import numpy as np
import pandas as pd
from logzero import logger
import shutil
import sys
def check_newest_date() -> str:
    """Check the newest available file under GFS and return its timestamp.

    Returns:
        str: timestamp string "YYYY-mm-DD HH:00:00" (updated on the hour).

    Note:
        Not implemented yet — currently a stub.
    """
    pass
def fetch_files(now: str):
    """Download 5 days (120 forecast hours) of GFS 0.25° hourly forecasts.

    GFS runs are published in UTC+0 while `now` is interpreted as UTC+8
    local time.  Before 07:00 UTC+0 the previous day's 18z run (first
    forecast hour = 19:00 UTC+0) is fetched; afterwards today's 00z run
    (first forecast hour = 01:00 UTC+0) is used.

    Args:
        now (str): timestamp "YYYY-mm-dd HH:MM:SS" in UTC+8, or None to
            use the current time.

    Returns:
        tuple[list[str], str]: (forecast timestamps converted to UTC+8,
            as strings; date folder name "YYYYmmdd" the files were saved
            under).
    """
    if now is None:
        now = dt.datetime.now()
    else:
        # BUG FIX: format string previously used "$m" instead of "%m",
        # so every explicitly-passed timestamp raised ValueError.
        now = dt.datetime.strptime(now, "%Y-%m-%d %H:%M:%S")
    logger.info(now)
    hour = now.hour - 8  # GFS is UTC+0, `now` is UTC+8, so shift back
    logger.info(f"对应的UTC+0 时间:{hour}")
    # BUG FIX: derive dates from `now` (was dt.date.today()), so an
    # explicitly-passed timestamp is actually honored.
    today = now.date()
    yesterday = today - dt.timedelta(days=1)
    today_str = today.strftime("%Y%m%d")
    yesterday_str = yesterday.strftime("%Y%m%d")
    if hour <= 6:
        logger.info("UTC+0 7点以前取前一天最后一次更新的结果")
        # f001 of the 18z run is valid at 19:00 UTC+0.
        start_date = dt.datetime(yesterday.year, yesterday.month, yesterday.day, 19, 0, 0)
        date = yesterday_str
        _download_run(date, "18")
    else:
        logger.info("UTC+0 7点以后调用, 则取今天00点的预测结果")
        # f001 of the 00z run is valid at 01:00 UTC+0.
        start_date = dt.datetime(today.year, today.month, today.day, 1, 0, 0)
        date = today_str
        _download_run(date, "00")
    end_date = start_date + dt.timedelta(hours=119)
    # Shift the UTC+0 forecast hours into UTC+8 for the caller.
    time_range = pd.date_range(start_date + dt.timedelta(hours=8),
                               end_date + dt.timedelta(hours=8),
                               freq='H').astype(str).to_list()
    return time_range, date


def _download_run(date: str, run: str) -> None:
    """Download forecast hours f001..f120 of one GFS run into ./data/{date}/.

    Args:
        date (str): folder name "YYYYmmdd".
        run (str): model cycle, "00" or "18".
    """
    # makedirs(exist_ok=True) also creates ./data if missing (mkdir would fail).
    os.makedirs(f'./data/{date}', exist_ok=True)
    for h in range(1, 5 * 24 + 1):
        h_str = "{:03d}".format(h)
        url = (f"https://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_0p25_1hr.pl"
               f"?dir=%2Fgfs.{date}%2F{run}%2Fatmos"
               f"&file=gfs.t{run}z.pgrb2.0p25.f{h_str}"
               f"&var_DSWRF=on&var_TMP=on&lev_surface=on")
        logger.info(url)
        # BUG FIX: the 00z branch previously saved its files under a t18z
        # name; the local filename now matches the run actually requested.
        out_path = f"./data/{date}/gfs.t{run}z.pgrb2.0p25.f{h_str}"
        if not os.path.exists(out_path):
            wget.download(url, out=out_path)
            logger.info(f"download file {out_path} finished")
def trans_data(date: str):
    """Read the downloaded GRIB2 files and extract radiation and temperature.

    Args:
        date (str): folder name "YYYYmmdd" under ./data/.

    Returns:
        tuple[list, list]: (downward short-wave radiation fields,
            temperature fields), one [721, 1440] grid per forecast hour,
            ordered by forecast hour.  Fields that cannot be read are
            filled with NaN so the time axis stays aligned.
    """
    file_path = f"./data/{date}/"
    # BUG FIX: os.listdir order is arbitrary; sort so the output arrays
    # follow forecast-hour order (filenames end in f001..f120).
    files = sorted(f"{file_path}/{x}" for x in os.listdir(file_path) if x.startswith('gfs'))
    rad_list = list()
    tmp_list = list()
    for file in files:
        data = pg.open(file)
        try:
            grb = data.select(name="Downward short-wave radiation flux")[0]
            values = grb.values
        except Exception as e:
            logger.info(f"error occurs when read {file}, {e}")
            # NaN placeholder keeps one entry per file even on read failure.
            values = np.full([721, 1440], np.nan)
        finally:
            rad_list.append(values)
        try:
            grb_tmp = data.select(name="Temperature")[0]
            values_tmp = grb_tmp.values
        except Exception as e:
            logger.info(f"error occurs when read {file}, {e}")
            values_tmp = np.full([721, 1440], np.nan)
        finally:
            tmp_list.append(values_tmp)
        data.close()
    rst_rad = np.asarray(rad_list).tolist()
    rst_tmp = np.asarray(tmp_list).tolist()
    # np.save(f"./data/{date}.npy", rst)
    # TODO: add the database-write code here
    # shutil.rmtree(f"./data/{date}/")
    return rst_rad, rst_tmp
if __name__ == '__main__':
    # Fetch the latest available run, then convert the GRIB files to arrays.
    time_range, date = fetch_files(None)
    rad, tmp = trans_data(date)
    # BUG FIX: trans_data returns plain Python lists (via .tolist()), which
    # have no .shape attribute — report the shapes via numpy instead of
    # crashing with AttributeError.
    print(np.asarray(rad).shape, np.asarray(tmp).shape)