"""Download GFS 0.25-degree hourly forecast files from NOMADS and read them.

Only the surface downward short-wave radiation flux (DSWRF) and surface
temperature (TMP) fields are requested and extracted.
"""
import datetime as dt
import os
import shutil
import sys
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import pygrib as pg
import wget
from logzero import logger

# Timestamps passed to / returned from this module are local wall-clock times,
# which this script treats as UTC+8; GFS run times are UTC.
_LOCAL_UTC_OFFSET = dt.timedelta(hours=8)
# Number of hourly forecast steps fetched per run (f001 .. f120).
_FORECAST_HOURS = 5 * 24
_DATA_ROOT = "./data"
_FILTER_URL = "https://nomads.ncep.noaa.gov/cgi-bin/filter_gfs_0p25_1hr.pl"
# Shape of the NaN placeholder used when a field cannot be read from a file.
_GRID_SHAPE = (721, 1440)


def check_newest_date() -> str:
    """Return the time of the newest GFS file as "YYYY-mm-DD HH:00:00".

    Not implemented yet: the body is a stub and currently returns ``None``.
    """
    pass


def fetch_files(now: Optional[str] = None) -> Tuple[List[str], str]:
    """Download the 120 hourly GFS files of the run selected by ``now``.

    If the UTC hour is 6 or earlier, the previous UTC day's 18z run is used;
    otherwise the current UTC day's 00z run is used. Files that already exist
    on disk are not downloaded again.

    Args:
        now: Local (UTC+8) timestamp formatted as "YYYY-mm-dd HH:MM:SS".
            When ``None``, the current time is used.

    Returns:
        A tuple ``(time_range, date)`` where ``time_range`` is the list of
        120 hourly valid times as UTC+8 strings (the first one is one hour
        after the run time) and ``date`` is the run date as "YYYYmmdd",
        which is also the name of the sub-directory of ``./data`` holding
        the files.

    Raises:
        ValueError: If ``now`` does not match the expected format.
    """
    if now is None:
        # Take UTC from the clock directly so the result does not depend on
        # the host's configured time zone.
        utc_now = dt.datetime.now(dt.timezone.utc).replace(tzinfo=None)
        local_now = utc_now + _LOCAL_UTC_OFFSET
    else:
        local_now = dt.datetime.strptime(now, "%Y-%m-%d %H:%M:%S")
        utc_now = local_now - _LOCAL_UTC_OFFSET
    logger.info(local_now)
    # Subtracting a timedelta (instead of `hour - 8`) keeps the hour in 0..23
    # and rolls the date back correctly for early-morning local times.
    logger.info(f"对应的UTC+0 时间:{utc_now.hour}")

    if utc_now.hour <= 6:
        logger.info("UTC+0 7点以前取前一天最后一次更新的结果")
        run_time = dt.datetime.combine(
            utc_now.date() - dt.timedelta(days=1), dt.time(18)
        )
    else:
        logger.info("UTC+0 7点以后调用, 则取今天00点的预测结果")
        run_time = dt.datetime.combine(utc_now.date(), dt.time(0))

    date = run_time.strftime("%Y%m%d")
    cycle = run_time.strftime("%H")
    out_dir = os.path.join(_DATA_ROOT, date)
    os.makedirs(out_dir, exist_ok=True)

    for h in range(1, _FORECAST_HOURS + 1):
        # The local file name carries the real cycle, so a 00z file can never
        # be mistaken for an already-downloaded 18z file of the same date.
        file_name = f"gfs.t{cycle}z.pgrb2.0p25.f{h:03d}"
        url = (
            f"{_FILTER_URL}?dir=%2Fgfs.{date}%2F{cycle}%2Fatmos"
            f"&file={file_name}&var_DSWRF=on&var_TMP=on&lev_surface=on"
        )
        logger.info(url)
        out_path = os.path.join(out_dir, file_name)
        if not os.path.exists(out_path):
            wget.download(url, out=out_path)
            logger.info(f"download file {out_path} finished")

    # Forecast step f001 is valid one hour after the run time; shift to UTC+8.
    first_valid_local = run_time + dt.timedelta(hours=1) + _LOCAL_UTC_OFFSET
    time_range = (
        pd.date_range(
            first_valid_local,
            periods=_FORECAST_HOURS,
            freq=dt.timedelta(hours=1),
        )
        .astype(str)
        .to_list()
    )
    return time_range, date


def _read_field(grbs, name: str, file: str) -> np.ndarray:
    """Return the values of the first message called ``name``, or NaNs on error."""
    try:
        return grbs.select(name=name)[0].values
    except Exception as e:
        # Best effort: a missing or unreadable field becomes an all-NaN grid so
        # the output keeps one entry per file.
        logger.warning(f"error occurs when read {file}, {e}")
        return np.full(_GRID_SHAPE, np.nan)


def trans_data(date: str, cycle: Optional[str] = None) -> Tuple[list, list]:
    """Read radiation and temperature grids from the files of one date.

    Files are processed in sorted file-name order, i.e. by increasing
    forecast step within a cycle.

    Args:
        date: Run date as "YYYYmmdd"; files are read from ``./data/{date}/``.
        cycle: Optional two-digit cycle such as "00" or "18". When given,
            only files named ``gfs.t{cycle}z.*`` are read; when ``None``,
            every file whose name starts with ``gfs`` is read.

    Returns:
        A tuple ``(rst_rad, rst_tmp)`` of nested lists with one grid per
        file: downward short-wave radiation flux and temperature. A field
        that cannot be read is replaced by a 721 x 1440 grid of NaN.
    """
    file_dir = os.path.join(_DATA_ROOT, date)
    prefix = "gfs" if cycle is None else f"gfs.t{cycle}z."
    names = sorted(x for x in os.listdir(file_dir) if x.startswith(prefix))

    if cycle is None:
        found_cycles = {x.split(".")[1] for x in names if x.count(".") >= 2}
        if len(found_cycles) > 1:
            logger.warning(
                f"{file_dir} holds files from several cycles "
                f"{sorted(found_cycles)}; pass cycle= to read only one"
            )

    rad_list = []
    tmp_list = []
    for name in names:
        file = os.path.join(file_dir, name)
        data = pg.open(file)
        try:
            rad_list.append(
                _read_field(data, "Downward short-wave radiation flux", file)
            )
            tmp_list.append(_read_field(data, "Temperature", file))
        finally:
            # Always release the GRIB handle, even if reading fails.
            data.close()

    rst_rad = np.asarray(rad_list).tolist()
    rst_tmp = np.asarray(tmp_list).tolist()
    # TODO: write the results to the database here, then remove the
    # downloaded files with shutil.rmtree(f"./data/{date}/").
    return rst_rad, rst_tmp


if __name__ == '__main__':
    time_range, date = fetch_files(None)
    rad, tmp = trans_data(date)
    # trans_data returns nested lists, which have no .shape attribute;
    # report the number of forecast steps instead.
    print(len(rad), len(tmp))