In [34]:
import numpy as np
import netCDF4 as nc
import os
import pandas as pd
import datetime as dt

nc_path = "./data/"
nc_files = [f"{nc_path}{x}" for x in os.listdir(nc_path) if x.endswith('.nc')]
print(nc_files)
['./data/iowa.nc']
In [35]:
stations = pd.read_csv('./data/station.csv')
stations.head()
stations.dropna(inplace=True)
In [36]:
nc_data = nc.Dataset(nc_files[0])      # open the NetCDF file
print(list(nc_data.variables.keys()))  # variable names
['longitude', 'latitude', 'time', 't2m']
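Before pulling any arrays into memory it can help to dump each variable's metadata (units, dimensions, fill value). A minimal inspection sketch, reusing the `nc_data` handle opened above:

# Optional inspection step: print the repr of every variable in the file to
# confirm units and dimension order before reading the data.
for name in nc_data.variables:
    print(nc_data[name])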
In [37]:
lat = np.asarray(nc_data['latitude'][:]).tolist()
lon = np.asarray(nc_data['longitude'][:]).tolist()
len(lat), len(lon)
Out[37]:
(13, 31)
In [38]:
np.asarray(nc_data['time'][:])
Out[38]:
array([1016832, 1016833, 1016834, ..., 1025613, 1025614, 1025615])
In [39]:
nc_data['time']
Out[39]:
<class 'netCDF4._netCDF4.Variable'>
int32 time(time)
    units: hours since 1900-01-01 00:00:00.0
    long_name: time
    calendar: gregorian
unlimited dimensions:
current shape = (8784,)
filling on, default _FillValue of -2147483647 used
In [40]:
start_date = dt.datetime(1900, 1, 1)
start_date
Out[40]:
datetime.datetime(1900, 1, 1, 0, 0)
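The time axis stores offsets in hours since 1900-01-01 00:00, per the `units` attribute above. A quick sanity check of the conversion (a sketch using the first and last offsets shown in Out[38]):

# 1016832 hours after 1900-01-01 is the start of 2016; 1025615 hours is the
# last hour of 2016 (a leap year, hence 366 * 24 = 8784 hourly steps).
print(start_date + dt.timedelta(hours=1016832))   # 2016-01-01 00:00:00
print(start_date + dt.timedelta(hours=1025615))   # 2016-12-31 23:00:00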
In [41]:
np.asarray(nc_data['t2m'][:])
Out[41]:
array([[[267.10449818, 266.56524854, 265.81574602, ..., 266.35826383, 266.94435738, 267.10231939], [266.75916053, 266.21773211, 265.4017766 , ..., 266.49334859, 267.59908269, 267.86380524], [266.70360148, 265.86258993, 265.06733289, ..., 266.6197182 , 267.66117811, 267.95640366], ..., [270.23759303, 270.20055367, 269.91731143, ..., 270.11231282, 269.99683714, 269.90750689], [271.34223774, 271.32153927, 271.25726507, ..., 270.55569585, 270.37376718, 270.24086121], [271.76274352, 271.85643134, 271.79215714, ..., 271.32371806, 270.86617291, 270.71583665]], [[267.45746158, 267.17966631, 266.73846206, ..., 266.19376546, 266.66983029, 266.83541806], [267.5337191 , 267.25374505, 266.67418786, ..., 266.32231386, 267.30821471, 267.57620544], [267.67425083, 267.02933005, 266.40183956, ..., 266.40183956, 267.33218136, 267.62522813], ..., [269.75172367, 269.78549485, 269.62317527, ..., 270.06873709, 269.95652959, 269.69289643], [270.76159117, 270.80080932, 270.75832299, ..., 270.33237024, 270.18748095, 269.91186447], [271.10692881, 271.19516966, 271.15268333, ..., 270.63848973, 270.47290197, 270.30513541]], [[267.29841017, 267.25483444, 267.06745881, ..., 265.76345515, 266.3157775 , 266.53692432], [267.55441757, 267.51084184, 267.31801925, ..., 266.01728376, 266.99338007, 267.2613708 ], [267.77338561, 267.59363573, 267.37684648, ..., 266.09898825, 267.00318461, 267.28860563], ..., [269.32359214, 269.41945874, 269.29091034, ..., 270.08507799, 269.90750689, 269.55781167], [270.24957636, 270.34871114, 270.35088993, ..., 270.20600063, 270.01317803, 269.67110857], [270.62105944, 270.76703813, 270.76921692, ..., 270.43695199, 270.18857034, 270.01099925]], ..., [[271.48168007, 271.36729379, 271.24637114, ..., 270.89776532, 270.98164859, 270.96857587], [271.81830257, 271.70282689, 271.56447396, ..., 271.43701495, 271.53723913, 271.69084357], [272.18107051, 271.92179493, 271.77690563, ..., 272.04271757, 272.15165689, 272.3107083 ], ..., [275.28910932, 274.99388376, 274.74441272, ..., 276.07565121, 276.18350114, 276.35344648], [275.60285456, 275.36536684, 275.34575777, ..., 276.91993095, 276.99618847, 277.08551871], [275.90788466, 275.95254978, 275.9329407 , ..., 278.0169499 , 277.9058318 , 277.90365301]], [[269.88462964, 269.78658425, 269.64496313, ..., 269.72231005, 269.85957359, 269.92602658], [270.1308325 , 270.03278711, 269.93147354, ..., 270.19946427, 270.40427019, 270.72237301], [270.45656107, 270.21471578, 270.10904464, ..., 270.84329566, 271.05463794, 271.37056197], ..., [273.1440941 , 273.02535024, 272.86738823, ..., 274.12127981, 274.27488425, 274.4742432 ], [273.5765832 , 273.50795143, 273.58638774, ..., 274.72698243, 274.85553083, 275.00150951], [273.94697689, 274.12127981, 274.19971612, ..., 275.72704539, 275.64207272, 275.67148634]], [[269.74954488, 269.63951617, 269.51096777, ..., 268.99459539, 269.11442864, 269.11224985], [269.87046752, 269.76043881, 269.70596915, ..., 269.16345133, 269.50443141, 269.90097053], [269.85630541, 269.81381908, 269.78222667, ..., 269.61446012, 269.94672505, 270.34435357], ..., [273.29007279, 273.09833959, 273.18440165, ..., 273.11794867, 273.21381527, 273.48943175], [274.379466 , 274.08641922, 273.44258784, ..., 273.57331502, 273.64957255, 273.81298153], [273.89904359, 273.55370595, 272.90769578, ..., 274.27815243, 274.27488425, 274.25309638]]])
In [42]:
np.asarray(nc_data['t2m'][:]).shape
Out[42]:
(8784, 13, 31)
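The dimension order is (time, latitude, longitude): 8784 hourly steps over a 13 x 31 grid. A small consistency check (sketch):

# t2m is indexed as (time, latitude, longitude)
assert np.asarray(nc_data['t2m'][:]).shape == (nc_data['time'].shape[0], len(lat), len(lon))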
In [43]:
def find_closest_number_index(arr, target):
    arr.sort()  # sort the list in place (binary search needs sorted input)
    left, right = 0, len(arr) - 1
    closest_index = 0
    while left <= right:
        mid = left + (right - left) // 2
        # update the index of the closest value seen so far
        if abs(arr[mid] - target) < abs(arr[closest_index] - target):
            closest_index = mid
        # compare the target with the middle value to decide which half to search next
        if arr[mid] < target:
            left = mid + 1
        elif arr[mid] > target:
            right = mid - 1
        else:
            return mid  # exact match: return its index immediately
    return closest_index
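Two things worth noting about this helper: `arr.sort()` mutates the list passed in, and the returned position refers to the ascending-sorted order rather than the order the values have in the file. An equivalent non-mutating lookup can be written with NumPy (a sketch, not what the notebook uses); it returns an index into the array's original order instead:

def find_closest_index_np(arr, target):
    # index of the value closest to target, in the array's original order
    return int(np.abs(np.asarray(arr) - target).argmin())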
In [44]:
target_cols = ['t2m']
times = np.asarray(nc_data['time'][:]).tolist()
times = [start_date + dt.timedelta(hours=x) for x in times]
In [45]:
time_index = pd.to_datetime(times * stations.shape[0])
stations['lat'] = stations['lat'].astype(float)
stations['lon'] = stations['lon'].astype(float)
stations_out = stations.copy()
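`times * stations.shape[0]` is plain list repetition: the full hourly sequence is repeated once per station, so the index built here lines up with the station-major row order produced by `explode()` further down. A tiny illustration with hypothetical timestamps:

demo = [dt.datetime(2016, 1, 1, 0), dt.datetime(2016, 1, 1, 1)]
print(demo * 2)   # the two timestamps repeated once per "station"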
In [46]:
lon_index = list()
lat_index = list()
best_lons = list()
best_lats = list()
for i in range(stations.shape[0]):
    # get the station's latitude and longitude
    stat_lat = stations.iloc[i]['lat']
    stat_lon = stations.iloc[i]['lon']
    # find the nearest grid latitude/longitude for the station
    best_lat_index = find_closest_number_index(lat, stat_lat)
    best_lon_index = find_closest_number_index(lon, stat_lon)
    lat_index.append(best_lat_index)
    lon_index.append(best_lon_index)
    best_lons.append(lon[best_lon_index])
    best_lats.append(lat[best_lat_index])
stations_out['lon_index'] = lon_index
stations_out['lat_index'] = lat_index
stations_out['best_lon'] = best_lons
stations_out['best_lat'] = best_lats
stations_out
Out[46]:
|   | state | lon  | lat  | lon_index | lat_index | best_lon | best_lat |
|---|-------|------|------|-----------|-----------|----------|----------|
| 0 | iowa  | 93.5 | 41.1 | 30        | 2         | -89.0    | 41.0     |
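One thing the table makes visible: the station longitude from the CSV is positive (93.5) while the grid longitudes are negative (degrees east, western hemisphere), so the nearest-neighbour search snaps to the easternmost grid column (index 30, -89.0). If the CSV is actually storing degrees west as positive numbers, a possible normalisation before matching (an assumption, not part of the original notebook) would be:

# hypothetical fix: convert "degrees west, positive" to the grid's negative degrees-east convention
stations['lon'] = -stations['lon'].abs()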
In [47]:
for tgt in target_cols:
    tmp = np.asarray(nc_data[tgt][:])
    stations_out[tgt] = stations_out.apply(
        lambda x: tmp[:, len(lat) - x['lat_index'] - 1, x['lon_index']].squeeze(),
        axis=1
    )
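The `len(lat) - x['lat_index'] - 1` flip is needed because `find_closest_number_index` sorted the latitude list ascending, while the latitude axis in the file runs the other way (north-to-south storage is assumed here, which is what the mirroring implies). A minimal sketch of the mapping under that assumption:

asc_idx = 2                         # position in the ascending-sorted latitude list
orig_idx = len(lat) - asc_idx - 1   # corresponding row of the t2m (time, lat, lon) array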
In [48]:
result_df = stations_out.explode(column=target_cols).reset_index(drop=True)
result_df['date_time'] = time_index
result_df[['date_time', 't2m']].to_csv('./temper.csv', index=False, encoding='utf-8-sig')
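After `explode()` there should be one row per station per hour, and the repeated time index attached above lines up with that row order. A quick check (sketch):

assert len(result_df) == stations.shape[0] * len(times)
print(result_df[['date_time', 't2m']].head())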