404 lines
15 KiB
Plaintext
404 lines
15 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 34,
|
||
|
"id": "initial_id",
|
||
|
"metadata": {
|
||
|
"collapsed": true,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.671133500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.652982600Z"
|
||
|
}
|
||
|
},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['./data/iowa.nc']\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import datetime as dt\n",
"import os\n",
"import warnings\n",
"\n",
"import netCDF4 as nc\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
|
||
|
"nc_path = \"./data/\"\n",
|
||
|
"nc_files = [f\"{nc_path}{x}\" for x in os.listdir(nc_path) if x.endswith('.nc')]\n",
|
||
|
"print(nc_files)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 35,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"stations = pd.read_csv('./data/station.csv')\n",
|
||
|
"stations.head()\n",
|
||
|
"stations.dropna(inplace=True)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.722796200Z",
|
||
|
"start_time": "2024-06-11T06:41:37.663028100Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "9c4ed0ec8dc7ff7d"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 36,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"['longitude', 'latitude', 'time', 't2m']\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"nc_data = nc.Dataset(nc_files[0]) # 读文件\n",
|
||
|
"print(list(nc_data.variables.keys())) # keys"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.730197500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.678192900Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "a88ddc29d8fa3d06"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 37,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": "(13, 31)"
|
||
|
},
|
||
|
"execution_count": 37,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"lat = np.asarray(nc_data['latitude'][:]).tolist()\n",
|
||
|
"lon = np.asarray(nc_data['longitude'][:]).tolist()\n",
|
||
|
"len(lat), len(lon)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.730197500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.693749900Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "4ec1510ef5baa4b9"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 38,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": "array([1016832, 1016833, 1016834, ..., 1025613, 1025614, 1025615])"
|
||
|
},
|
||
|
"execution_count": 38,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Raw values of the time axis (integer offsets; their unit is part of the\n",
"# variable's metadata, see nc_data['time']).\n",
"np.asarray(nc_data['time'][:])"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.731222200Z",
|
||
|
"start_time": "2024-06-11T06:41:37.710255800Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "9e1cfd59ea63dee"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 39,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": "<class 'netCDF4._netCDF4.Variable'>\nint32 time(time)\n units: hours since 1900-01-01 00:00:00.0\n long_name: time\n calendar: gregorian\nunlimited dimensions: \ncurrent shape = (8784,)\nfilling on, default _FillValue of -2147483647 used"
|
||
|
},
|
||
|
"execution_count": 39,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Show the time variable's metadata (units, calendar, shape).\n",
"nc_data['time']"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.739215300Z",
|
||
|
"start_time": "2024-06-11T06:41:37.723796300Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "305b595052773792"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 40,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": "datetime.datetime(1900, 1, 1, 0, 0)"
|
||
|
},
|
||
|
"execution_count": 40,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Reference epoch of the time axis; it has to match the 'hours since ...'\n",
"# units reported by nc_data['time'].\n",
"start_date = dt.datetime(1900, 1, 1)\n",
|
||
|
"start_date"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.788384900Z",
|
||
|
"start_time": "2024-06-11T06:41:37.739215300Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "9dc1b63dbd6e5847"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 41,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": "array([[[267.10449818, 266.56524854, 265.81574602, ..., 266.35826383,\n 266.94435738, 267.10231939],\n [266.75916053, 266.21773211, 265.4017766 , ..., 266.49334859,\n 267.59908269, 267.86380524],\n [266.70360148, 265.86258993, 265.06733289, ..., 266.6197182 ,\n 267.66117811, 267.95640366],\n ...,\n [270.23759303, 270.20055367, 269.91731143, ..., 270.11231282,\n 269.99683714, 269.90750689],\n [271.34223774, 271.32153927, 271.25726507, ..., 270.55569585,\n 270.37376718, 270.24086121],\n [271.76274352, 271.85643134, 271.79215714, ..., 271.32371806,\n 270.86617291, 270.71583665]],\n\n [[267.45746158, 267.17966631, 266.73846206, ..., 266.19376546,\n 266.66983029, 266.83541806],\n [267.5337191 , 267.25374505, 266.67418786, ..., 266.32231386,\n 267.30821471, 267.57620544],\n [267.67425083, 267.02933005, 266.40183956, ..., 266.40183956,\n 267.33218136, 267.62522813],\n ...,\n [269.75172367, 269.78549485, 269.62317527, ..., 270.06873709,\n 269.95652959, 269.69289643],\n [270.76159117, 270.80080932, 270.75832299, ..., 270.33237024,\n 270.18748095, 269.91186447],\n [271.10692881, 271.19516966, 271.15268333, ..., 270.63848973,\n 270.47290197, 270.30513541]],\n\n [[267.29841017, 267.25483444, 267.06745881, ..., 265.76345515,\n 266.3157775 , 266.53692432],\n [267.55441757, 267.51084184, 267.31801925, ..., 266.01728376,\n 266.99338007, 267.2613708 ],\n [267.77338561, 267.59363573, 267.37684648, ..., 266.09898825,\n 267.00318461, 267.28860563],\n ...,\n [269.32359214, 269.41945874, 269.29091034, ..., 270.08507799,\n 269.90750689, 269.55781167],\n [270.24957636, 270.34871114, 270.35088993, ..., 270.20600063,\n 270.01317803, 269.67110857],\n [270.62105944, 270.76703813, 270.76921692, ..., 270.43695199,\n 270.18857034, 270.01099925]],\n\n ...,\n\n [[271.48168007, 271.36729379, 271.24637114, ..., 270.89776532,\n 270.98164859, 270.96857587],\n [271.81830257, 271.70282689, 271.56447396, ..., 271.43701495,\n 271.53723913, 271.69084357],\n [272.18107051, 271.92179493, 
271.77690563, ..., 272.04271757,\n 272.15165689, 272.3107083 ],\n ...,\n [275.28910932, 274.99388376, 274.74441272, ..., 276.07565121,\n 276.18350114, 276.35344648],\n [275.60285456, 275.36536684, 275.34575777, ..., 276.91993095,\n 276.99618847, 277.08551871],\n [275.90788466, 275.95254978, 275.9329407 , ..., 278.0169499 ,\n 277.9058318 , 277.90365301]],\n\n [[269.88462964, 269.78658425, 269.64496313, ..., 269.72231005,\n 269.85957359, 269.92602658],\n [270.1308325 , 270.03278711, 269.93147354, ..., 270.19946427,\n 270.40427019, 270.72237301],\n [270.45656107, 270.21471578, 270.10904464, ..., 270.84329566,\n 271.05463794, 271.37056197],\n ...,\n [273.1440941 , 273.02535024, 272.86738823, ..., 274.12127981,\n 274.27488425, 274.4742432 ],\n [273.5765832 , 273.50795143, 273.58638774, ..., 274.72698243,\n 274.85553083, 275.00150951],\n [273.94697689, 274.12127981, 274.19971612, ..., 275.72704539,\n 275.64207272, 275.67148634]],\n\n [[269.74954488, 269.63951617, 269.51096777, ..., 268.99459539,\n 269.11442864, 269.11224985],\n [269.87046752, 269.76043881, 269.70596915, ..., 269.16345133,\n 269.50443141, 269.90097053],\n [269.85630541, 269.81381908, 269.78222667, ..., 269.61446012,\n 269.94672505, 270.34435357],\n ...,\n [273.29007279, 273.09833959, 273.18440165, ..., 273.11794867,\n 273.21381527, 273.48943175],\n [274.379466 , 274.08641922, 273.44258784, ..., 273.57331502,\n 273.64957255, 273.81298153],\n [273.89904359, 273.55370595, 272.90769578, ..., 274.27815243,\n 274.27488425, 274.25309638]]])
|
||
|
},
|
||
|
"execution_count": 41,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Peek at the raw t2m values (numpy abbreviates the repr of large arrays).\n",
"np.asarray(nc_data['t2m'][:])"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.790393500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.756209300Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "d189f90ec07c8690"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 42,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": "(8784, 13, 31)"
|
||
|
},
|
||
|
"execution_count": 42,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"np.asarray(nc_data['t2m'][:]).shape"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.862579700Z",
|
||
|
"start_time": "2024-06-11T06:41:37.787379500Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "6a90a501e4374806"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 43,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"def find_closest_number_index(arr, target):\n",
|
||
|
" arr.sort() # 对数组进行排序\n",
|
||
|
" left, right = 0, len(arr) - 1\n",
|
||
|
" closest_index = 0\n",
|
||
|
"\n",
|
||
|
" while left <= right:\n",
|
||
|
" mid = left + (right - left) // 2\n",
|
||
|
" # 更新最接近数值的索引\n",
|
||
|
" if abs(arr[mid] - target) < abs(arr[closest_index] - target):\n",
|
||
|
" closest_index = mid\n",
|
||
|
" # 根据目标值与中间值的比较,决定搜索左半部分还是右半部分\n",
|
||
|
" if arr[mid] < target:\n",
|
||
|
" left = mid + 1\n",
|
||
|
" elif arr[mid] > target:\n",
|
||
|
" right = mid - 1\n",
|
||
|
" else:\n",
|
||
|
" return mid # 如果找到精确匹配,直接返回索引\n",
|
||
|
" return closest_index"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.870067500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.820315500Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "738d2236e3320758"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 44,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"target_cols = ['t2m']\n",
|
||
|
"times = np.asarray(nc_data['time'][:]).tolist()\n",
|
||
|
"times = [start_date + dt.timedelta(hours=x) for x in times]"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.870067500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.834951500Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "336150c5fe22b4db"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 45,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"time_index = pd.to_datetime(times * stations.shape[0])\n",
|
||
|
"stations['lat'] = stations['lat'].astype(float)\n",
|
||
|
"stations['lon'] = stations['lon'].astype(float)\n",
|
||
|
"stations_out = stations.copy()"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.871070500Z",
|
||
|
"start_time": "2024-06-11T06:41:37.848726Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "dfc81e6752e2c51a"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 46,
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"text/plain": " state lon lat lon_index lat_index best_lon best_lat\n0 iowa 93.5 41.1 30 2 -89.0 41.0",
|
||
|
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>state</th>\n <th>lon</th>\n <th>lat</th>\n <th>lon_index</th>\n <th>lat_index</th>\n <th>best_lon</th>\n <th>best_lat</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>iowa</td>\n <td>93.5</td>\n <td>41.1</td>\n <td>30</td>\n <td>2</td>\n <td>-89.0</td>\n <td>41.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
|
||
|
},
|
||
|
"execution_count": 46,
|
||
|
"metadata": {},
|
||
|
"output_type": "execute_result"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"lon_index = list()\n",
|
||
|
"lat_index = list()\n",
|
||
|
"best_lons = list()\n",
|
||
|
"best_lats = list()\n",
|
||
|
"for i in range(stations.shape[0]):\n",
|
||
|
" # 获得观测点所在的经纬度\n",
|
||
|
" stat_lat = stations.iloc[i]['lat']\n",
|
||
|
" stat_lon = stations.iloc[i]['lon']\n",
|
||
|
" # 获得观测点所在经纬度对应最近的网格数据的经纬度\n",
|
||
|
" best_lat_index = find_closest_number_index(lat, stat_lat)\n",
|
||
|
" best_lon_index = find_closest_number_index(lon, stat_lon)\n",
|
||
|
" lat_index.append(best_lat_index)\n",
|
||
|
" lon_index.append(best_lon_index)\n",
|
||
|
" best_lons.append(lon[best_lon_index])\n",
|
||
|
" best_lats.append(lat[best_lat_index])\n",
|
||
|
"stations_out['lon_index'] = lon_index\n",
|
||
|
"stations_out['lat_index'] = lat_index\n",
|
||
|
"stations_out['best_lon'] = best_lons\n",
|
||
|
"stations_out['best_lat'] = best_lats\n",
|
||
|
"stations_out"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.880042100Z",
|
||
|
"start_time": "2024-06-11T06:41:37.866999300Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "3eccbc89ab82bf16"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 47,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"for tgt in target_cols:\n",
|
||
|
" tmp = np.asarray(nc_data[tgt][:])\n",
|
||
|
" stations_out[tgt] = stations_out.apply(lambda x: tmp[:, len(lat) - x['lat_index'] - 1, x['lon_index']].squeeze(),axis=1)"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.953643400Z",
|
||
|
"start_time": "2024-06-11T06:41:37.882051700Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "eb2d82717535c00b"
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 48,
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"result_df = stations_out.explode(column=target_cols).reset_index(drop=True)\n",
|
||
|
"result_df['date_time'] = time_index\n",
|
||
|
"result_df[['date_time', 't2m']].to_csv('./temper.csv', index=False, encoding='utf-8-sig')"
|
||
|
],
|
||
|
"metadata": {
|
||
|
"collapsed": false,
|
||
|
"ExecuteTime": {
|
||
|
"end_time": "2024-06-11T06:41:37.961281200Z",
|
||
|
"start_time": "2024-06-11T06:41:37.913630800Z"
|
||
|
}
|
||
|
},
|
||
|
"id": "f347ca967bfcafce"
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 2
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython2",
|
||
|
"version": "2.7.6"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 5
|
||
|
}
|