building-agents/read.ipynb

404 lines
15 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 34,
"id": "initial_id",
"metadata": {
"collapsed": true,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.671133500Z",
"start_time": "2024-06-11T06:41:37.652982600Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['./data/iowa.nc']\n"
]
}
],
"source": [
"import datetime as dt\n",
"import os\n",
"import warnings\n",
"\n",
"import netCDF4 as nc\n",
"import numpy as np\n",
"import pandas as pd\n",
"# Directory that holds the input netCDF file(s).\n",
"nc_path = \"./data/\"\n",
"# os.listdir() returns entries in arbitrary order; sort so that nc_files[0]\n",
"# (the file opened below) is the same file on every run.\n",
"nc_files = sorted(os.path.join(nc_path, x) for x in os.listdir(nc_path) if x.endswith('.nc'))\n",
"print(nc_files)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"outputs": [],
"source": [
"# Station list (later cells read its 'lat' and 'lon' columns); rows with any missing value are dropped.\n",
"# Loading and cleaning in one expression keeps the cell idempotent on re-run.\n",
"stations = pd.read_csv('./data/station.csv').dropna()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.722796200Z",
"start_time": "2024-06-11T06:41:37.663028100Z"
}
},
"id": "9c4ed0ec8dc7ff7d"
},
{
"cell_type": "code",
"execution_count": 36,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['longitude', 'latitude', 'time', 't2m']\n"
]
}
],
"source": [
"# Open the first netCDF file found and list the names of the variables it contains.\n",
"nc_data = nc.Dataset(nc_files[0])\n",
"print(list(nc_data.variables))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.730197500Z",
"start_time": "2024-06-11T06:41:37.678192900Z"
}
},
"id": "a88ddc29d8fa3d06"
},
{
"cell_type": "code",
"execution_count": 37,
"outputs": [
{
"data": {
"text/plain": "(13, 31)"
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Grid coordinate axes as plain Python lists, followed by their lengths.\n",
"lat = np.asarray(nc_data.variables['latitude'][:]).tolist()\n",
"lon = np.asarray(nc_data.variables['longitude'][:]).tolist()\n",
"(len(lat), len(lon))"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.730197500Z",
"start_time": "2024-06-11T06:41:37.693749900Z"
}
},
"id": "4ec1510ef5baa4b9"
},
{
"cell_type": "code",
"execution_count": 38,
"outputs": [
{
"data": {
"text/plain": "array([1016832, 1016833, 1016834, ..., 1025613, 1025614, 1025615])"
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Raw values stored on the time axis.\n",
"np.asarray(nc_data.variables['time'][:])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.731222200Z",
"start_time": "2024-06-11T06:41:37.710255800Z"
}
},
"id": "9e1cfd59ea63dee"
},
{
"cell_type": "code",
"execution_count": 39,
"outputs": [
{
"data": {
"text/plain": "<class 'netCDF4._netCDF4.Variable'>\nint32 time(time)\n units: hours since 1900-01-01 00:00:00.0\n long_name: time\n calendar: gregorian\nunlimited dimensions: \ncurrent shape = (8784,)\nfilling on, default _FillValue of -2147483647 used"
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Show the time variable itself to inspect its metadata (units, calendar, shape).\n",
"nc_data.variables['time']"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.739215300Z",
"start_time": "2024-06-11T06:41:37.723796300Z"
}
},
"id": "305b595052773792"
},
{
"cell_type": "code",
"execution_count": 40,
"outputs": [
{
"data": {
"text/plain": "datetime.datetime(1900, 1, 1, 0, 0)"
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Reference epoch of the time axis (its units read 'hours since 1900-01-01 00:00:00.0').\n",
"start_date = dt.datetime(year=1900, month=1, day=1)\n",
"start_date"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.788384900Z",
"start_time": "2024-06-11T06:41:37.739215300Z"
}
},
"id": "9dc1b63dbd6e5847"
},
{
"cell_type": "code",
"execution_count": 41,
"outputs": [
{
"data": {
"text/plain": "array([[[267.10449818, 266.56524854, 265.81574602, ..., 266.35826383,\n 266.94435738, 267.10231939],\n [266.75916053, 266.21773211, 265.4017766 , ..., 266.49334859,\n 267.59908269, 267.86380524],\n [266.70360148, 265.86258993, 265.06733289, ..., 266.6197182 ,\n 267.66117811, 267.95640366],\n ...,\n [270.23759303, 270.20055367, 269.91731143, ..., 270.11231282,\n 269.99683714, 269.90750689],\n [271.34223774, 271.32153927, 271.25726507, ..., 270.55569585,\n 270.37376718, 270.24086121],\n [271.76274352, 271.85643134, 271.79215714, ..., 271.32371806,\n 270.86617291, 270.71583665]],\n\n [[267.45746158, 267.17966631, 266.73846206, ..., 266.19376546,\n 266.66983029, 266.83541806],\n [267.5337191 , 267.25374505, 266.67418786, ..., 266.32231386,\n 267.30821471, 267.57620544],\n [267.67425083, 267.02933005, 266.40183956, ..., 266.40183956,\n 267.33218136, 267.62522813],\n ...,\n [269.75172367, 269.78549485, 269.62317527, ..., 270.06873709,\n 269.95652959, 269.69289643],\n [270.76159117, 270.80080932, 270.75832299, ..., 270.33237024,\n 270.18748095, 269.91186447],\n [271.10692881, 271.19516966, 271.15268333, ..., 270.63848973,\n 270.47290197, 270.30513541]],\n\n [[267.29841017, 267.25483444, 267.06745881, ..., 265.76345515,\n 266.3157775 , 266.53692432],\n [267.55441757, 267.51084184, 267.31801925, ..., 266.01728376,\n 266.99338007, 267.2613708 ],\n [267.77338561, 267.59363573, 267.37684648, ..., 266.09898825,\n 267.00318461, 267.28860563],\n ...,\n [269.32359214, 269.41945874, 269.29091034, ..., 270.08507799,\n 269.90750689, 269.55781167],\n [270.24957636, 270.34871114, 270.35088993, ..., 270.20600063,\n 270.01317803, 269.67110857],\n [270.62105944, 270.76703813, 270.76921692, ..., 270.43695199,\n 270.18857034, 270.01099925]],\n\n ...,\n\n [[271.48168007, 271.36729379, 271.24637114, ..., 270.89776532,\n 270.98164859, 270.96857587],\n [271.81830257, 271.70282689, 271.56447396, ..., 271.43701495,\n 271.53723913, 271.69084357],\n [272.18107051, 271.92179493, 
271.77690563, ..., 272.04271757,\n 272.15165689, 272.3107083 ],\n ...,\n [275.28910932, 274.99388376, 274.74441272, ..., 276.07565121,\n 276.18350114, 276.35344648],\n [275.60285456, 275.36536684, 275.34575777, ..., 276.91993095,\n 276.99618847, 277.08551871],\n [275.90788466, 275.95254978, 275.9329407 , ..., 278.0169499 ,\n 277.9058318 , 277.90365301]],\n\n [[269.88462964, 269.78658425, 269.64496313, ..., 269.72231005,\n 269.85957359, 269.92602658],\n [270.1308325 , 270.03278711, 269.93147354, ..., 270.19946427,\n 270.40427019, 270.72237301],\n [270.45656107, 270.21471578, 270.10904464, ..., 270.84329566,\n 271.05463794, 271.37056197],\n ...,\n [273.1440941 , 273.02535024, 272.86738823, ..., 274.12127981,\n 274.27488425, 274.4742432 ],\n [273.5765832 , 273.50795143, 273.58638774, ..., 274.72698243,\n 274.85553083, 275.00150951],\n [273.94697689, 274.12127981, 274.19971612, ..., 275.72704539,\n 275.64207272, 275.67148634]],\n\n [[269.74954488, 269.63951617, 269.51096777, ..., 268.99459539,\n 269.11442864, 269.11224985],\n [269.87046752, 269.76043881, 269.70596915, ..., 269.16345133,\n 269.50443141, 269.90097053],\n [269.85630541, 269.81381908, 269.78222667, ..., 269.61446012,\n 269.94672505, 270.34435357],\n ...,\n [273.29007279, 273.09833959, 273.18440165, ..., 273.11794867,\n 273.21381527, 273.48943175],\n [274.379466 , 274.08641922, 273.44258784, ..., 273.57331502,\n 273.64957255, 273.81298153],\n [273.89904359, 273.55370595, 272.90769578, ..., 274.27815243,\n 274.27488425, 274.25309638]]])"
},
"execution_count": 41,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Values of the 't2m' variable as a NumPy array.\n",
"np.asarray(nc_data.variables['t2m'][:])"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.790393500Z",
"start_time": "2024-06-11T06:41:37.756209300Z"
}
},
"id": "d189f90ec07c8690"
},
{
"cell_type": "code",
"execution_count": 42,
"outputs": [
{
"data": {
"text/plain": "(8784, 13, 31)"
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Shape of the 't2m' array.\n",
"np.asarray(nc_data.variables['t2m'][:]).shape"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.862579700Z",
"start_time": "2024-06-11T06:41:37.787379500Z"
}
},
"id": "6a90a501e4374806"
},
{
"cell_type": "code",
"execution_count": 43,
"outputs": [],
"source": [
"def find_closest_number_index(arr, target):\n",
"    \"\"\"Return the index of the value in ``arr`` that is closest to ``target``.\n",
"\n",
"    ``arr`` is sorted in place (ascending) before the search, so the returned\n",
"    index refers to the sorted order and the caller's sequence is reordered as\n",
"    a side effect. Other cells in this notebook index the same lists with the\n",
"    returned value, so this side effect is kept on purpose.\n",
"\n",
"    Args:\n",
"        arr: Mutable sequence of numbers supporting ``sort()``, ``len()`` and indexing.\n",
"        target: Number to look for.\n",
"\n",
"    Returns:\n",
"        Index (into the sorted ``arr``) of an exact match if the search hits\n",
"        one, otherwise of the closest value probed by the binary search.\n",
"\n",
"    Raises:\n",
"        ValueError: If ``arr`` is empty, because no valid index exists.\n",
"    \"\"\"\n",
"    if len(arr) == 0:\n",
"        raise ValueError('arr must contain at least one value')\n",
"\n",
"    arr.sort()  # in place: every index below refers to ascending order\n",
"    left, right = 0, len(arr) - 1\n",
"    closest_index = 0\n",
"\n",
"    while left <= right:\n",
"        mid = left + (right - left) // 2\n",
"        # Remember the probe that is nearest to the target so far.\n",
"        if abs(arr[mid] - target) < abs(arr[closest_index] - target):\n",
"            closest_index = mid\n",
"        # Continue in the half that can still contain the target.\n",
"        if arr[mid] < target:\n",
"            left = mid + 1\n",
"        elif arr[mid] > target:\n",
"            right = mid - 1\n",
"        else:\n",
"            return mid  # exact match\n",
"    return closest_index"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.870067500Z",
"start_time": "2024-06-11T06:41:37.820315500Z"
}
},
"id": "738d2236e3320758"
},
{
"cell_type": "code",
"execution_count": 44,
"outputs": [],
"source": [
"# Variables to extract for every station.\n",
"target_cols = ['t2m']\n",
"# Convert the hour offsets stored on the time axis into datetimes relative to start_date.\n",
"hour_offsets = np.asarray(nc_data['time'][:]).tolist()\n",
"times = [start_date + dt.timedelta(hours=offset) for offset in hour_offsets]"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.870067500Z",
"start_time": "2024-06-11T06:41:37.834951500Z"
}
},
"id": "336150c5fe22b4db"
},
{
"cell_type": "code",
"execution_count": 45,
"outputs": [],
"source": [
"# Repeat the whole time axis once per station so it lines up with the\n",
"# station-major rows produced when the per-station series are exploded.\n",
"time_index = pd.to_datetime(times * len(stations))\n",
"# Make sure the coordinates are numeric, then work on a separate output frame.\n",
"stations = stations.astype({'lat': float, 'lon': float})\n",
"stations_out = stations.copy()"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.871070500Z",
"start_time": "2024-06-11T06:41:37.848726Z"
}
},
"id": "dfc81e6752e2c51a"
},
{
"cell_type": "code",
"execution_count": 46,
"outputs": [
{
"data": {
"text/plain": " state lon lat lon_index lat_index best_lon best_lat\n0 iowa 93.5 41.1 30 2 -89.0 41.0",
"text/html": "<div>\n<style scoped>\n .dataframe tbody tr th:only-of-type {\n vertical-align: middle;\n }\n\n .dataframe tbody tr th {\n vertical-align: top;\n }\n\n .dataframe thead th {\n text-align: right;\n }\n</style>\n<table border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: right;\">\n <th></th>\n <th>state</th>\n <th>lon</th>\n <th>lat</th>\n <th>lon_index</th>\n <th>lat_index</th>\n <th>best_lon</th>\n <th>best_lat</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>iowa</td>\n <td>93.5</td>\n <td>41.1</td>\n <td>30</td>\n <td>2</td>\n <td>-89.0</td>\n <td>41.0</td>\n </tr>\n </tbody>\n</table>\n</div>"
},
"execution_count": 46,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lon_index = []\n",
"lat_index = []\n",
"best_lons = []\n",
"best_lats = []\n",
"# Extent of the grid; unaffected by the in-place sorting done by find_closest_number_index.\n",
"lat_min, lat_max = min(lat), max(lat)\n",
"lon_min, lon_max = min(lon), max(lon)\n",
"for stat_lat, stat_lon in zip(stations['lat'], stations['lon']):\n",
"    # A station outside the grid extent is still snapped to an edge cell, which silently\n",
"    # yields data for the wrong place (e.g. a longitude with the wrong sign), so flag it.\n",
"    if not (lat_min <= stat_lat <= lat_max and lon_min <= stat_lon <= lon_max):\n",
"        warnings.warn(\n",
"            f'station (lon={stat_lon}, lat={stat_lat}) lies outside the grid extent '\n",
"            f'(lon {lon_min}..{lon_max}, lat {lat_min}..{lat_max}); '\n",
"            'it will be matched to an edge cell'\n",
"        )\n",
"    # Nearest grid coordinate for the station. find_closest_number_index sorts `lat` / `lon`\n",
"    # in place, so the indices and the lookups below refer to the ascending-sorted lists.\n",
"    best_lat_index = find_closest_number_index(lat, stat_lat)\n",
"    best_lon_index = find_closest_number_index(lon, stat_lon)\n",
"    lat_index.append(best_lat_index)\n",
"    lon_index.append(best_lon_index)\n",
"    best_lons.append(lon[best_lon_index])\n",
"    best_lats.append(lat[best_lat_index])\n",
"stations_out['lon_index'] = lon_index\n",
"stations_out['lat_index'] = lat_index\n",
"stations_out['best_lon'] = best_lons\n",
"stations_out['best_lat'] = best_lats\n",
"stations_out"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.880042100Z",
"start_time": "2024-06-11T06:41:37.866999300Z"
}
},
"id": "3eccbc89ab82bf16"
},
{
"cell_type": "code",
"execution_count": 47,
"outputs": [],
"source": [
"# Coordinate axes in the order stored in the file. The indices kept in stations_out refer to\n",
"# the `lat` / `lon` lists, which find_closest_number_index sorts in place, so the matched\n",
"# coordinates are looked up again here instead of assuming the file stores latitude in\n",
"# descending and longitude in ascending order.\n",
"grid_lat = np.asarray(nc_data['latitude'][:])\n",
"grid_lon = np.asarray(nc_data['longitude'][:])\n",
"\n",
"for tgt in target_cols:\n",
"    grid_values = np.asarray(nc_data[tgt][:])  # indexed as [time, latitude, longitude]\n",
"    # One full time series per station, taken at its matched grid cell.\n",
"    stations_out[tgt] = stations_out.apply(\n",
"        lambda row: grid_values[\n",
"            :,\n",
"            int(np.abs(grid_lat - row['best_lat']).argmin()),\n",
"            int(np.abs(grid_lon - row['best_lon']).argmin()),\n",
"        ].squeeze(),\n",
"        axis=1,\n",
"    )"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.953643400Z",
"start_time": "2024-06-11T06:41:37.882051700Z"
}
},
"id": "eb2d82717535c00b"
},
{
"cell_type": "code",
"execution_count": 48,
"outputs": [],
"source": [
"# One row per (station, time step): explode each station's series, then attach the\n",
"# timestamps, which were repeated in the same station-major order.\n",
"result_df = (\n",
"    stations_out\n",
"    .explode(column=target_cols)\n",
"    .reset_index(drop=True)\n",
"    .assign(date_time=time_index)\n",
")\n",
"result_df.loc[:, ['date_time', 't2m']].to_csv('./temper.csv', index=False, encoding='utf-8-sig')"
],
"metadata": {
"collapsed": false,
"ExecuteTime": {
"end_time": "2024-06-11T06:41:37.961281200Z",
"start_time": "2024-06-11T06:41:37.913630800Z"
}
},
"id": "f347ca967bfcafce"
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}