538 KiB
538 KiB
In [139]:
import numpy as np import matplotlib.pyplot as plt import geopandas as gpd import pandas as pd from shapely.geometry import Point, Polygon
In [140]:
# Load the 2020-02-20 grid and preview channel 0 with the NO2 colormap.
ori_data = np.load('./np_data/20200220.npy')
data = np.array(ori_data[:, :, 0])
plt.imshow(data, cmap='RdYlGn_r')
Out[140]:
<matplotlib.image.AxesImage at 0x7f8391dc35b0>
In [ ]:
In [10]:
# Re-extract channel 0 and display it with the default colormap.
data = np.array(ori_data[:, :, 0])
plt.imshow(data)
Out[10]:
<matplotlib.image.AxesImage at 0x7fea807c26d0>
In [11]:
# Clear the current figure so the next plot starts from a clean canvas.
plt.clf()
<Figure size 640x480 with 0 Axes>
In [12]:
# 创建一个190x110的经纬度网格 lon_min, lat_min = 114.025, 33.525 lon_max, lat_max = 123.475, 38.975 lon_step = (lon_max - lon_min) / 190 lat_step = (lat_max - lat_min) / 110
In [13]:
# Coordinate vectors spanning the bounding box, expanded to 2-D grids
# (lat varies along rows, lon along columns).
lons = np.linspace(lon_min, lon_max, num=190)
lats = np.linspace(lat_min, lat_max, num=110)
lon_grid, lat_grid = np.meshgrid(lons, lats)
In [14]:
# Turn each grid cell into a polygon and attach its data value.
# The original grew the GeoDataFrame with pd.concat inside the loop, which
# re-copies the whole frame every iteration (quadratic); collecting records
# in a list and constructing once is linear and idiomatic.
records = []
for i in range(lat_grid.shape[0] - 1):
    for j in range(lat_grid.shape[1] - 1):
        cell = Polygon([
            (lon_grid[i, j], lat_grid[i, j]),
            (lon_grid[i, j + 1], lat_grid[i, j + 1]),
            (lon_grid[i + 1, j + 1], lat_grid[i + 1, j + 1]),
            (lon_grid[i + 1, j], lat_grid[i + 1, j]),
        ])
        # value taken from the cell's lower-left corner, as before
        records.append({'geometry': cell, 'value': data[i, j]})
gdf = gpd.GeoDataFrame(records, columns=['geometry', 'value'])
In [15]:
# 读取世界地图 world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
/tmp/ipykernel_7233/3979395723.py:2: FutureWarning: The geopandas.dataset module is deprecated and will be removed in GeoPandas 1.0. You can get the original 'naturalearth_lowres' data from https://www.naturalearthdata.com/downloads/110m-cultural-vectors/. world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
In [17]:
# Overlay the gridded values on the world basemap, clip to the study area,
# and save the figure.
fig, ax = plt.subplots(figsize=(10, 10))
world.plot(ax=ax, color='white', edgecolor='black')
gdf.plot(ax=ax, column='value', legend=False, cmap='RdYlGn_r')
ax.set_xlim(lon_min, lon_max)
ax.set_ylim(lat_min, lat_max)
fig.savefig('./origin.png', bbox_inches='tight')
plt.show()
In [13]:
import numpy as np import matplotlib.pyplot as plt
In [6]:
import os

# All exported 96x96 training tiles.
show_list = os.listdir('./out_mat/96/train/')
In [23]:
# Keep only the tiles belonging to 2020-11-06.
val_list = list(filter(lambda name: '20201106' in name, show_list))
In [ ]:
In [68]:
# Render the first ten 2020-11-06 tiles to PNG.
# Slicing the list replaces the original manual counter
# (`if i >= 10: break`) — same frames, clearer intent.
for i, p in enumerate(val_list[:10]):
    val_data = np.load(f'./out_mat/96/train/{p}')[:, :, 0]
    plt.imshow(val_data, cmap='RdYlGn_r')
    plt.savefig(f'./figures/full/{i}.png', bbox_inches='tight')
    plt.clf()  # reset the figure between frames
<Figure size 640x480 with 0 Axes>
In [69]:
# First five mask images from the 30%-missing set.
# The original wrapped os.listdir in a redundant identity comprehension.
# NOTE(review): os.listdir order is OS-dependent — sort the listing if a
# stable selection of masks is required for reproducibility.
masks = os.listdir('./out_mat/96/mask/30/')[:5]
masks
Out[69]:
['20200503-439.jpg', '20201212-1053.jpg', '20200416-1333.jpg', '20200505-626.jpg', '20200516-624.jpg']
In [70]:
# Tiles 5-9 of the 2020-11-06 set, paired one-to-one with the five masks
# in the next loop.
new_miss = val_list[5:10]
new_miss
Out[70]:
['20201106-859.npy', '20201106-866.npy', '20201106-1088.npy', '20201106-1142.npy', '20201106-1238.npy']
In [71]:
import cv2
In [91]:
# Apply each mask image to its paired tile and save both the masked tile
# and the mask itself as figures.
for img, msk in zip(new_miss, masks):
    img_np = np.load(f'./out_mat/96/train/{img}')[:,:,0]
    # read the JPEG mask and collapse it to a single grayscale channel
    msk_np = cv2.cvtColor(cv2.imread(f'./out_mat/96/mask/30/{msk}'), cv2.COLOR_BGR2GRAY)
    msk_np_2 = msk_np.astype(float)
    # zero (masked-out) pixels become NaN so imshow renders them blank.
    # NOTE(review): kept pixels remain at their 0-255 grayscale values, so
    # `miss` is scaled by ~255 — harmless for imshow's autoscaled colormap,
    # but confirm before using `miss` numerically downstream.
    msk_np_2[msk_np_2 == 0] = np.nan
    miss = img_np * msk_np_2
    plt.imshow(miss, cmap='RdYlGn_r')
    plt.savefig(f'./figures/miss/{img}.png', bbox_inches='tight')
    plt.clf()
    plt.imshow(msk_np_2, cmap='gray')
    plt.savefig(f'./figures/mask/{img}.png', bbox_inches='tight')
    plt.clf()
<Figure size 640x480 with 0 Axes>
In [25]:
import os

# Compute the fraction of missing (NaN) pixels in channel 0 of every daily
# grid under ./np_data/, keeping the date (filename stem) alongside.
data_list = [x for x in os.listdir('./np_data/') if 'npy' in x]
dates = list()
miss_rate_list = list()
for path in data_list:
    dates.append(path.split('.')[0].strip())
    data = np.load(f"./np_data/{path}")[:, :, 0]
    # mean of a boolean mask == fraction of True values; replaces the
    # original (np.isnan(data) * 1).sum() / (rows * cols)
    miss_rate_list.append(np.isnan(data).mean())
In [28]:
# Build the date/rate table directly from a dict so each column gets a
# proper dtype. The original pd.DataFrame([dates, miss_rate_list]).T
# produced object-dtype columns (rates stored as Python objects), which is
# fragile for numeric operations downstream.
miss_df = pd.DataFrame({'date': dates, 'rate': miss_rate_list})
miss_df.head()
Out[28]:
date | rate | |
---|---|---|
0 | 20201116 | 0.14933 |
1 | 20200120 | 0.054163 |
2 | 20200724 | 0.124641 |
3 | 20200622 | 0.364641 |
4 | 20200711 | 0.896986 |
In [30]:
# Parse the YYYYMMDD strings to timestamps and derive the month.
miss_df.date = pd.to_datetime(miss_df.date)
# vectorized .dt accessor replaces the Python-level
# .apply(lambda x: x.month) — same values, no per-row Python call
miss_df['month'] = miss_df.date.dt.month
In [33]:
# Average daily missing rate for each calendar month.
miss_df['rate'].groupby(miss_df['month']).mean()
Out[33]:
month 1 0.496714 2 0.445747 3 0.246096 4 0.232876 5 0.349770 6 0.427705 7 0.523877 8 0.510536 9 0.295989 10 0.393019 11 0.345657 12 0.313314 Name: rate, dtype: float64
In [34]:
import os
In [41]:
# Number of available masks in the 50%-missing set.
len(os.listdir('./out_mat/96/mask/50/'))
Out[41]:
4245
In [69]:
# Read the POMINO daily file — one float per line — into a 160x280 grid.
with open('./POMINO_data/POMINO_v2.1_daily_20200220.txt', 'r', encoding='utf-8') as fr:
    dd = [float(line.strip()) for line in fr]
# The original nested loop filled vcd[i, j] with the row index i varying
# fastest, i.e. column-major (Fortran) order; reshape reproduces it exactly.
# Its trailing `if i == 159: break` was dead code (the inner loop ends at
# i == 159 anyway). Slicing to 160*280 keeps the original's tolerance of
# extra trailing lines in the file.
vcd = np.asarray(dd[:160 * 280], dtype=float).reshape((160, 280), order='F')
In [70]:
# Quick visual check of the POMINO grid read above.
plt.imshow(vcd)
Out[70]:
<matplotlib.image.AxesImage at 0x7fea72102820>
In [106]:
# NOTE(review): `img1` is assigned in a cell further down (In[129]) — this
# cell relies on hidden kernel state and will fail under Restart & Run All.
img1.shape
Out[106]:
(110, 190)
In [129]:
# Figure 4a: 96x96 crop (top rows, rightmost columns) of 2020-03-20.
img1 = np.load('./np_data/20200320.npy')[:, :, 0]
data = img1[:96, -96:].copy()
ax = plt.gca()
ax.imshow(data, cmap='RdYlGn_r')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/a.png', bbox_inches='tight')
In [130]:
# Figure 4a mask: missing pixels rendered white, observed pixels black.
ax = plt.gca()
ax.imshow(np.isnan(data).astype(int), cmap='gray')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/a-mask.png', bbox_inches='tight')
In [137]:
# Figure 4b: 96x96 crop (bottom rows, columns 20-115) of 2020-06-21.
img2 = np.load('./np_data/20200621.npy')
data = img2[110 - 96:110, 20:116, 0].copy()
ax = plt.gca()
ax.imshow(data, cmap='RdYlGn_r')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/b.png', bbox_inches='tight')
In [138]:
# Figure 4b mask: missing pixels rendered white, observed pixels black.
ax = plt.gca()
ax.imshow(np.isnan(data).astype(int), cmap='gray')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/b-mask.png', bbox_inches='tight')
In [133]:
# Figure 4c: 96x96 crop (rows 10-105, leftmost columns) of 2020-09-22.
img3 = np.load('./np_data/20200922.npy')
data = img3[10:106, :96, 0].copy()
ax = plt.gca()
ax.imshow(data, cmap='RdYlGn_r')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/c.png', bbox_inches='tight')
In [134]:
# Figure 4c mask: missing pixels rendered white, observed pixels black.
ax = plt.gca()
ax.imshow(np.isnan(data).astype(int), cmap='gray')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/c-mask.png', bbox_inches='tight')
In [135]:
# Figure 4d: 96x96 crop (top rows, rightmost columns) of 2020-12-21.
img4 = np.load('./np_data/20201221.npy')
data = img4[:96, -96:, 0].copy()
ax = plt.gca()
ax.imshow(data, cmap='RdYlGn_r')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/d.png', bbox_inches='tight')
In [136]:
# Figure 4d mask: missing pixels rendered white, observed pixels black.
ax = plt.gca()
ax.imshow(np.isnan(data).astype(int), cmap='gray')
ax.set_xticks([])
ax.set_yticks([])
plt.savefig('./figures/fig4-ori_and_miss/d-mask.png', bbox_inches='tight')
In [166]:
# Figure 1: 30x30 zoom of 2020-02-20, color rendering, axes hidden.
ori_data_16 = np.load('./np_data/20200220.npy')
data = ori_data_16[25:55, 65:95, 0].copy()
plt.imshow(data, cmap='RdYlGn_r')
plt.axis('off')  # hide ticks and frame on the current axes
plt.savefig('./figures/fig1/color.png', bbox_inches='tight')
In [167]:
# Same crop rendered in grayscale for the paper's side-by-side.
plt.clf()
plt.imshow(data, cmap='gray')
plt.axis('off')  # hide ticks and frame on the current axes
plt.savefig('./figures/fig1/gray.png', bbox_inches='tight')
In [158]:
# Confirm the crop size used for Figure 1.
data.shape
Out[158]:
(30, 30)
In [164]:
# Dump the 30x30 crop as integers for the numeric figure.
# NOTE(review): astype(int) on NaN values is undefined behavior in numpy —
# confirm this crop contains no missing pixels before relying on the CSV.
pd.DataFrame(data.astype(int)).to_csv('./numeric.csv', index=False)
In [161]:
# Print the crop as a space-separated integer matrix, one grid row per line
# (each value followed by a space, matching the original output exactly).
for row in data:
    print(' '.join(str(int(v)) for v in row), end=' \n')
8 7 7 7 7 7 6 6 8 9 9 10 10 10 10 9 10 9 9 9 8 8 9 9 8 9 9 9 9 10 8 8 7 7 7 7 7 7 8 9 9 10 10 10 9 9 9 10 10 9 9 9 9 10 9 9 10 10 10 10 8 8 7 8 8 8 8 8 8 8 9 9 10 10 9 9 9 9 10 9 9 9 10 10 10 10 10 11 12 13 9 9 10 9 8 9 9 9 9 9 9 9 9 10 9 9 9 9 8 9 9 9 10 10 10 11 12 14 15 16 10 9 9 10 9 10 11 11 10 11 12 10 11 11 11 10 9 9 8 9 9 9 10 12 12 14 15 17 18 20 8 8 8 10 10 11 12 11 11 13 12 10 11 12 12 11 10 9 8 9 9 10 12 15 16 17 19 22 25 24 8 8 8 9 9 10 11 11 11 13 11 11 11 13 14 13 12 11 10 10 10 12 15 16 18 19 20 21 21 20 7 8 9 9 9 9 10 10 11 10 11 12 16 17 18 19 16 15 13 12 14 15 15 15 16 16 17 18 18 16 7 8 8 9 9 10 10 10 10 11 12 13 16 19 22 25 25 22 16 18 19 18 15 15 15 15 15 15 15 14 8 8 7 8 9 10 12 11 10 11 13 13 15 21 22 24 25 23 19 21 22 19 16 15 14 13 13 12 12 12 10 9 7 8 8 9 10 10 10 11 11 12 15 19 21 22 24 23 22 20 18 16 14 13 12 12 11 11 12 13 9 8 8 9 8 9 10 11 11 12 12 14 17 19 21 20 22 23 23 19 16 15 14 12 12 12 12 11 12 12 10 10 10 10 11 11 11 12 13 14 17 21 24 25 22 22 24 26 25 20 18 19 15 13 12 12 13 13 13 13 10 10 10 12 12 13 13 14 15 17 21 25 28 28 26 25 28 32 27 20 15 17 17 16 15 13 13 13 13 13 10 9 9 13 14 15 15 16 18 22 25 28 31 32 32 28 27 24 20 17 15 16 18 18 17 15 14 14 13 13 10 10 12 14 16 17 17 23 27 30 29 30 33 37 35 30 27 23 18 15 15 16 17 17 17 18 17 17 14 13 10 11 10 12 15 18 25 33 43 41 36 36 36 37 36 35 29 22 19 17 15 15 16 17 18 19 19 18 16 14 12 13 11 13 15 20 30 39 47 46 39 30 28 30 34 39 31 25 21 18 17 17 17 17 17 17 17 15 16 14 15 14 14 14 16 20 26 38 36 31 26 22 20 21 30 40 36 27 21 20 18 17 17 16 16 15 13 13 13 13 16 15 15 15 16 17 17 18 19 20 17 15 13 16 24 28 30 22 18 17 15 15 15 15 15 14 14 14 14 13 18 17 16 16 16 14 12 10 10 13 11 10 10 12 15 18 18 16 13 13 12 11 12 13 15 14 14 15 17 16 18 16 17 16 15 10 8 8 8 9 10 9 10 11 12 12 11 10 8 10 9 8 8 11 13 14 14 16 17 16 15 12 12 11 9 7 6 7 7 6 7 9 10 9 9 8 7 6 6 6 6 6 7 7 9 12 14 14 14 13 9 7 5 6 6 6 6 6 6 5 6 8 10 8 6 5 4 5 5 4 5 6 6 7 7 10 11 11 10 7 4 3 4 4 5 6 7 7 8 7 8 9 11 
8 5 5 4 4 4 4 5 6 6 6 7 9 9 6 6 4 4 2 4 5 6 8 10 8 9 9 10 12 11 9 5 4 4 4 3 4 5 5 6 6 6 7 7 5 4 4 3 3 3 5 7 11 15 11 11 11 11 11 12 8 5 4 3 3 3 4 5 5 6 5 5 5 6 6 5 5 4 4 5 7 9 13 14 14 14 11 10 11 15 9 6 4 3 3 4 4 4 4 5 6 6 5 5 7 6 6 4 4 6 8 9 14 16 18 14 10 10 15 17 12 6 4 3 4 3 4 4 3 5 5 5 4 4 6 7 6 4 4 5 5 9 11 13 19 14 10 14 17 15 9 5 4 3 5 5 4 4 4 4 5 5 4 3 4 5 5
In [168]:
Out[168]:
train | validation | test | |
---|---|---|---|
0 | 9624 | 1500 | 1743 |
1 | 6534 | 1117 | 1150 |
2 | 5380 | 840 | 956 |
3 | 5211 | 818 | 890 |
In [169]:
In [ ]: