21 KiB
21 KiB
In [1]:
import numpy as np
In [2]:
import os
In [3]:
# os.listdir() returns entries in arbitrary order; sort them so that npy_list[0]
# and any split derived from this list are stable from one run to the next.
npy_list = sorted(os.listdir('./np_data/'))
In [4]:
len(npy_list)
Out[4]:
361
In [5]:
len(os.listdir('./out_mat/96/'))
Out[5]:
5
In [6]:
def sliding_window(matrix, window_size, drop_channels=3):
    """Yield every square window of ``matrix``, scanning row by row with stride 1.

    Parameters
    ----------
    matrix : numpy.ndarray
        Array indexed as (row, column, channel); it must have at least three
        dimensions because each window is cut with a three-part slice.
    window_size : int
        Side length of each window, in rows/columns. Must be at least 1.
    drop_channels : int, optional
        Number of trailing channels left out of every window. Defaults to 3,
        which reproduces the original hard-coded ``:-3`` cut. Pass 0 to keep
        all channels.

    Yields
    ------
    numpy.ndarray
        ``matrix[i:i + window_size, j:j + window_size, :channels - drop_channels]``
        for every valid top-left corner ``(i, j)``, rows outermost. Nothing is
        yielded when the window does not fit inside the matrix.

    Raises
    ------
    ValueError
        If ``window_size < 1`` or ``drop_channels < 0``. Because this is a
        generator, the error surfaces when iteration starts.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if drop_channels < 0:
        raise ValueError(f"drop_channels must be >= 0, got {drop_channels}")

    # Read the sizes from .shape so a zero-row array yields nothing instead of
    # failing on matrix[0].
    n_rows, n_cols = matrix.shape[:2]
    # Use an explicit stop index: a negative-offset slice cannot express
    # "drop nothing" (``:-0`` selects no channels). Clamp at 0 so that dropping
    # more channels than exist gives an empty channel axis, as ``:-3`` did.
    channel_stop = max(matrix.shape[2] - drop_channels, 0)

    for top in range(n_rows - window_size + 1):
        for left in range(n_cols - window_size + 1):
            yield matrix[top:top + window_size, left:left + window_size, :channel_stop]
In [7]:
# Side length of the square windows cut by sliding_window(); it is also
# interpolated into the output paths under ./out_mat/.
window_size = 96
In [8]:
# Load the first listed file so its shape and contents can be inspected below.
data = np.load(f"./np_data/{npy_list[0]}")
In [9]:
data.shape
Out[9]:
(110, 190, 11)
In [10]:
data[0][0]
Out[10]:
array([ nan, 2.90520200e+02, 9.77973000e+01, 2.80806000e+02, 4.36411383e+05, -1.35540000e+00, 2.04530000e+00, nan, 6.93860000e+00, 0.00000000e+00, 0.00000000e+00])
In [11]:
# Split the source files into train / validation / test at the file level
# (roughly 80% / 10% / 10%), then cut each file into windows and save every
# window that contains no NaN into the directory of its split.
#
# NOTE(review): windows saved by an earlier run with a different split are not
# removed here; clear ./out_mat/{window_size}/ first if the split changes.
SPLIT_SEED = 42  # fixed so that Restart & Run All reproduces the same split
rng = np.random.default_rng(SPLIT_SEED)

num_samples = len(npy_list)
# Hold out 20% of the files, then take 10% of all files from that hold-out as
# the test set; what remains of the hold-out is the validation set.
valid_list = rng.choice(npy_list, size=int(num_samples * 0.2), replace=False)
test_list = rng.choice(valid_list, size=int(num_samples * 0.1), replace=False)
held_out_names = set(valid_list.tolist())
test_names = set(test_list.tolist())
train_list = [x for x in npy_list if x not in held_out_names]
val_list = [x for x in valid_list.tolist() if x not in test_names]

# Resolve the destination of every file once, instead of scanning the lists
# for each window. Directory names now match the list they are filled from.
split_of = {name: 'train' for name in train_list}
split_of.update({name: 'valid' for name in val_list})
split_of.update({name: 'test' for name in test_names})

for split in ('train', 'valid', 'test'):
    os.makedirs(f'./out_mat/{window_size}/{split}', exist_ok=True)

for file in npy_list:
    data = np.load(f"./np_data/{file}")
    file_id = file.split('.')[0]
    out_dir = f'./out_mat/{window_size}/{split_of[file]}'
    for ind, mat in enumerate(sliding_window(data, window_size)):
        # Skip any window with a missing value in the channels it keeps.
        if np.isnan(mat).any():
            continue
        # `ind` is the window's position in scan order, so names stay unique per file.
        np.save(f'{out_dir}/{file_id}-{ind}.npy', mat)
In [12]:
import matplotlib.pyplot as plt
筛选mask (Filter masks)
In [13]:
import cv2
In [14]:
# For every window, measure how much of channel 0 is NaN and count the windows
# whose missing percentage, rounded to a whole percent, is a non-zero multiple
# of 10. One folder per bucket is created under ./out_mat/{window_size}/mask/.
# (Channel 0 is called cur_no2 here, presumably the NO2 field; confirm against
# the data description.)
mask_list = {}  # bucket name ('10', '20', ..., '100') -> number of windows
window_area = window_size ** 2

for file in npy_list:
    data = np.load(f"./np_data/{file}")
    file_id = file.split('.')[0]
    for ind, mat in enumerate(sliding_window(data, window_size)):
        cur_no2 = np.isnan(mat[:, :, 0])  # True where channel 0 is missing
        na_sums = int(cur_no2.sum())
        # Work with an integer percentage: testing `% 10 == 0` on a float such
        # as round(x, 2) * 100 depends on binary rounding of the product.
        miss_rate = int(round(100 * na_sums / window_area))
        if miss_rate == 0 or miss_rate % 10 != 0:
            continue

        fold_path = str(miss_rate)
        # makedirs also creates the parent `mask` folder and tolerates re-runs.
        os.makedirs(f"./out_mat/{window_size}/mask/{fold_path}", exist_ok=True)
        mask_list[fold_path] = mask_list.get(fold_path, 0) + 1

        msk = 1 - (cur_no2 * 1)  # 1 where the value is present, 0 where it is missing
        # Writing the mask image is currently disabled.
        # NOTE(review): JPEG is lossy, so a 0/1 mask will not round-trip
        # exactly; consider a lossless format if this is re-enabled.
        # cv2.imwrite(f'./out_mat/{window_size}/mask/{fold_path}/{file_id}-{ind}.jpg', msk)
In [15]:
# Integer (0/1) copy of the most recent NaN mask; its maximum is displayed.
dd = cur_no2.astype(int)
dd.max()
Out[15]:
1
In [16]:
dd.min()
Out[16]:
0
In [17]:
(1 - dd).max()
Out[17]:
1
In [19]:
# Read one previously saved mask image back from disk (it sits in the `70`
# bucket folder) and display it in grayscale.
d = plt.imread("./out_mat/96/mask/70/20200110-1145.jpg")
plt.imshow(d, cmap='gray')
Out[19]:
<matplotlib.image.AxesImage at 0x7fa6680b2370>
In [20]:
np.argwhere(d==2)
Out[20]:
array([[ 7, 3], [ 7, 4], [ 7, 5], [33, 47], [56, 48], [56, 49], [64, 15], [71, 3], [71, 4]])
In [21]:
d.max()
Out[21]:
2
In [22]:
mask_list
Out[22]:
{'10': 7033, '20': 4791, '40': 3699, '30': 3849, '50': 4245, '90': 2494, '80': 2549, '60': 3831, '70': 3144, '100': 17936}
In [25]:
mask_list
Out[25]:
{'10': 7033, '20': 4791, '40': 3699, '30': 3849, '50': 4245, '90': 2494, '80': 2549, '60': 3831, '70': 3144, '100': 17936}
In [ ]:
mask_list
In [ ]:
# Show channel 0 of the most recent window.
# plt.imshow takes the image as its first argument; unlike cv2.imshow there is
# no leading window-name argument. The original call passed '2' as the image
# and the array as `cmap`.
plt.imshow(mat[:, :, 0])
In [27]:
(np.isnan(mat[:,:,0]) * 1).sum()
Out[27]:
4679
In [ ]: