30 KiB
30 KiB
In [1]:
import torch import torch.nn as nn import torch.optim as optim from torch.utils.data import DataLoader, Dataset, random_split import numpy as np import pandas as pd import os from PIL import Image MAX_VALUE = 107.49169921875
In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') device
Out[2]:
device(type='cuda')
In [3]:
class GrayScaleDataset(Dataset): def __init__(self, data_dir): self.data_dir = data_dir self.file_list = [x for x in os.listdir(data_dir) if x.endswith('npy')] def __len__(self): return len(self.file_list) def __getitem__(self, idx): file_path = os.path.join(self.data_dir, self.file_list[idx]) data = np.load(file_path)[:,:,0] / MAX_VALUE return torch.tensor(data, dtype=torch.float32).unsqueeze(0)
In [4]:
class NO2Dataset(Dataset): def __init__(self, image_dir, mask_dir): self.image_dir = image_dir self.mask_dir = mask_dir self.image_filenames = [f for f in os.listdir(image_dir) if f.endswith('.npy')] # 仅加载 .npy 文件 self.mask_filenames = [f for f in os.listdir(mask_dir) if f.endswith('.jpg')] # 仅加载 .jpg 文件 def __len__(self): return len(self.image_filenames) def __getitem__(self, idx): image_path = os.path.join(self.image_dir, self.image_filenames[idx]) mask_idx = idx % len(self.mask_filenames) mask_path = os.path.join(self.mask_dir, self.mask_filenames[mask_idx]) # 加载图像数据 (.npy 文件) image = np.load(image_path).astype(np.float32)[:,:,:1] / MAX_VALUE # 形状为 (96, 96, 1) # 加载掩码数据 (.jpg 文件) mask = np.array(Image.open(mask_path).convert('L')).astype(np.float32) # 将掩码数据中非0值设为1,0值保持不变 mask = np.where(mask != 0, 1.0, 0.0) # 保持掩码数据形状为 (96, 96, 1) mask = mask[:, :, np.newaxis] # 将形状调整为 (96, 96, 1) # 应用掩码 masked_image = image.copy() masked_image[:, :, 0] = image[:, :, 0] * mask.squeeze() # 遮盖NO2数据 # cGAN的输入和目标 X = masked_image[:, :, :1] # 形状为 (96, 96, 8) y = image[:, :, 0:1] # 目标输出为NO2数据,形状为 (96, 96, 1) # 转换形状为 (channels, height, width) X = np.transpose(X, (2, 0, 1)) # 转换为 (1, 96, 96) y = np.transpose(y, (2, 0, 1)) # 转换为 (1, 96, 96) mask = np.transpose(mask, (2, 0, 1)) # 转换为 (1, 96, 96) return torch.tensor(X, dtype=torch.float32), torch.tensor(y, dtype=torch.float32), torch.tensor(mask, dtype=torch.float32)
In [5]:
class PatchMasking: def __init__(self, patch_size, mask_ratio): self.patch_size = patch_size self.mask_ratio = mask_ratio def __call__(self, x): batch_size, C, H, W = x.shape num_patches = (H // self.patch_size) * (W // self.patch_size) num_masked = int(num_patches * self.mask_ratio) # 为每个样本生成独立的mask masks = [] for _ in range(batch_size): mask = torch.zeros(num_patches, dtype=torch.bool, device=x.device) mask[:num_masked] = 1 mask = mask[torch.randperm(num_patches)] mask = mask.view(H // self.patch_size, W // self.patch_size) mask = mask.repeat_interleave(self.patch_size, dim=0).repeat_interleave(self.patch_size, dim=1) masks.append(mask) # 将所有mask堆叠成一个批量张量 masks = torch.stack(masks, dim=0) masks = torch.unsqueeze(masks, dim=1) # 应用mask到输入x上 masked_x = x * (1 - masks.float()) return masked_x, masks
In [12]:
class Mlp(nn.Module): def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.): super().__init__() out_features = out_features or in_features hidden_features = hidden_features or in_features self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True) self.act = act_layer() self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True) self.drop = nn.Dropout(drop, inplace=True) def forward(self, x): x = self.fc1(x) x = self.act(x) x = self.drop(x) x = self.fc2(x) x = self.drop(x) return x
In [6]:
class ViTEncoder(nn.Module): def __init__(self, img_size=96, patch_size=8, dim=128, depth=4, heads=4, mlp_dim=256): super(ViTEncoder, self).__init__() self.patch_size = patch_size self.dim = dim self.patch_embedding = nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size) # 定义 Transformer 编码器层 encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=heads, dim_feedforward=mlp_dim) self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=depth) def forward(self, x): x = self.patch_embedding(x) x = x.flatten(2).transpose(1, 2) # 形状变为 (batch_size, num_patches, dim) x = self.transformer_encoder(x) return x class ConvDecoder(nn.Module): def __init__(self, dim=128, patch_size=8, img_size=96): super(ConvDecoder, self).__init__() self.dim = dim self.patch_size = patch_size self.img_size = img_size self.decoder = nn.Sequential( nn.ConvTranspose2d(dim, 128, kernel_size=patch_size, stride=patch_size), nn.ReLU(), nn.ConvTranspose2d(128, 1, kernel_size=3, stride=1, padding=1) ) def forward(self, x): # x = x.transpose(1, 2).view(-1, self.dim, self.img_size // self.patch_size, self.img_size // self.patch_size) x = self.decoder(x) return x class MAEModel(nn.Module): def __init__(self, encoder, decoder): super(MAEModel, self).__init__() self.encoder = encoder self.decoder = decoder def forward(self, x): encoded = self.encoder(x) decoded = self.decoder(encoded) return decoded
In [7]:
def masked_mse_loss(preds, target, mask): loss = (preds - target) ** 2 loss = loss.mean(dim=-1) # 对每个像素点求平均 loss = (loss * mask).sum() / mask.sum() # 只计算被mask的像素点的损失 return loss
In [8]:
def train_model(model, train_loader, val_loader, epochs, criterion, optimizer, device): model.to(device) for epoch in range(epochs): model.train() train_loss = 0 for data in train_loader: data = data.to(device) optimizer.zero_grad() masked_data, mask = PatchMasking(patch_size=16, mask_ratio=0.2)(data) output = model(masked_data) loss = masked_mse_loss(output, data, mask) loss.backward() optimizer.step() train_loss += loss.item() train_loss /= len(train_loader) model.eval() val_loss = 0 with torch.no_grad(): for data in val_loader: data = data.to(device) masked_data, mask = PatchMasking(patch_size=16, mask_ratio=0.2)(data) output = model(masked_data) loss = masked_mse_loss(output, data, mask) val_loss += loss.item() val_loss /= len(val_loader) print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')
In [9]:
train_dir = './out_mat/96/train/' train_dataset = GrayScaleDataset(train_dir) val_dir = './out_mat/96/valid/' val_dataset = GrayScaleDataset(val_dir) train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True) val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
In [13]:
encoder = ViTEncoder() decoder = ConvDecoder() model = MAEModel(encoder, decoder) criterion = nn.MSELoss() optimizer = optim.Adam(model.parameters(), lr=0.001)
In [19]:
a.transpose(1, 2).reshape(-1, 128, 6, 6).shape
Out[19]:
torch.Size([128, 128, 6, 6])
In [15]:
for i in train_loader: a = encoder(i) b = model.mlp(a) c = decoder(b) break
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) Cell In[15], line 3 1 for i in train_loader: 2 a = encoder(i) ----> 3 b = model.mlp(a) 4 c = decoder(b) 5 break File ~/miniconda3/envs/python38/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/miniconda3/envs/python38/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None Cell In[12], line 13, in Mlp.forward(self, x) 12 def forward(self, x): ---> 13 x = self.fc1(x) 14 x = self.act(x) 15 x = self.drop(x) File ~/miniconda3/envs/python38/lib/python3.8/site-packages/torch/nn/modules/module.py:1511, in Module._wrapped_call_impl(self, *args, **kwargs) 1509 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc] 1510 else: -> 1511 return self._call_impl(*args, **kwargs) File ~/miniconda3/envs/python38/lib/python3.8/site-packages/torch/nn/modules/module.py:1520, in Module._call_impl(self, *args, **kwargs) 1515 # If we don't have any hooks, we want to skip the rest of the logic in 1516 # this function, and just call forward. 1517 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks 1518 or _global_backward_pre_hooks or _global_backward_hooks 1519 or _global_forward_hooks or _global_forward_pre_hooks): -> 1520 return forward_call(*args, **kwargs) 1522 try: 1523 result = None File ~/miniconda3/envs/python38/lib/python3.8/site-packages/torch/nn/modules/conv.py:460, in Conv2d.forward(self, input) 459 def forward(self, input: Tensor) -> Tensor: --> 460 return self._conv_forward(input, self.weight, self.bias) File ~/miniconda3/envs/python38/lib/python3.8/site-packages/torch/nn/modules/conv.py:456, in Conv2d._conv_forward(self, input, weight, bias) 452 if self.padding_mode != 'zeros': 453 return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode), 454 weight, bias, self.stride, 455 _pair(0), self.dilation, self.groups) --> 456 return F.conv2d(input, weight, bias, self.stride, 457 self.padding, self.dilation, self.groups) RuntimeError: Given groups=1, weight of size [256, 128, 1, 1], expected input[1, 32, 144, 128] to have 128 channels, but got 32 channels instead
In [ ]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') train_model(model, train_loader, val_loader, epochs=100, criterion=criterion, optimizer=optimizer, device=device)
In [19]:
test_set = NO2Dataset('./out_mat/96/test/', './out_mat/96/mask/20/') test_loader = DataLoader(test_set, batch_size=32, shuffle=False, num_workers=4)
In [20]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error, mean_absolute_error
In [21]:
def cal_ioa(y_true, y_pred): # 计算平均值 mean_observed = np.mean(y_true) mean_predicted = np.mean(y_pred) # 计算IoA numerator = np.sum((y_true - y_pred) ** 2) denominator = 2 * np.sum((np.abs(y_true - mean_observed) + np.abs(y_pred - mean_predicted)) ** 2) IoA = 1 - (numerator / denominator) return IoA
In [22]:
eva_list = list() device = 'cpu' model = model.to(device) with torch.no_grad(): for batch_idx, (X, y, mask) in enumerate(test_loader): X, y, mask = X.to(device), y.to(device), mask.to(device) mask_rev = (torch.squeeze(mask, dim=1)==0) * 1 # mask取反获得修复区域 reconstructed = model(X) rev_data = y * MAX_VALUE rev_recon = reconstructed * MAX_VALUE # todo: 这里需要只评估修补出来的模块 data_label = torch.squeeze(rev_data, dim=1) * mask_rev data_label = data_label[mask_rev==1] recon_no2 = torch.squeeze(rev_recon, dim=1) * mask_rev recon_no2 = recon_no2[mask_rev==1] mae = mean_absolute_error(data_label, recon_no2) rmse = np.sqrt(mean_squared_error(data_label, recon_no2)) mape = mean_absolute_percentage_error(data_label, recon_no2) r2 = r2_score(data_label, recon_no2) ioa = cal_ioa(data_label.detach().numpy(), recon_no2.detach().numpy()) eva_list.append([mae, rmse, mape, r2, ioa])
In [23]:
pd.DataFrame(eva_list, columns=['mae', 'rmse', 'mape', 'r2', 'ioa']).describe()
Out[23]:
mae | rmse | mape | r2 | ioa | |
---|---|---|---|---|---|
count | 149.000000 | 149.000000 | 149.000000 | 149.000000 | 149.000000 |
mean | 7.068207 | 9.016465 | 0.814727 | -0.952793 | 0.564749 |
std | 0.659118 | 0.774556 | 0.054147 | 0.162851 | 0.033048 |
min | 5.609327 | 7.113544 | 0.599120 | -1.402735 | 0.461420 |
25% | 6.613351 | 8.499699 | 0.782008 | -1.049951 | 0.544980 |
50% | 7.086443 | 9.045812 | 0.811261 | -0.938765 | 0.567080 |
75% | 7.495309 | 9.530408 | 0.848900 | -0.849266 | 0.586134 |
max | 8.663801 | 10.995004 | 0.984343 | -0.591799 | 0.630479 |
In [ ]: