import colorsys
import copy
import os
import time

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torch import nn

from dimaoshibie.nets.segformer import SegFormer
from dimaoshibie.utils.utils import cvtColor, preprocess_input, resize_image, show_config

# Resolve the package directory from an absolute path: with a bare relative
# ``__file__`` (no directory part) ``dirname`` would return '' and the default
# weights path would wrongly point at the filesystem root ("/logs/...").
file_name = __file__
path = os.path.dirname(os.path.abspath(file_name))


class SegFormer_Segmentation(object):
    """Inference wrapper around a SegFormer semantic-segmentation network.

    Any key of ``_defaults`` can be overridden through keyword arguments of
    the constructor; extra keyword arguments are stored as attributes too.
    """

    # Default configuration.
    _defaults = {
        # Path of the trained weights.
        "model_path": os.path.join(path, "logs", "best_epoch_weights_voc_12000.pth"),
        # Number of classes, background included.
        "num_classes": 10 + 1,
        # Backbone size (b0-b5).
        "phi": "b0",
        # Network input size, indexed as [0] -> height, [1] -> width below.
        "input_shape": [512, 512],
        # Visualisation mode: 0 - blend with original, 1 - segmentation map
        # only, 2 - keep only non-background pixels of the original.
        "mix_type": 0,
        # Whether to run on the GPU.
        "cuda": True,
    }

    # Fixed palette used when there are at most 11 classes.
    _preset_colors = [
        (0, 0, 0),        # Background (black)
        (252, 250, 205),  # Cropland (pale yellow)
        (0, 123, 79),     # Forest (dark green)
        (157, 221, 106),  # Grass (light green)
        (77, 208, 159),   # Shrub (light blue-green)
        (111, 208, 242),  # Wetland (light blue)
        (10, 78, 151),    # Water (dark blue)
        (92, 106, 55),    # Tundra (khaki)
        (155, 36, 22),    # Impervious surface (red)
        (205, 205, 205),  # Bareland (grey)
        (211, 242, 255),  # Ice/snow (light sky blue)
    ]

    def __init__(self, **kwargs):
        """Apply the configuration, build the palette and load the model.

        :param kwargs: overrides for the entries of ``_defaults``
        """
        # Deep-copy so that mutable defaults (``input_shape``) are not shared
        # between instances and the class-level dictionary.
        self.__dict__.update(copy.deepcopy(self._defaults))
        for name, value in kwargs.items():
            setattr(self, name, value)

        # Use the preset palette for up to 11 classes (10 labels +
        # background); otherwise spread hues evenly over the HSV circle.
        if self.num_classes <= len(self._preset_colors):
            self.colors = list(self._preset_colors[:self.num_classes])
        else:
            self.colors = []
            for index in range(self.num_classes):
                r, g, b = colorsys.hsv_to_rgb(index / self.num_classes, 1., 1.)
                self.colors.append((int(r * 255), int(g * 255), int(b * 255)))

        self.generate()

        # Report the configuration actually in effect (defaults overridden by
        # kwargs), not the untouched class defaults.
        show_config(**{key: getattr(self, key) for key in self._defaults})

    def generate(self, onnx=False):
        """Build the network and load its weights.

        :param onnx: when True the model is kept on the CPU and is not
            wrapped in ``nn.DataParallel`` (used for ONNX export)
        """
        self.net = SegFormer(num_classes=self.num_classes, phi=self.phi, pretrained=False)

        # NOTE(review): ``torch.load`` unpickles the file; only load
        # checkpoints from a trusted source.
        # Weights are always loaded to the CPU first; the module is moved to
        # the GPU afterwards if requested, which gives the same final state
        # without a temporary GPU copy of the checkpoint.
        state_dict = torch.load(self.model_path, map_location=torch.device('cpu'))
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()
        print('{} model, and classes loaded.'.format(self.model_path))

        if not onnx and self.cuda:
            if torch.cuda.is_available():
                self.net = nn.DataParallel(self.net)
                self.net = self.net.cuda()
            else:
                # Fall back to the CPU instead of crashing in ``.cuda()``.
                print('CUDA was requested but is not available; running on CPU instead.')
                self.cuda = False

    def _prepare_input(self, image):
        """Resize and normalise an RGB image into a network input tensor.

        :param image: RGB image, as returned by ``cvtColor``
        :return: tuple ``(tensor, nw, nh)`` where ``nw``/``nh`` are the
            second and third values returned by ``resize_image``
        """
        image_data, nw, nh = resize_image(image, (self.input_shape[1], self.input_shape[0]))
        image_data = preprocess_input(np.array(image_data, np.float32))
        # HWC -> CHW, then add the batch dimension.
        image_data = np.expand_dims(np.transpose(image_data, (2, 0, 1)), 0)

        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        return images, nw, nh

    def _crop_padding(self, pr, nw, nh):
        """Cut the centred ``nh`` x ``nw`` window out of a network-sized map.

        :param pr: array whose first two axes are (height, width)
        :param nw: width of the window to keep
        :param nh: height of the window to keep
        :return: the cropped view of ``pr``
        """
        top = int((self.input_shape[0] - nh) // 2)
        left = int((self.input_shape[1] - nw) // 2)
        return pr[top:int(top + nh), left:int(left + nw)]

    def _predict_probabilities(self, image):
        """Run the network and return per-class probabilities at image size.

        :param image: RGB image, as returned by ``cvtColor``
        :return: tuple ``(pr, height, width)``; ``pr`` is the softmax output
            cropped and resized to ``height`` x ``width``
        """
        original_h, original_w = np.array(image).shape[:2]
        images, nw, nh = self._prepare_input(image)

        with torch.no_grad():
            pr = self.net(images)[0]
            # CHW -> HWC, softmax over the class axis.
            pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()

        pr = self._crop_padding(pr, nw, nh)
        pr = cv2.resize(pr, (original_w, original_h), interpolation=cv2.INTER_LINEAR)
        return pr, original_h, original_w

    def detect_image(self, image, count=False, name_classes=None):
        """Segment a single image.

        :param image: input image (PIL format)
        :param count: whether to count the pixels of every class
        :param name_classes: class names used as keys of the count
            dictionary; class indices are used when it is None
        :return: tuple ``(result image, count dictionary, per-class pixel
            count array)``; the dictionary is empty and the array all zeros
            when ``count`` is False
        """
        image = cvtColor(image)
        old_img = copy.deepcopy(image)  # keep the original for visualisation

        pr, _, _ = self._predict_probabilities(image)
        pr = pr.argmax(axis=-1)  # predicted class of every pixel

        count_dict = {}
        classes_nums = np.zeros([self.num_classes])
        if count:
            # One pass over the label map instead of one scan per class.
            per_class = np.bincount(pr.reshape(-1), minlength=self.num_classes)[:self.num_classes]
            keys = name_classes if name_classes is not None else range(self.num_classes)
            for i in range(self.num_classes):
                count_dict[keys[i]] = per_class[i]
            classes_nums[:] = per_class

        if self.mix_type == 0:
            # Blend original and segmentation map. With alpha = 1.0 the result
            # equals the segmentation map; lower the alpha for a real overlay.
            seg_img = np.array(self.colors, np.uint8)[pr]
            image = Image.fromarray(np.uint8(seg_img))
            image = Image.blend(old_img, image, 1.0)
        elif self.mix_type == 1:
            # Segmentation map only.
            seg_img = np.array(self.colors, np.uint8)[pr]
            image = Image.fromarray(np.uint8(seg_img))
        elif self.mix_type == 2:
            # Keep only the pixels not predicted as class 0.
            seg_img = (np.expand_dims(pr != 0, -1) * np.array(old_img, np.float32)).astype('uint8')
            image = Image.fromarray(np.uint8(seg_img))

        return image, count_dict, classes_nums

    def get_FPS(self, image, test_interval):
        """Measure the average inference time per frame.

        :param image: test image
        :param test_interval: number of timed runs (must be non-zero)
        :return: average time per frame in seconds
        """
        image = cvtColor(image)
        images, nw, nh = self._prepare_input(image)

        def run_once():
            with torch.no_grad():
                pr = self.net(images)[0]
                pr = F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy().argmax(axis=-1)
                return self._crop_padding(pr, nw, nh)

        # One untimed run before measuring.
        run_once()

        t1 = time.perf_counter()
        for _ in range(test_interval):
            run_once()
        t2 = time.perf_counter()

        tact_time = (t2 - t1) / test_interval
        return tact_time

    def convert_to_onnx(self, simplify, model_path):
        """Export the model to ONNX.

        :param simplify: whether to simplify the exported model with
            onnx-simplifier
        :param model_path: where the ONNX model is written
        """
        import onnx
        self.generate(onnx=True)

        # Dummy input of the configured size, on the CPU.
        im = torch.zeros(1, 3, *self.input_shape).to('cpu')
        input_layer_names = ["images"]
        output_layer_names = ["output"]

        print(f'Starting export with onnx {onnx.__version__}.')
        torch.onnx.export(self.net,
                          im,
                          f=model_path,
                          verbose=False,
                          opset_version=12,
                          training=torch.onnx.TrainingMode.EVAL,
                          do_constant_folding=True,
                          input_names=input_layer_names,
                          output_names=output_layer_names,
                          dynamic_axes=None)

        # Reload the exported file and validate it.
        model_onnx = onnx.load(model_path)
        onnx.checker.check_model(model_onnx)

        if simplify:
            import onnxsim
            print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
            model_onnx, check = onnxsim.simplify(
                model_onnx,
                dynamic_input_shape=False,
                input_shapes=None)
            assert check, 'assert check failed'
            onnx.save(model_onnx, model_path)  # overwrite with the simplified model

        print('Onnx model save as {}'.format(model_path))

    def get_miou_png(self, image):
        """Predict the label map used for mIoU evaluation.

        :param image: input image
        :return: PIL image whose pixel values are the predicted class indices
        """
        image = cvtColor(image)
        pr, _, _ = self._predict_probabilities(image)
        pr = pr.argmax(axis=-1)
        image = Image.fromarray(np.uint8(pr))
        return image