# ai-station-code/dimaoshibie/segformer.py

import colorsys
import copy
import time

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image
from torch import nn
import os
from dimaoshibie.nets.segformer import SegFormer
from dimaoshibie.utils.utils import cvtColor, preprocess_input, resize_image, show_config

# Location of this module on disk and the directory that contains it.
file_name = __file__
path = os.path.split(file_name)[0]

class SegFormer_Segmentation(object):
    """Inference wrapper around a SegFormer semantic-segmentation network.

    On construction the configuration in ``_defaults`` (optionally overridden
    by keyword arguments) is copied onto the instance, a colour palette is
    chosen and the network weights are loaded.  The public methods run single
    image prediction, benchmark inference time, export the network to ONNX and
    produce label images for mIoU evaluation.
    """

    # Default configuration; every key may be overridden through __init__ kwargs.
    _defaults = {
        # Path of the model weights.  Built with os.path.join on an absolute
        # directory so it stays valid even when dirname(__file__) is empty.
        "model_path": os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "logs",
            "best_epoch_weights_voc_12000.pth",
        ),
        "num_classes": 10 + 1,  # number of classes (background included)
        "phi": "b0",  # model scale (b0-b5)
        "input_shape": [512, 512],  # network input size as [height, width]
        "mix_type": 1,  # visualisation: 0-blend with input, 1-segmentation only, 2-foreground only
        "cuda": True,  # whether to run on the GPU
    }

    # Preset palette used when there are at most 11 classes.
    _PRESET_COLORS = [
        (0, 0, 0),  # Background (black)
        (252, 250, 205),  # Cropland (pale yellow)
        (0, 123, 79),  # Forest (dark green)
        (157, 221, 106),  # Grass (light green)
        (77, 208, 159),  # Shrub (light blue-green)
        (111, 208, 242),  # Wetland (light blue)
        (10, 78, 151),  # Water (dark blue)
        (92, 106, 55),  # Tundra (earthy yellow)
        (155, 36, 22),  # Impervious surface (red)
        (205, 205, 205),  # Bareland (grey)
        (211, 242, 255),  # Ice/snow (light sky blue)
    ]

    def __init__(self, **kwargs):
        """
        Apply the configuration, pick the colour palette and load the model.

        :param kwargs: overrides for any key of ``_defaults`` (other names are
            simply set as attributes as well)
        """
        # Deep-copy so mutable defaults (the input_shape list) are not shared
        # between instances or with the class-level dict.
        self.__dict__.update(copy.deepcopy(self._defaults))
        for name, value in kwargs.items():
            setattr(self, name, value)

        # Colour palette: one RGB tuple per class.
        self.colors = self._build_colors(self.num_classes)

        # Load the network, then print the configuration actually in effect
        # (overrides included) rather than the untouched defaults.
        self.generate()
        show_config(**{key: getattr(self, key) for key in self._defaults})

    @classmethod
    def _build_colors(cls, num_classes):
        """Return ``num_classes`` RGB tuples: preset palette if it is large enough, else evenly spaced hues."""
        if num_classes <= len(cls._PRESET_COLORS):
            return cls._PRESET_COLORS[:num_classes]
        return [
            tuple(int(channel * 255) for channel in colorsys.hsv_to_rgb(index / num_classes, 1., 1.))
            for index in range(num_classes)
        ]

    def generate(self, onnx=False):
        """
        Build the network, load its weights and place it on the target device.

        :param onnx: when True the network is left un-wrapped on the CPU so it
            can be exported to ONNX
        """
        self.net = SegFormer(num_classes=self.num_classes, phi=self.phi, pretrained=False)
        # Always deserialize onto the CPU: load_state_dict copies the values
        # into the freshly built (CPU) module, which is moved to the GPU below
        # only when requested.
        # NOTE: torch.load unpickles the checkpoint - only load trusted files.
        state_dict = torch.load(self.model_path, map_location=torch.device('cpu'))
        self.net.load_state_dict(state_dict)
        self.net = self.net.eval()
        print('{} model, and classes loaded.'.format(self.model_path))

        # Keep self.cuda truthful so later calls do not move inputs to a
        # device that does not exist.
        if self.cuda and not torch.cuda.is_available():
            print('CUDA is not available, falling back to CPU.')
            self.cuda = False

        if not onnx and self.cuda:
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

    def _preprocess(self, image):
        """Resize/normalise an RGB image into a 1xCxHxW tensor; returns (tensor, nw, nh) with nw/nh as given by resize_image."""
        image_data, nw, nh = resize_image(image, (self.input_shape[1], self.input_shape[0]))
        image_data = np.expand_dims(np.transpose(preprocess_input(np.array(image_data, np.float32)), (2, 0, 1)), 0)
        images = torch.from_numpy(image_data)
        if self.cuda:
            images = images.cuda()
        return images, nw, nh

    def _forward_probs(self, images):
        """Run the network on a batch of one and return the HxWxC softmax probabilities as a numpy array."""
        with torch.no_grad():
            pr = self.net(images)[0]
            return F.softmax(pr.permute(1, 2, 0), dim=-1).cpu().numpy()

    def _crop_padding(self, pr, nw, nh):
        """Cut the centred nh x nw region out of a map laid out on the network input grid."""
        top = int((self.input_shape[0] - nh) // 2)
        left = int((self.input_shape[1] - nw) // 2)
        return pr[top: int(top + nh), left: int(left + nw)]

    def _predict_labels(self, image):
        """Return the per-pixel class-index map for an RGB image, at the image's own resolution."""
        orininal_h, orininal_w = np.array(image).shape[:2]
        images, nw, nh = self._preprocess(image)
        pr = self._forward_probs(images)
        pr = self._crop_padding(pr, nw, nh)  # drop the padded border
        pr = cv2.resize(pr, (orininal_w, orininal_h), interpolation=cv2.INTER_LINEAR)  # back to input size
        return pr.argmax(axis=-1)

    def _count_pixels(self, pr, name_classes):
        """Count the pixels of every class in label map ``pr``; returns (dict keyed by class name, float array)."""
        if name_classes is None:
            # No names supplied: key the dictionary by the class index.
            name_classes = [str(index) for index in range(self.num_classes)]
        elif len(name_classes) < self.num_classes:
            raise ValueError(
                'name_classes has {} entries but num_classes is {}.'.format(len(name_classes), self.num_classes)
            )

        # One pass over the label map instead of one comparison per class.
        pixel_counts = np.bincount(np.asarray(pr).reshape(-1), minlength=self.num_classes)[:self.num_classes]
        count_dict = {}
        classes_nums = np.zeros([self.num_classes])
        for index, num in enumerate(pixel_counts):
            count_dict[name_classes[index]] = num
            classes_nums[index] = num
        return count_dict, classes_nums

    def _colorize(self, pr):
        """Map a label image to an RGB PIL image using ``self.colors``."""
        seg_img = np.array(self.colors, np.uint8)[pr]
        return Image.fromarray(np.uint8(seg_img))

    def detect_image(self, image, count=False, name_classes=None):
        """
        Predict the segmentation of a single image.

        :param image: input image (PIL format)
        :param count: whether to count the pixels of every class
        :param name_classes: class names used as keys of the count dictionary;
            when omitted the stringified class indices are used
        :return: result image, count dictionary (empty unless ``count``),
            per-class pixel-count array (zeros unless ``count``)
        :raises ValueError: if ``count`` is set and ``name_classes`` has fewer
            entries than ``num_classes``
        """
        # Convert to RGB and keep a copy of the input for the visualisation.
        image = cvtColor(image)
        old_img = copy.deepcopy(image)

        pr = self._predict_labels(image)

        # Optional pixel statistics.
        count_dict = {}
        classes_nums = np.zeros([self.num_classes])
        if count:
            count_dict, classes_nums = self._count_pixels(pr, name_classes)

        # Visualisation.  Any other mix_type value returns the RGB input unchanged.
        if self.mix_type == 0:
            # Blend input and segmentation.
            # NOTE(review): with alpha 1.0 the blend equals the segmentation
            # image alone - confirm this is intended.
            image = Image.blend(old_img, self._colorize(pr), 1.0)
        elif self.mix_type == 1:
            # Segmentation image only.
            image = self._colorize(pr)
        elif self.mix_type == 2:
            # Keep only the pixels whose predicted class is not 0.
            seg_img = (np.expand_dims(pr != 0, -1) * np.array(old_img, np.float32)).astype('uint8')
            image = Image.fromarray(np.uint8(seg_img))

        return image, count_dict, classes_nums

    def get_FPS(self, image, test_interval):
        """
        Measure the average inference time per frame.

        :param image: test image
        :param test_interval: number of timed runs (must be positive)
        :return: average time per run in seconds
        :raises ValueError: if ``test_interval`` is not positive
        """
        if test_interval <= 0:
            raise ValueError('test_interval must be a positive integer.')

        image = cvtColor(image)
        images, nw, nh = self._preprocess(image)

        def run_once():
            # Forward pass, class selection and padding removal (no resize).
            pr = self._forward_probs(images).argmax(axis=-1)
            return self._crop_padding(pr, nw, nh)

        run_once()  # untimed first run

        t1 = time.perf_counter()
        for _ in range(test_interval):
            run_once()
        t2 = time.perf_counter()
        tact_time = (t2 - t1) / test_interval  # average time per frame
        return tact_time

    def convert_to_onnx(self, simplify, model_path):
        """
        Export the model to ONNX.

        Note: this calls ``generate(onnx=True)``, which replaces ``self.net``
        with an un-wrapped CPU model; call ``generate()`` again before running
        GPU inference with this instance.

        :param simplify: whether to simplify the exported model with onnxsim
        :param model_path: path the ONNX model is written to
        """
        import onnx
        self.generate(onnx=True)

        im = torch.zeros(1, 3, *self.input_shape).to('cpu')  # dummy input tensor
        input_layer_names = ["images"]
        output_layer_names = ["output"]

        print(f'Starting export with onnx {onnx.__version__}.')
        torch.onnx.export(self.net,
                          im,
                          f=model_path,
                          verbose=False,
                          opset_version=12,
                          training=torch.onnx.TrainingMode.EVAL,
                          do_constant_folding=True,
                          input_names=input_layer_names,
                          output_names=output_layer_names,
                          dynamic_axes=None)

        model_onnx = onnx.load(model_path)  # reload the exported model
        onnx.checker.check_model(model_onnx)  # validate it

        if simplify:
            import onnxsim
            print(f'Simplifying with onnx-simplifier {onnxsim.__version__}.')
            model_onnx, check = onnxsim.simplify(
                model_onnx,
                dynamic_input_shape=False,
                input_shapes=None)
            assert check, 'assert check failed'
            onnx.save(model_onnx, model_path)  # overwrite with the simplified model

        print('Onnx model save as {}'.format(model_path))

    def get_miou_png(self, image):
        """
        Produce the prediction used for mIoU evaluation.

        :param image: input image
        :return: PIL image whose pixel values are the predicted class indices (uint8)
        """
        image = cvtColor(image)
        pr = self._predict_labels(image)
        return Image.fromarray(np.uint8(pr))