“Submit project”

This commit is contained in:
L_J 2025-05-19 20:48:24 +08:00
commit 63abdae2a9
3171 changed files with 426882 additions and 0 deletions

pytorch_segmentation/.idea/.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View File

@@ -0,0 +1,119 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="root@123.125.240.150:45809">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@124.16.151.196:10341">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@124.16.151.196:10341 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@222.187.226.110:28961">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:15907">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:26749">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:26749 (2)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:26749 (3)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:26749 (4)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:26749 (5)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@connect.east.seetacloud.com:26749 password">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:12154">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:14975">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:34252">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-8.seetacloud.com:35693">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-8.seetacloud.com:35693 (2)">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

View File

@@ -0,0 +1,27 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="12">
<item index="0" class="java.lang.String" itemvalue="sklearn" />
<item index="1" class="java.lang.String" itemvalue="tqdm" />
<item index="2" class="java.lang.String" itemvalue="scipy" />
<item index="3" class="java.lang.String" itemvalue="h5py" />
<item index="4" class="java.lang.String" itemvalue="matplotlib" />
<item index="5" class="java.lang.String" itemvalue="torch" />
<item index="6" class="java.lang.String" itemvalue="numpy" />
<item index="7" class="java.lang.String" itemvalue="torchvision" />
<item index="8" class="java.lang.String" itemvalue="opencv_python" />
<item index="9" class="java.lang.String" itemvalue="Pillow" />
<item index="10" class="java.lang.String" itemvalue="lxml" />
<item index="11" class="java.lang.String" itemvalue="requests" />
</list>
</value>
</option>
</inspection_tool>
<inspection_tool class="PyPep8NamingInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
</profile>
</component>

View File

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/pytorch_segmentation.iml" filepath="$PROJECT_DIR$/.idea/pytorch_segmentation.iml" />
</modules>
</component>
</project>

View File

@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="GOOGLE" />
<option name="myDocStringFormat" value="Google" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="py.test" />
</component>
</module>

View File

@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.9 (pytorch)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
</module>

View File

@@ -0,0 +1,140 @@
import matplotlib.pyplot as plt
import numpy as np
class ActivateFunc():
def __init__(self, x, b=1, lamb=2, alpha=1, a=2):
super(ActivateFunc, self).__init__()
self.x = x
self.b = b
self.lamb = lamb
self.alpha = alpha
self.a = a
def Sigmoid(self):
y = np.exp(self.x) / (np.exp(self.x) + 1)
y_grad = y*(1-y)
return [y, y_grad]
def Tanh(self):
y = np.tanh(self.x)
y_grad = 1 - y * y
return [y, y_grad]
    def Swish(self):  # b is a constant; set b as needed
y = self.x * (np.exp(self.b*self.x) / (np.exp(self.b*self.x) + 1))
y_grad = np.exp(self.b*self.x)/(1+np.exp(self.b*self.x)) + self.x * (self.b*np.exp(self.b*self.x) / ((1+np.exp(self.b*self.x))*(1+np.exp(self.b*self.x))))
return [y, y_grad]
    def ELU(self):  # alpha is a constant; set alpha as needed
y = np.where(self.x > 0, self.x, self.alpha * (np.exp(self.x) - 1))
y_grad = np.where(self.x > 0, 1, self.alpha * np.exp(self.x))
return [y, y_grad]
    def SELU(self):  # lamb is greater than 1; set lamb and alpha
y = np.where(self.x > 0, self.lamb * self.x, self.lamb * self.alpha * (np.exp(self.x) - 1))
y_grad = np.where(self.x > 0, self.lamb*1, self.lamb * self.alpha * np.exp(self.x))
return [y, y_grad]
def ReLU(self):
y = np.where(self.x < 0, 0, self.x)
y_grad = np.where(self.x < 0, 0, 1)
return [y, y_grad]
    def PReLU(self):  # a is greater than 1; set a
y = np.where(self.x < 0, self.x / self.a, self.x)
y_grad = np.where(self.x < 0, 1 / self.a, 1)
return [y, y_grad]
    def LeakyReLU(self):  # a is greater than 1; set a
y = np.where(self.x < 0, self.x / self.a, self.x)
y_grad = np.where(self.x < 0, 1 / self.a, 1)
return [y, y_grad]
def Mish(self):
        f = 1 + np.exp(self.x)
y = self.x * ((f*f-1) / (f*f+1))
y_grad = (f*f-1) / (f*f+1) + self.x*(4*f*(f-1)) / ((f*f+1)*(f*f+1))
return [y, y_grad]
def ReLU6(self):
y = np.where(np.where(self.x < 0, 0, self.x) > 6, 6, np.where(self.x < 0, 0, self.x))
y_grad = np.where(self.x > 6, 0, np.where(self.x < 0, 0, 1))
return [y, y_grad]
def Hard_Swish(self):
f = self.x + 3
relu6 = np.where(np.where(f < 0, 0, f) > 6, 6, np.where(f < 0, 0, f))
relu6_grad = np.where(f > 6, 0, np.where(f < 0, 0, 1))
y = self.x * relu6 / 6
y_grad = relu6 / 6 + self.x * relu6_grad / 6
return [y, y_grad]
def Hard_Sigmoid(self):
f = (2 * self.x + 5) / 10
y = np.where(np.where(f > 1, 1, f) < 0, 0, np.where(f > 1, 1, f))
y_grad = np.where(f > 0, np.where(f >= 1, 0, 1 / 5), 0)
return [y, y_grad]
def PlotActiFunc(x, y, title):
plt.grid(which='minor', alpha=0.2)
plt.grid(which='major', alpha=0.5)
plt.plot(x, y)
plt.title(title)
plt.show()
def PlotMultiFunc(x, y):
plt.grid(which='minor', alpha=0.2)
plt.grid(which='major', alpha=0.5)
plt.plot(x, y)
if __name__ == '__main__':
x = np.arange(-10, 10, 0.01)
activateFunc = ActivateFunc(x)
activateFunc.b = 1
PlotActiFunc(x, activateFunc.Sigmoid()[0], title='Sigmoid')
PlotActiFunc(x, activateFunc.Tanh()[0], title='Tanh')
PlotActiFunc(x, activateFunc.ReLU()[0], title='ReLU')
PlotActiFunc(x, activateFunc.LeakyReLU()[0], title='LeakyReLU')
PlotActiFunc(x, activateFunc.ReLU6()[0], title='ReLU6')
PlotActiFunc(x, activateFunc.Swish()[0], title='Swish')
PlotActiFunc(x, activateFunc.Mish()[0], title='Mish')
PlotActiFunc(x, activateFunc.ELU()[0], title='ELU')
PlotActiFunc(x, activateFunc.Hard_Swish()[0], title='Hard_Swish')
PlotActiFunc(x, activateFunc.Hard_Sigmoid()[0], title='Hard_Sigmoid')
plt.figure(1)
PlotMultiFunc(x, activateFunc.Swish()[0])
PlotMultiFunc(x, activateFunc.Mish()[0])
plt.legend(['Swish', 'Mish'])
plt.figure(2)
PlotMultiFunc(x, activateFunc.Swish()[0])
PlotMultiFunc(x, activateFunc.Hard_Swish()[0])
plt.legend(['Swish', 'Hard-Swish'])
plt.figure(3)
PlotMultiFunc(x, activateFunc.Sigmoid()[0])
PlotMultiFunc(x, activateFunc.Hard_Sigmoid()[0])
plt.legend(['Sigmoid', 'Hard-Sigmoid'])
plt.figure(4)
PlotMultiFunc(x, activateFunc.ReLU()[0])
PlotMultiFunc(x, activateFunc.ReLU6()[0])
plt.legend(['ReLU', 'ReLU6'])
plt.show()

View File

@@ -0,0 +1,31 @@
"""
The ReLU function
Advantages:
1. ReLU is linear for positive inputs, so it converges quickly, is cheap to compute, and behaves like the identity there. For positive inputs the derivative is 1, so the gradient passes through intact and there is no vanishing-gradient (gradient-saturation) problem.
2. It is fast to compute: ReLU involves only simple comparisons, and neither the function nor its derivative needs expensive mathematical operations, so it is faster than sigmoid and tanh.
3. When the input is greater than 0 the gradient is 1, which effectively avoids the vanishing and exploding gradients caused by multiplying many factors in the chain rule, at low computational cost.
4. It keeps the biological intuition of a step function (a neuron fires only when its input exceeds a threshold) while having a non-zero derivative for positive inputs, so gradient-based learning is possible (even though the derivative is undefined at x = 0). For negative inputs, however, learning can become very slow or the neuron can effectively die: the input is below zero, the gradient is zero, the weights never get updated, and the unit stays silent for the rest of training.
Drawbacks of ReLU:
1. When the input is negative the output is always 0 and so is the first derivative, so the neuron's parameters are never updated, i.e. the neuron stops learning. This is the "Dead Neuron" phenomenon; Leaky ReLU addresses it by introducing a small leak on the negative half of the function.
2. Like sigmoid, the output is not zero-centered: ReLU outputs are either 0 or positive.
3. Because the gradient is zero for inputs below 0, some neurons are suppressed forever and features are learned insufficiently; this is the classic Dead ReLU problem, so the random initialization should be chosen carefully to avoid feeding too many negative activations into ReLU.
"""
import torch
import torch.nn as nn
# ReLU demo
print('*' * 25 + "ReLU" + "*" * 25)
m = nn.ReLU()
input = torch.randn(2)
print("原:", input)
print("结果:", m(input))
print('*' * 50)
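# A small optional check (a minimal sketch): LeakyReLU keeps a small slope for negative inputs,
# which mitigates the "Dead Neuron" issue described in the docstring above; negative_slope=0.01
# is simply PyTorch's default value, used here as an example.
leaky = nn.LeakyReLU(negative_slope=0.01)
probe = torch.tensor([-3.0, -1.0, 0.0, 2.0])
print("ReLU:     ", nn.ReLU()(probe))   # negative inputs become 0 (zero gradient there)
print("LeakyReLU:", leaky(probe))       # negative inputs are scaled by 0.01 instead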

View File

@@ -0,0 +1,31 @@
import torch
import torch.nn as nn
# Sigmoid demo
print('*' * 25 + "Sigmoid" + "*" * 25)
m = nn.Sigmoid()
input = torch.randn(2)
print("原:", input)
print("结果:", m(input))
print('*' * 50)
"""
Advantages of sigmoid:
1. Its range is [0, 1], which makes it well suited as an output function when the model should produce a probability in (0, 1), e.g. the class probability in binary classification or a confidence score.
2. The output of the sigmoid function lies between 0 and 1; because the output is bounded to this range, it normalizes the output of every neuron.
3. The function is continuous and differentiable, so it provides very smooth gradients and prevents abrupt jumps in the output during training.
Drawbacks of sigmoid:
1. The maximum of its derivative is only 0.25, and outside roughly [-5, 5] the derivative is already nearly 0. Neurons therefore saturate during training and their weights barely get updated during backpropagation, which makes the model hard to train. This is the vanishing-gradient problem.
2. The output is not zero-centered but always positive, which lowers the efficiency of weight updates: the next layer always receives an all-positive signal from the previous layer. For this reason sigmoid is rarely used in the early layers of a network and is usually placed only in the final output layer.
3. It requires an exponential, which is relatively slow to compute, so the computational cost and complexity are high and training takes longer; the larger the input magnitude, the smaller the derivative, which again promotes vanishing gradients.
Copyright notice: this text is based on an original article by the CSDN blogger 「小wu学cv」, under the CC 4.0 BY-SA license; when reposting, include the source link and this notice.
Original article: https://blog.csdn.net/caip12999203000/article/details/127067360
"""

View File

@@ -0,0 +1,31 @@
"""
Advantages of tanh:
1. In classification tasks the hyperbolic tangent (tanh) has gradually replaced sigmoid as a standard activation; it has several properties neural networks favor: it is fully differentiable and antisymmetric, with its center of symmetry at the origin.
2. The output is an S-shaped curve that breaks the linearity between network layers and maps the layer output non-linearly into (-1, 1). Negative inputs are mapped to negative values and inputs near zero stay close to zero; the output interval is centered at 0 with range [-1, 1], which fixes the problem that sigmoid's output is not zero-centered.
3. In typical binary classification problems, tanh is used in the hidden layers and sigmoid in the output layer, but this is not a fixed rule and should be adapted to the specific problem.
Drawbacks of tanh:
1. When the input is large in magnitude the output is almost flat and the gradient is small, which is bad for weight updates.
2. Like sigmoid, tanh requires exponential operations, so it is also computationally expensive.
3. As the network gets deeper, backpropagation multiplies many derivatives along the chain; once the function enters its saturated region (derivative close to zero), the small gradients propagate layer by layer — the vanishing-gradient phenomenon.
Copyright notice: this text is based on an original article by the CSDN blogger 「小wu学cv」, under the CC 4.0 BY-SA license; when reposting, include the source link and this notice.
Original article: https://blog.csdn.net/caip12999203000/article/details/127067360
"""
import torch
import torch.nn as nn
# Tanh demo
print('*' * 25 + "Tanh" + "*" * 25)
m = nn.Tanh()
input = torch.randn(2)
print("原:", input)
print("结果:", m(input))
print('*' * 50)
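# A small optional check (a minimal sketch): tanh is a rescaled, zero-centered sigmoid,
# tanh(x) = 2 * sigmoid(2x) - 1, which is why its output lies in (-1, 1) and is centered at 0.
probe = torch.linspace(-3, 3, steps=7)
print(torch.allclose(torch.tanh(probe), 2 * torch.sigmoid(2 * probe) - 1, atol=1e-6))  # True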

View File

@@ -0,0 +1,325 @@
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F  # needed by DecoderAttentionBlock (adaptive average/max pooling)
from torch.utils.data import DataLoader, TensorDataset, random_split
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
def load_data(pix, use_type='train'):
datasets = list()
file_list = [x for x in os.listdir(f"./out_mat/{pix}/{use_type}/") if x.endswith('.npy')][:3000]
for file in file_list:
file_img = np.load(f"./out_mat/{pix}/{use_type}/{file}")[:,:,:1]
datasets.append(file_img)
return np.asarray(datasets)
train_set = load_data(96, 'train')
val_set = load_data(96, 'valid')
test_set = load_data(96, 'test')
def load_mask(mask_rate):
mask_files = os.listdir(f'./out_mat/96/mask/{mask_rate}')
masks = list()
for file in mask_files:
d = cv2.imread(f'./out_mat/96/mask/{mask_rate}/{file}', cv2.IMREAD_GRAYSCALE)
d = (d > 0) * 1
masks.append(d)
return np.asarray(masks)
masks = load_mask(20)
maxs = train_set.max(axis=0)
mins = train_set.min(axis=0)
len(train_set)
norm_train = (train_set - mins) / (maxs-mins)
del train_set
norm_valid = (val_set - mins) / (maxs-mins)
del val_set
norm_test = (test_set - mins) / (maxs-mins)
del test_set
norm_train.shape
trans_train = np.transpose(norm_train, (0, 3, 1, 2))
trans_val = np.transpose(norm_valid, (0, 3, 1, 2))
trans_test = np.transpose(norm_test, (0, 3, 1, 2))
# Helper to visualize a feature: input, masked input, and reconstruction
def visualize_feature(input_feature,masked_feature, output_feature, title):
plt.figure(figsize=(12, 6))
plt.subplot(1, 3, 1)
plt.imshow(input_feature[0].cpu().numpy())
plt.title(title + " Input")
plt.subplot(1, 3, 2)
plt.imshow(masked_feature[0].cpu().numpy())
plt.title(title + " Masked")
plt.subplot(1, 3, 3)
plt.imshow(output_feature[0].detach().cpu().numpy())
plt.title(title + " Recovery")
plt.show()
# Set random seeds for reproducibility
torch.manual_seed(0)
np.random.seed(0)
# Data preparation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Convert the numpy arrays to PyTorch tensors
tensor_train = torch.tensor(trans_train.astype(np.float32), device=device)
tensor_valid = torch.tensor(trans_val.astype(np.float32), device=device)
tensor_test = torch.tensor(trans_test.astype(np.float32), device=device)
# Build datasets and data loaders
train_set = TensorDataset(tensor_train, tensor_train)  # inputs and targets are identical because this is an autoencoder
val_set = TensorDataset(tensor_valid, tensor_valid)
test_set = TensorDataset(tensor_test, tensor_test)
batch_size = 64
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
def mask_data(data, device, masks):
mask_inds = np.random.choice(masks.shape[0], data.shape[0])
mask = torch.from_numpy(masks[mask_inds]).to(device)
tmp_first_channel = data[:, 0, :, :] * mask
masked_data = torch.clone(data)
masked_data[:, 0, :, :] = tmp_first_channel
return masked_data
class SEBlock(nn.Module):
def __init__(self, in_channels, reduced_dim):
super(SEBlock, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
nn.ReLU(),
nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
nn.Sigmoid()
)
def forward(self, x):
return x * self.se(x)
class Conv(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False):
super(Conv, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2)
)
class ConvBNReLU(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False):
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2),
norm_layer(out_channels),
nn.ReLU()
)
class SeparableBNReLU(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, norm_layer=nn.BatchNorm2d):
super(SeparableBNReLU, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, dilation=dilation,
padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, groups=in_channels, bias=False),
norm_layer(out_channels),
nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False),
nn.ReLU6()
)
class ResidualBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, downsample=None):
super(ResidualBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.downsample = downsample
if in_channels != out_channels or stride != 1:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity
out = self.relu(out)
return out
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True)
self.act = act_layer()
self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True)
self.drop = nn.Dropout(drop, inplace=True)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class MultiHeadAttentionBlock(nn.Module):
def __init__(self, embed_dim, num_heads, dropout=0.1):
super(MultiHeadAttentionBlock, self).__init__()
self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
self.norm = nn.LayerNorm(embed_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
B, C, H, W = x.shape
x = x.view(B, C, H * W).permute(2, 0, 1) # (B, C, H, W) -> (HW, B, C)
attn_output, _ = self.attention(x, x, x)
attn_output = self.norm(attn_output)
attn_output = self.dropout(attn_output)
attn_output = attn_output.permute(1, 2, 0).view(B, C, H, W)
return attn_output
class SpatialAttentionBlock(nn.Module):
def __init__(self):
super(SpatialAttentionBlock, self).__init__()
self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
def forward(self, x): #(B, 64, H, W)
avg_out = torch.mean(x, dim=1, keepdim=True) #(B, 1, H, W)
max_out, _ = torch.max(x, dim=1, keepdim=True)#(B, 1, H, W)
out = torch.cat([avg_out, max_out], dim=1)#(B, 2, H, W)
out = torch.sigmoid(self.conv(out))#(B, 1, H, W)
return x * out #(B, C, H, W)
class DecoderAttentionBlock(nn.Module):
def __init__(self, in_channels):
super(DecoderAttentionBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)
self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1)
self.spatial_attention = SpatialAttentionBlock()
def forward(self, x):
        # Channel attention
b, c, h, w = x.size()
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.conv1(avg_pool)
max_out = self.conv1(max_pool)
out = avg_out + max_out
out = torch.sigmoid(self.conv2(out))
        # Then apply spatial attention
out = x * out
out = self.spatial_attention(out)
return out
class MaskedAutoencoder(nn.Module):
def __init__(self):
super(MaskedAutoencoder, self).__init__()
self.encoder = nn.Sequential(
Conv(1, 32, kernel_size=3, stride=2),
nn.ReLU(),
SEBlock(32,32),
ConvBNReLU(32, 64, kernel_size=3, stride=2),
ResidualBlock(64,64),
SeparableBNReLU(64, 128, kernel_size=3, stride=2),
MultiHeadAttentionBlock(embed_dim=128, num_heads=4),
SEBlock(128, 128)
)
self.mlp = Mlp(in_features=128, hidden_features=256, out_features=128, act_layer=nn.ReLU6, drop=0.1)
self.decoder = nn.Sequential(
nn.ConvTranspose2d(128, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
nn.ReLU(),
DecoderAttentionBlock(32),
nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
nn.ReLU(),
DecoderAttentionBlock(16),
nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),  # output_padding=1 so the spatial size doubles exactly
nn.Sigmoid()
)
def forward(self, x):
encoded = self.encoder(x)
print("Encoded size:", encoded.size())
decoded = self.decoder(encoded)
print("Encoded size:", decoded.size())
return decoded
# Instantiate the model, loss function, and optimizer
model = MaskedAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# Training function
def train_epoch(model, device, data_loader, criterion, optimizer):
model.train()
running_loss = 0.0
for batch_idx, (data, _) in enumerate(data_loader):
masked_data = mask_data(data, device, masks)
optimizer.zero_grad()
reconstructed = model(masked_data)
loss = criterion(reconstructed, data)
loss.backward()
optimizer.step()
running_loss += loss.item()
return running_loss / (batch_idx + 1)
# Evaluation (validation) function
def evaluate(model, device, data_loader, criterion):
model.eval()
running_loss = 0.0
with torch.no_grad():
for batch_idx, (data, _) in enumerate(data_loader):
data = data.to(device)
masked_data = mask_data(data, device, masks)
reconstructed = model(masked_data)
if batch_idx == 8:
rand_ind = np.random.randint(0, len(data))
visualize_feature(data[rand_ind], masked_data[rand_ind], reconstructed[rand_ind], title='NO_2')
loss = criterion(reconstructed, data)
running_loss += loss.item()
return running_loss / (batch_idx + 1)
# Test function
def test(model, device, data_loader):
model.eval()
with torch.no_grad():
for batch_idx, (data, _) in enumerate(data_loader):
data = data.to(device)
masked_data = mask_data(data, device, masks)
masked_ind = np.argwhere(masked_data[0][0]==0)
reconstructed = model(masked_data)
recon_no2 = reconstructed[0][0]
ori_no2 = data[0][0]
return
model = model.to(device)
num_epochs = 100
train_losses = list()
val_losses = list()
for epoch in range(num_epochs):
train_loss = train_epoch(model, device, train_loader, criterion, optimizer)
train_losses.append(train_loss)
val_loss = evaluate(model, device, val_loader, criterion)
val_losses.append(val_loss)
print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Val Loss: {val_loss}')
# Evaluate on the test set
test_loss = evaluate(model, device, test_loader, criterion)
print(f'Test Loss: {test_loss}')
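# Optional wrap-up (a minimal sketch; the file names below are placeholders, not part of the
# original pipeline): plot the collected loss curves and save the trained weights.
plt.figure()
plt.plot(train_losses, label='train')
plt.plot(val_losses, label='valid')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.savefig('loss_curve.png')
torch.save(model.state_dict(), 'masked_autoencoder.pth')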

View File

@@ -0,0 +1,100 @@
import torch
import torch.nn as nn
class MLP(nn.Module):
def __init__(self, input_dim, output_dim):
super(MLP, self).__init__()
self.fc1 = nn.Linear(input_dim, output_dim)
        self.act = nn.GELU()  # GELU activation
self.fc2 = nn.Linear(output_dim, input_dim)
def forward(self, x):
return self.fc2(self.act(self.fc1(x)))
class Attention(nn.Module):
def __init__(self, dim, heads):
super(Attention, self).__init__()
self.heads = heads
self.dim = dim
self.scale = dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3)
self.attn_drop = nn.Dropout(0.1)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(0.1)
def forward(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.heads, C // self.heads).permute(2, 0, 3, 1,
4) # (3, B, heads, N, head_dim)
q, k, v = qkv[0], qkv[1], qkv[2]
attn = (q @ k.transpose(-2, -1)) * self.scale
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
out = (attn @ v).transpose(1, 2).reshape(B, N, C)
return self.proj_drop(self.proj(out))
class ViTEncoder(nn.Module):
def __init__(self, img_size=96, patch_size=8, dim=128, depth=4, heads=4, mlp_dim=256):
super(ViTEncoder, self).__init__()
self.patch_size = patch_size
self.dim = dim
self.patch_embedding = nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size)
self.attention_layers = nn.ModuleList([
nn.Sequential(
Attention(dim, heads),
MLP(dim, mlp_dim)
) for _ in range(depth)
])
def forward(self, x):
        x = self.patch_embedding(x)  # shape becomes (batch_size, dim, num_patches_h, num_patches_w)
        x = x.flatten(2).transpose(1, 2)  # shape becomes (batch_size, num_patches, dim)
for attention_layer in self.attention_layers:
            x = attention_layer[0](x) + x  # self-attention with residual connection
            x = attention_layer[1](x) + x  # MLP with residual connection
return x
class ConvDecoder(nn.Module):
def __init__(self, dim=128, patch_size=8, img_size=96):
super(ConvDecoder, self).__init__()
self.dim = dim
self.patch_size = patch_size
self.img_size = img_size
self.decoder = nn.Sequential(
nn.ConvTranspose2d(dim, 128, kernel_size=patch_size, stride=patch_size),
nn.ReLU(),
nn.ConvTranspose2d(128, 1, kernel_size=3, stride=1, padding=1)
)
def forward(self, x):
x = x.transpose(1, 2).view(-1, self.dim, self.img_size // self.patch_size, self.img_size // self.patch_size)
x = self.decoder(x)
return x
class MAEModel(nn.Module):
def __init__(self, encoder, decoder):
super(MAEModel, self).__init__()
self.encoder = encoder
self.decoder = decoder
def forward(self, x, mask):
encoded = self.encoder(x)
decoded = self.decoder(encoded)
return decoded * mask
# MAEModel needs an encoder, a decoder, and a mask at call time; the default ViTEncoder/ConvDecoder
# settings assume 96x96 single-channel inputs split into 8x8 patches. The all-ones mask below is
# only a placeholder (a real mask would zero out the patches to be reconstructed).
model = MAEModel(ViTEncoder(), ConvDecoder())
x = torch.randn(1, 1, 96, 96)
mask = torch.ones_like(x)
output = model(x, mask)
print(output.shape)  # torch.Size([1, 1, 96, 96])

View File

@@ -0,0 +1,90 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class MLP(nn.Module):
def __init__(self, input_dim, output_dim):
super(MLP, self).__init__()
self.fc1 = nn.Linear(input_dim, output_dim)
        self.act = nn.GELU()  # GELU activation
self.fc2 = nn.Linear(output_dim, input_dim)
def forward(self, x):
return self.fc2(self.act(self.fc1(x)))
class Attention(nn.Module):
def __init__(self, dim, heads):
super(Attention, self).__init__()
self.heads = heads
self.dim = dim
self.scale = dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3)
self.attn_drop = nn.Dropout(0.1)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(0.1)
def forward(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.heads, C // self.heads).permute(2, 0, 3, 1,
4) # (3, B, heads, N, head_dim)
q, k, v = qkv[0], qkv[1], qkv[2]
attn = (q @ k.transpose(-2, -1)) * self.scale
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
out = (attn @ v).transpose(1, 2).reshape(B, N, C)
return self.proj_drop(self.proj(out))
class ViTEncoder(nn.Module):
def __init__(self, img_size=96, patch_size=8, dim=128, depth=4, heads=4, mlp_dim=256):
super(ViTEncoder, self).__init__()
self.patch_size = patch_size
self.dim = dim
self.patch_embedding = nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size)
self.attention_layers = nn.ModuleList([
nn.Sequential(
Attention(dim, heads),
MLP(dim, mlp_dim)
) for _ in range(depth)
])
def forward(self, x):
        x = self.patch_embedding(x)  # shape becomes (batch_size, dim, num_patches_h, num_patches_w)
        x = x.flatten(2).transpose(1, 2)  # shape becomes (batch_size, num_patches, dim)
for attention_layer in self.attention_layers:
            x = attention_layer[0](x) + x  # self-attention with residual connection
            x = attention_layer[1](x) + x  # MLP with residual connection
return x
class ConvDecoder(nn.Module):
def __init__(self, dim=128, patch_size=8, img_size=96):
super(ConvDecoder, self).__init__()
self.dim = dim
self.patch_size = patch_size
self.img_size = img_size
self.decoder = nn.Sequential(
nn.ConvTranspose2d(dim, 128, kernel_size=patch_size, stride=patch_size),
nn.ReLU(),
nn.ConvTranspose2d(128, 1, kernel_size=3, stride=1, padding=1)
)
def forward(self, x):
x = x.transpose(1, 2).view(-1, self.dim, self.img_size // self.patch_size, self.img_size // self.patch_size)
x = self.decoder(x)
return x
# ConvDecoder expects patch tokens of shape (B, N, dim); with the defaults
# (img_size=96, patch_size=8, dim=128) that means N = (96 // 8) ** 2 = 144.
model = ConvDecoder()
x = torch.randn(1, 144, 128)
output = model(x)
print(output.shape)  # torch.Size([1, 1, 96, 96])

View File

@@ -0,0 +1,215 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class SEBlock(nn.Module):
def __init__(self, in_channels, reduced_dim):
super(SEBlock, self).__init__()
self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),  # global average pooling
nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
nn.ReLU(),
nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
            nn.Sigmoid()  # sigmoid so that the per-channel weights lie in (0, 1)
)
def forward(self, x):
return x * self.se(x)
# Building blocks for the Masked Autoencoder model
class Conv(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False):
super(Conv, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2)
)
class ConvBNReLU(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d,
bias=False):
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2),
norm_layer(out_channels),
nn.ReLU()
)
class SeparableBNReLU(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, norm_layer= nn.BatchNorm2d):
super(SeparableBNReLU, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, dilation=dilation,
padding=((stride - 1) + dilation * (kernel_size -1))//2, groups=in_channels, bias=False),
norm_layer(out_channels),
            nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False),  # pointwise conv must take out_channels here, since the depthwise conv above already produced out_channels
nn.ReLU6()
)
class ResidualBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, downsample=None):
super(ResidualBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
self.downsample = downsample
if in_channels != out_channels or stride != 1:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity
out = self.relu(out)
return out
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True)
self.act = act_layer()
self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True)
self.drop = nn.Dropout(drop, inplace=True)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class MultiHeadAttentionBlock(nn.Module):
def __init__(self, embed_dim, num_heads, dropout=0.1):
super(MultiHeadAttentionBlock, self).__init__()
self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
self.norm = nn.LayerNorm(embed_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
# (B, C, H, W) -> (HW, B, C) for MultiheadAttention compatibility
B, C, H, W = x.shape
x = x.view(B, C, H * W).permute(2, 0, 1) # (B, C, H, W) -> (HW, B, C)
# Apply multihead attention
attn_output, _ = self.attention(x, x, x)
# Apply normalization and dropout
attn_output = self.norm(attn_output)
attn_output = self.dropout(attn_output)
# Reshape back to (B, C, H, W)
attn_output = attn_output.permute(1, 2, 0).view(B, C, H, W)
return attn_output
class SpatialAttentionBlock(nn.Module):
def __init__(self):
super(SpatialAttentionBlock, self).__init__()
self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
def forward(self, x):
avg_out = torch.mean(x, dim=1, keepdim=True)
max_out, _ = torch.max(x, dim=1, keepdim=True)
out = torch.cat([avg_out, max_out], dim=1)
out = torch.sigmoid(self.conv(out))
return x * out
class DecoderAttentionBlock(nn.Module):
def __init__(self, in_channels):
super(DecoderAttentionBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)
self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1)
self.spatial_attention = SpatialAttentionBlock()
def forward(self, x):
        # Channel attention
b, c, h, w = x.size()
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.conv1(avg_pool)
max_out = self.conv1(max_pool)
out = avg_out + max_out
out = torch.sigmoid(self.conv2(out))
        # Then apply spatial attention
out = x * out
out = self.spatial_attention(out)
return out
class MaskedAutoencoder(nn.Module):
def __init__(self):
super(MaskedAutoencoder, self).__init__()
self.encoder = nn.Sequential(
Conv(1, 32, kernel_size=3, stride=2),
nn.ReLU(),
SEBlock(32,32),
ConvBNReLU(32, 64, kernel_size=3, stride=2),
ResidualBlock(64,64),
SeparableBNReLU(64, 128, kernel_size=3, stride=2),
MultiHeadAttentionBlock(embed_dim=128, num_heads=4),
SEBlock(128, 128)
)
self.mlp = Mlp(in_features=128, hidden_features=256, out_features=128, act_layer=nn.ReLU6, drop=0.1)
self.decoder = nn.Sequential(
nn.ConvTranspose2d(128, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
nn.ReLU(),
            DecoderAttentionBlock(128),  # attention block after the first upsampling layer
nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
nn.ReLU(),
nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            DecoderAttentionBlock(32),  # attention block before the last layer
nn.ReLU(),
nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()  # sigmoid output
)
# class MaskedAutoencoder(nn.Module):
# def __init__(self):
# super(MaskedAutoencoder, self).__init__()
# self.encoder = nn.Sequential(
# nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
# nn.ReLU(),
# nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
# nn.ReLU(),
# nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
# nn.ReLU(),
# SEBlock(128, 128)
# )
# self.decoder = nn.Sequential(
# nn.ConvTranspose2d(128, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
# nn.ReLU(),
# nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
# nn.ReLU(),
# nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
#             nn.Sigmoid()  # sigmoid because the input data lies in [0, 1]
# )
#
# def forward(self, x):
# encoded = self.encoder(x)
# decoded = self.decoder(encoded)
# return decoded
# Instantiate the model, loss function, and optimizer
model = MaskedAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

View File

@@ -0,0 +1,190 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
class SEBlock(nn.Module):
def __init__(self, in_channels, reduced_dim):
super(SEBlock, self).__init__()
self.se = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
nn.ReLU(),
nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
nn.Sigmoid()
)
def forward(self, x):
return x * self.se(x)
class Conv(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False):
super(Conv, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2)
)
class ConvBNReLU(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False):
super(ConvBNReLU, self).__init__(
nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2),
norm_layer(out_channels),
nn.ReLU()
)
class SeparableBNReLU(nn.Sequential):
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, norm_layer=nn.BatchNorm2d):
super(SeparableBNReLU, self).__init__(
nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, dilation=dilation,
padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, groups=in_channels, bias=False),
            # depthwise convolution: it mixes spatial information only
            norm_layer(in_channels),  # normalize over the input channels
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),  # pointwise convolution raises the channel count here
nn.ReLU6()
)
class ResidualBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride=1, downsample=None):
super(ResidualBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
self.bn1 = nn.BatchNorm2d(out_channels)
self.relu = nn.ReLU(inplace=True)
self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
self.bn2 = nn.BatchNorm2d(out_channels)
        # If the channel count or stride changes, downsample the identity branch to match
self.downsample = downsample
if in_channels != out_channels or stride != 1:
self.downsample = nn.Sequential(
nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(out_channels)
)
def forward(self, x):
identity = x
if self.downsample is not None:
identity = self.downsample(x)
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
out += identity
out = self.relu(out)
return out
class Mlp(nn.Module):
def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
super().__init__()
out_features = out_features or in_features
hidden_features = hidden_features or in_features
self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True)
self.act = act_layer()
self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True)
self.drop = nn.Dropout(drop, inplace=True)
def forward(self, x):
x = self.fc1(x)
x = self.act(x)
x = self.drop(x)
x = self.fc2(x)
x = self.drop(x)
return x
class MultiHeadAttentionBlock(nn.Module):
def __init__(self, embed_dim, num_heads, dropout=0.1):
super(MultiHeadAttentionBlock, self).__init__()
self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
self.norm = nn.LayerNorm(embed_dim)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
B, C, H, W = x.shape
x = x.view(B, C, H * W).permute(2, 0, 1) # (B, C, H, W) -> (HW, B, C)
attn_output, _ = self.attention(x, x, x)
attn_output = self.norm(attn_output)
attn_output = self.dropout(attn_output)
attn_output = attn_output.permute(1, 2, 0).view(B, C, H, W)
return attn_output
class SpatialAttentionBlock(nn.Module):
def __init__(self):
super(SpatialAttentionBlock, self).__init__()
self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)
def forward(self, x): #(B, 64, H, W)
avg_out = torch.mean(x, dim=1, keepdim=True) #(B, 1, H, W)
max_out, _ = torch.max(x, dim=1, keepdim=True)#(B, 1, H, W)
out = torch.cat([avg_out, max_out], dim=1)#(B, 2, H, W)
out = torch.sigmoid(self.conv(out))#(B, 1, H, W)
return x * out #(B, C, H, W)
class DecoderAttentionBlock(nn.Module):
def __init__(self, in_channels):
super(DecoderAttentionBlock, self).__init__()
self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)
self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1)
self.spatial_attention = SpatialAttentionBlock()
def forward(self, x):
        # Channel attention
b, c, h, w = x.size()
avg_pool = F.adaptive_avg_pool2d(x, 1)
max_pool = F.adaptive_max_pool2d(x, 1)
avg_out = self.conv1(avg_pool)
max_out = self.conv1(max_pool)
out = avg_out + max_out
out = torch.sigmoid(self.conv2(out))
        # Then apply spatial attention
out = x * out
out = self.spatial_attention(out)
return out
class MaskedAutoencoder(nn.Module):
def __init__(self):
super(MaskedAutoencoder, self).__init__()
self.encoder = nn.Sequential(
Conv(1, 32, kernel_size=3, stride=2),
nn.ReLU(),
SEBlock(32,32),
ConvBNReLU(32, 64, kernel_size=3, stride=2),
ResidualBlock(64,64),
SeparableBNReLU(64, 128, kernel_size=3, stride=2),
MultiHeadAttentionBlock(embed_dim=128, num_heads=4),
SEBlock(128, 128)
)
self.mlp = Mlp(in_features=128, hidden_features=256, out_features=128, act_layer=nn.ReLU6, drop=0.1)
self.decoder = nn.Sequential(
nn.ConvTranspose2d(128, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
nn.ReLU(),
DecoderAttentionBlock(32),
nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
nn.ReLU(),
DecoderAttentionBlock(16),
nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),  # output_padding=1 so the spatial size doubles exactly
nn.Sigmoid()
)
def forward(self, x):
encoded = self.encoder(x)
print("Encoded size:", encoded.size())
decoded = self.decoder(encoded)
print("Encoded size:", decoded.size())
return decoded
model = MaskedAutoencoder()
x = torch.randn(1, 1, 256, 256)
output = model(x)
print(output.shape)

View File

@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/

View File

@@ -0,0 +1,182 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
<serverData>
<paths name="root@connect.westa.seetacloud.com:41442">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-3.seetacloud.com:47627">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-3.seetacloud.com:60211">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:10087">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:17758">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:18218">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:24544">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:26650">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:29425">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:30917">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:52181">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:56391">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:56529">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-41.seetacloud.com:59186">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:16236">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:18720">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:23687">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:26700">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:34775">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:35796">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:39635">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-42.seetacloud.com:46129">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-45.autodl.pro:45028">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-45.autodl.pro:48066">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
<paths name="root@region-45.autodl.pro:54865">
<serverdata>
<mappings>
<mapping local="$PROJECT_DIR$" web="/" />
</mappings>
</serverdata>
</paths>
</serverData>
</component>
</project>

View File

@@ -0,0 +1,46 @@
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
<option name="ignoredPackages">
<value>
<list size="33">
<item index="0" class="java.lang.String" itemvalue="scikit-image" />
<item index="1" class="java.lang.String" itemvalue="protobuf" />
<item index="2" class="java.lang.String" itemvalue="torchmetrics" />
<item index="3" class="java.lang.String" itemvalue="scikit-learn" />
<item index="4" class="java.lang.String" itemvalue="PyYAML" />
<item index="5" class="java.lang.String" itemvalue="dgl" />
<item index="6" class="java.lang.String" itemvalue="opencv-python-headless" />
<item index="7" class="java.lang.String" itemvalue="imagecodecs" />
<item index="8" class="java.lang.String" itemvalue="histocartography" />
<item index="9" class="java.lang.String" itemvalue="wandb" />
<item index="10" class="java.lang.String" itemvalue="mmcv-full" />
<item index="11" class="java.lang.String" itemvalue="tifffile" />
<item index="12" class="java.lang.String" itemvalue="timm" />
<item index="13" class="java.lang.String" itemvalue="opencv-python" />
<item index="14" class="java.lang.String" itemvalue="h5py" />
<item index="15" class="java.lang.String" itemvalue="loguru" />
<item index="16" class="java.lang.String" itemvalue="addict" />
<item index="17" class="java.lang.String" itemvalue="omegaconf" />
<item index="18" class="java.lang.String" itemvalue="albumentations" />
<item index="19" class="java.lang.String" itemvalue="tqdm" />
<item index="20" class="java.lang.String" itemvalue="pytorch-lightning" />
<item index="21" class="java.lang.String" itemvalue="tensorboard" />
<item index="22" class="java.lang.String" itemvalue="pytorch-toolbelt" />
<item index="23" class="java.lang.String" itemvalue="openslide-python" />
<item index="24" class="java.lang.String" itemvalue="einops" />
<item index="25" class="java.lang.String" itemvalue="Pillow" />
<item index="26" class="java.lang.String" itemvalue="pandas" />
<item index="27" class="java.lang.String" itemvalue="scipy" />
<item index="28" class="java.lang.String" itemvalue="matplotlib" />
<item index="29" class="java.lang.String" itemvalue="segmentation-models-pytorch" />
<item index="30" class="java.lang.String" itemvalue="torch" />
<item index="31" class="java.lang.String" itemvalue="numpy" />
<item index="32" class="java.lang.String" itemvalue="torchvision" />
</list>
</value>
</option>
</inspection_tool>
</profile>
</component>

View File

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>

View File

@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (yolov8) (20)" project-jdk-type="Python SDK" />
</project>

View File

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/模块缝合库.iml" filepath="$PROJECT_DIR$/.idea/模块缝合库.iml" />
</modules>
</component>
</project>

View File

@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.9 (yolov8) (20)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyDocumentationSettings">
<option name="format" value="PLAIN" />
<option name="myDocStringFormat" value="Plain" />
</component>
<component name="TestRunnerService">
<option name="PROJECT_TEST_RUNNER" value="pytest" />
</component>
</module>

View File

@@ -0,0 +1,80 @@
# https://tinyurl.com/5ft8v46w
"""
Key features of this module:
Linear projections: two linear layers, to_query and to_key, map the input features from in_dims to token_dim * num_heads; their outputs are used to compute the query and the key.
Learnable weight: a learnable weight vector w_g of shape (token_dim * num_heads, 1) is used to compute the additive attention weights.
Normalization: torch.nn.functional.normalize applies L2 normalization to the query and key so that they have unit length.
Weight computation: the dot product of the query with w_g, multiplied by the scale factor scale_factor (the inverse square root of token_dim), gives the additive attention weights A.
Normalization of A: A is normalized along the sequence-length dimension.
Weighted sum: multiplying the attention weights A with the query and summing over the sequence dimension gives the global context vector G.
Broadcasting G: einops.repeat expands the global context vector G to the same shape as the key.
Attention: the expanded G is multiplied with the key and the original query is added, giving the attention-weighted output.
Projection: the linear layer Proj projects the attention-weighted output from token_dim * num_heads to token_dim * num_heads.
Final projection: the linear layer final maps the projected output from token_dim * num_heads down to token_dim, producing the final output.
Overall, this module implements an efficient additive attention mechanism that learns global context over the input sequence and fuses the weighted global context with the original features to produce the output. Such modules are typically used as the attention part of Transformer-style models for sequence data, e.g. in NLP.
"""
import torch
import torch.nn as nn
import einops
class EfficientAdditiveAttnetion(nn.Module):
"""
Efficient Additive Attention module for SwiftFormer.
Input: tensor in shape [B, N, D]
Output: tensor in shape [B, N, D]
"""
def __init__(self, in_dims=512, token_dim=256, num_heads=2):
super().__init__()
self.to_query = nn.Linear(in_dims, token_dim * num_heads)
self.to_key = nn.Linear(in_dims, token_dim * num_heads)
self.w_g = nn.Parameter(torch.randn(token_dim * num_heads, 1))
self.scale_factor = token_dim ** -0.5
self.Proj = nn.Linear(token_dim * num_heads, token_dim * num_heads)
self.final = nn.Linear(token_dim * num_heads, token_dim)
def forward(self, x):
query = self.to_query(x)
key = self.to_key(x)
query = torch.nn.functional.normalize(query, dim=-1) # BxNxD
key = torch.nn.functional.normalize(key, dim=-1) # BxNxD
query_weight = query @ self.w_g # BxNx1 (BxNxD @ Dx1)
A = query_weight * self.scale_factor # BxNx1
A = torch.nn.functional.normalize(A, dim=1) # BxNx1
G = torch.sum(A * query, dim=1) # BxD
G = einops.repeat(
G, "b d -> b repeat d", repeat=key.shape[1]
) # BxNxD
out = self.Proj(G * key) + query # BxNxD
out = self.final(out) # BxNxD
return out
# Input: (B, N, in_dims); output: (B, N, token_dim)
if __name__ == '__main__':
block = EfficientAdditiveAttnetion(64, 32).cuda()
input = torch.rand(3, 64 * 64, 64).cuda()
output = block(input)
print(input.size(), output.size())

View File

@@ -0,0 +1,57 @@
# https://github.com/zcablii/Large-Selective-Kernel-Network
"""
Main components and operations of this module:
conv0: a depthwise convolution with a 5x5 kernel (groups=dim puts each input channel in its own group), intended to capture local spatial features of the input.
conv_spatial: another depthwise convolution with a 7x7 kernel, stride=1, padding=9 and dilation=3, so it captures spatial features over a much larger receptive field.
conv1 and conv2: two 1x1 convolutions that reduce the channel count to dim // 2; they are applied to the outputs of conv0 and conv_spatial respectively.
conv_squeeze: a 7x7 convolution over the 2-channel average/max descriptor (2 channels in, 2 channels out) whose sigmoid maps the branch weights into (0, 1).
conv: a 1x1 convolution that restores the channel count from dim // 2 back to dim, so the final output has the same number of channels as the input.
In the forward pass, the module weights the two branches with the sigmoid attention values and multiplies the input feature map by the resulting attention map.
The purpose of this LSKblock module is to introduce spatial and channel attention so that the important information in the input feature map is captured better.
"""
import torch
import torch.nn as nn
class LSKblock(nn.Module):
def __init__(self, dim):
super().__init__()
self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
self.conv1 = nn.Conv2d(dim, dim // 2, 1)
self.conv2 = nn.Conv2d(dim, dim // 2, 1)
self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3)
self.conv = nn.Conv2d(dim // 2, dim, 1)
def forward(self, x):
attn1 = self.conv0(x)
attn2 = self.conv_spatial(attn1)
attn1 = self.conv1(attn1)
attn2 = self.conv2(attn2)
attn = torch.cat([attn1, attn2], dim=1)
avg_attn = torch.mean(attn, dim=1, keepdim=True)
max_attn, _ = torch.max(attn, dim=1, keepdim=True)
agg = torch.cat([avg_attn, max_attn], dim=1)
sig = self.conv_squeeze(agg).sigmoid()
attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + attn2 * sig[:, 1, :, :].unsqueeze(1)
attn = self.conv(attn)
return x * attn
# Input: (N, C, H, W); output: (N, C, H, W)
if __name__ == '__main__':
block = LSKblock(64).cuda()
input = torch.rand(1, 64, 64, 64).cuda()
output = block(input)
print(input.size(), output.size())

View File

@@ -0,0 +1,110 @@
# https://www.haoranyou.com/castling-vit/
"""
Main components and operations of this module:
qkv: a linear layer that maps the input features x into three projections, query, key and value, whose channels are split across multiple heads.
attn_drop and proj_drop: Dropout applied to the attention matrix and to the output features.
kq_matmul, kqv_matmul and qk_matmul: small matrix-multiplication wrappers for the different products in the attention computation; in the code kq_matmul computes the key/value product, kqv_matmul multiplies the query with that product, and qk_matmul computes the query/key product (only when sparse_reg is enabled).
dconv: a depthwise convolution applied to the values.
In the forward pass the module first projects x to queries, keys and values, computes the products above, L2-normalizes the queries and keys, applies the depthwise convolution to the values, and combines everything into the final output.
This module implements a linear angular attention mechanism for information exchange and feature extraction on sequence or image data; its parameters, such as num_heads, qkv_bias and attn_drop, can be tuned for the task at hand.
"""
import torch
import torch.nn as nn
import math
class MatMul(nn.Module):
def __init__(self):
super(MatMul, self).__init__()
def forward(self, x, y):
return torch.matmul(x, y)
class LinAngularAttention(nn.Module):
def __init__(
self,
in_channels,
num_heads=8,
qkv_bias=False,
attn_drop=0.0,
proj_drop=0.0,
res_kernel_size=9,
sparse_reg=False,
):
super().__init__()
assert in_channels % num_heads == 0, "dim should be divisible by num_heads"
self.num_heads = num_heads
head_dim = in_channels // num_heads
self.scale = head_dim**-0.5
self.sparse_reg = sparse_reg
self.qkv = nn.Linear(in_channels, in_channels * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(in_channels, in_channels)
self.proj_drop = nn.Dropout(proj_drop)
self.kq_matmul = MatMul()
self.kqv_matmul = MatMul()
if self.sparse_reg:
self.qk_matmul = MatMul()
self.sv_matmul = MatMul()
self.dconv = nn.Conv2d(
in_channels=self.num_heads,
out_channels=self.num_heads,
kernel_size=(res_kernel_size, 1),
padding=(res_kernel_size // 2, 0),
bias=False,
groups=self.num_heads,
)
def forward(self, x):
N, L, C = x.shape
qkv = (
self.qkv(x)
.reshape(N, L, 3, self.num_heads, C // self.num_heads)
.permute(2, 0, 3, 1, 4)
)
q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple)
if self.sparse_reg:
attn = self.qk_matmul(q * self.scale, k.transpose(-2, -1))
attn = attn.softmax(dim=-1)
mask = attn > 0.02 # note that the threshold could be different; adapt to your codebases.
sparse = mask * attn
q = q / q.norm(dim=-1, keepdim=True)
k = k / k.norm(dim=-1, keepdim=True)
dconv_v = self.dconv(v)
attn = self.kq_matmul(k.transpose(-2, -1), v)
if self.sparse_reg:
x = (
self.sv_matmul(sparse, v)
+ 0.5 * v
+ 1.0 / math.pi * self.kqv_matmul(q, attn)
)
else:
x = 0.5 * v + 1.0 / math.pi * self.kqv_matmul(q, attn)
x = x / x.norm(dim=-1, keepdim=True)
x += dconv_v
x = x.transpose(1, 2).reshape(N, L, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
if __name__ == '__main__':
block = LinAngularAttention(in_channels=128)
input = torch.rand(32,784,128)
output = block(input)
print(input.size(), output.size())

View File

@@ -0,0 +1,119 @@
# https://github.com/lancopku/MUSE
"""
Main components and operations of this module:
Multi-head self-attention: the input queries, keys and values are first projected with linear layers (fc_q, fc_k and fc_v) into per-head subspaces, the multi-head attention scores are computed and normalized with softmax, and the scores are finally used to weight the values to obtain the attention output.
Convolutional fusion with dynamic parameters: depthwise-pointwise 1D convolutions with kernel sizes 1, 3 and 5 are applied to the values, and dynamic parameters (dy_paras) decide their weights, so the contribution of each kernel size can be controlled dynamically.
Weight initialization: the init_weights method initializes the module's weights.
Forward pass: given queries, keys, values, an optional attention mask (attention_mask) and optional attention weights (attention_weights), it computes the multi-head self-attention output and adds the dynamically weighted convolutional fusion to obtain the final output.
"""
import numpy as np
import torch
from torch import nn
from torch.nn import init
class Depth_Pointwise_Conv1d(nn.Module):
def __init__(self, in_ch, out_ch, k):
super().__init__()
if (k == 1):
self.depth_conv = nn.Identity()
else:
self.depth_conv = nn.Conv1d(
in_channels=in_ch,
out_channels=in_ch,
kernel_size=k,
groups=in_ch,
padding=k // 2
)
self.pointwise_conv = nn.Conv1d(
in_channels=in_ch,
out_channels=out_ch,
kernel_size=1,
groups=1
)
def forward(self, x):
out = self.pointwise_conv(self.depth_conv(x))
return out
class MUSEAttention(nn.Module):
def __init__(self, d_model, d_k, d_v, h, dropout=.1):
super(MUSEAttention, self).__init__()
self.fc_q = nn.Linear(d_model, h * d_k)
self.fc_k = nn.Linear(d_model, h * d_k)
self.fc_v = nn.Linear(d_model, h * d_v)
self.fc_o = nn.Linear(h * d_v, d_model)
self.dropout = nn.Dropout(dropout)
self.conv1 = Depth_Pointwise_Conv1d(h * d_v, d_model, 1)
self.conv3 = Depth_Pointwise_Conv1d(h * d_v, d_model, 3)
self.conv5 = Depth_Pointwise_Conv1d(h * d_v, d_model, 5)
self.dy_paras = nn.Parameter(torch.ones(3))
self.softmax = nn.Softmax(-1)
self.d_model = d_model
self.d_k = d_k
self.d_v = d_v
self.h = h
self.init_weights()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, queries, keys, values, attention_mask=None, attention_weights=None):
# Self Attention
b_s, nq = queries.shape[:2]
nk = keys.shape[1]
q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k)
k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk)
v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v)
att = torch.matmul(q, k) / np.sqrt(self.d_k) # (b_s, h, nq, nk)
if attention_weights is not None:
att = att * attention_weights
if attention_mask is not None:
att = att.masked_fill(attention_mask, -np.inf)
att = torch.softmax(att, -1)
att = self.dropout(att)
out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v) # (b_s, nq, h*d_v)
out = self.fc_o(out) # (b_s, nq, d_model)
v2 = v.permute(0, 1, 3, 2).contiguous().view(b_s, -1, nk) # bs,dim,n
dy_paras = self.softmax(self.dy_paras)  # normalise the branch weights; re-wrapping them in a new nn.Parameter every forward would detach them from the optimiser
out2 = dy_paras[0] * self.conv1(v2) + dy_paras[1] * self.conv3(v2) + dy_paras[2] * self.conv5(v2)
out2 = out2.permute(0, 2, 1) # bs.n.dim
out = out + out2
return out
if __name__ == '__main__':
block = MUSEAttention(d_model=256, d_k=256, d_v=256, h=256).cuda()
# input = torch.rand(64, 64, 512).cuda()
input = torch.rand(1, 128, 256).cuda()  # (b_s, nq, d_model); the attention expects 3-D token input, a 4-D tensor breaks the .view() reshapes in forward()
output = block(input, input, input)
print(input.size(), output.size())
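# Hedged example (illustrative, not part of the original file): forward() also accepts an
# optional boolean attention_mask, broadcastable to (b_s, h, nq, nk); True entries are filled
# with -inf before the softmax. A causal mask for the 128-token input above could look like:
causal_mask = torch.triu(torch.ones(128, 128, dtype=torch.bool), diagonal=1)
causal_mask = causal_mask.unsqueeze(0).unsqueeze(0).cuda()   # (1, 1, nq, nk)
masked_out = block(input, input, input, attention_mask=causal_mask)
print(masked_out.size())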

View File

@ -0,0 +1,75 @@
# https://github.com/apple/ml-cvnets
"""
The main components and operations of this module are:
Self-attention computation: linear projections (fc_i, fc_k, fc_v and fc_o) map the input into different subspaces. A per-token weight (weight_i) is obtained by applying softmax to the output of fc_i; it weights fc_k(input), giving context_score. Summing context_score over the token dimension yields a context vector (context_vector), which re-weights fc_v(input); a final linear projection (fc_o) produces the output.
Weight initialisation: the init_weights method initialises the weights of the module.
Forward pass: performs the self-attention computation on the input and returns the attention output.
"""
import numpy as np
import torch
from torch import nn
from torch.nn import init
class MobileViTv2Attention(nn.Module):
'''
Scaled dot-product attention
'''
def __init__(self, d_model):
'''
:param d_model: Output dimensionality of the model
:param d_k: Dimensionality of queries and keys
:param d_v: Dimensionality of values
:param h: Number of heads
'''
super(MobileViTv2Attention, self).__init__()
self.fc_i = nn.Linear(d_model, 1)
self.fc_k = nn.Linear(d_model, d_model)
self.fc_v = nn.Linear(d_model, d_model)
self.fc_o = nn.Linear(d_model, d_model)
self.d_model = d_model
self.init_weights()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, input):
'''
Computes
:param queries: Queries (b_s, nq, d_model)
:return:
'''
i = self.fc_i(input) # (bs,nq,1)
weight_i = torch.softmax(i, dim=1) # bs,nq,1
context_score = weight_i * self.fc_k(input) # bs,nq,d_model
context_vector = torch.sum(context_score, dim=1, keepdim=True) # bs,1,d_model
v = self.fc_v(input) * context_vector # bs,nq,d_model
out = self.fc_o(v) # bs,nq,d_model
return out
if __name__ == '__main__':
block = MobileViTv2Attention(d_model=256)
# input = torch.rand(64, 64, 512).cuda()
input = torch.rand(1, 128, 256, 256)
output = block(input)
print(input.size(), output.size())
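# Note (added): this "separable" self-attention collapses the usual n x n attention map into a
# single n x 1 context score, so its cost grows linearly with the number of tokens (O(n * d))
# rather than quadratically as in standard multi-head self-attention.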

View File

@ -0,0 +1,89 @@
# https://github.com/sail-sg/volo
"""
The main components and operations of this module are:
v_pj: a linear projection that maps the input features into a new feature space to produce v.
attn: a linear layer that maps pooled local regions of the input to attention scores, which express the importance of each local region.
attn_drop: a dropout layer applied to the attention scores to reduce overfitting.
proj and proj_drop: the final linear projection and dropout applied to the output.
unflod: an unfold (im2col-style) operation that expands the v feature tensor according to the given kernel_size, padding and stride.
pool: average pooling applied to the input to reduce its spatial size.
In the forward pass, local regions of the input are first mapped into the v feature space, attention scores are computed, and those scores weight v to obtain a weighted feature representation, which is then processed by the final projection and dropout to produce the output.
The main purpose of this module is to capture local information in the input image and weight the features by the importance of each local region, which can help in computer-vision tasks such as image classification and segmentation.
"""
import numpy as np
import torch
from torch import nn
from torch.nn import init
import math
from torch.nn import functional as F
class OutlookAttention(nn.Module):
def __init__(self, dim, num_heads=1, kernel_size=3, padding=1, stride=1, qkv_bias=False,
attn_drop=0.1):
super().__init__()
self.dim = dim
self.num_heads = num_heads
self.head_dim = dim // num_heads
self.kernel_size = kernel_size
self.padding = padding
self.stride = stride
self.scale = self.head_dim ** (-0.5)
self.v_pj = nn.Linear(dim, dim, bias=qkv_bias)
self.attn = nn.Linear(dim, kernel_size ** 4 * num_heads)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(attn_drop)
self.unflod = nn.Unfold(kernel_size, padding, stride)  # manual unfolding (im2col-style)
self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True)
def forward(self, x):
B, H, W, C = x.shape
# project to the new feature space v
v = self.v_pj(x).permute(0, 3, 1, 2) # B,C,H,W
h, w = math.ceil(H / self.stride), math.ceil(W / self.stride)
v = self.unflod(v).reshape(B, self.num_heads, self.head_dim, self.kernel_size * self.kernel_size,
h * w).permute(0, 1, 4, 3, 2) # B,num_head,H*W,kxk,head_dim
# generate the attention map
attn = self.pool(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1) # B,H,W,C
attn = self.attn(attn).reshape(B, h * w, self.num_heads, self.kernel_size * self.kernel_size \
, self.kernel_size * self.kernel_size).permute(0, 2, 1, 3,
4) # Bnum_headH*W,kxk,kxk
attn = self.scale * attn
attn = attn.softmax(-1)
attn = self.attn_drop(attn)
# gather the weighted features
out = (attn @ v).permute(0, 1, 4, 3, 2).reshape(B, C * self.kernel_size * self.kernel_size,
h * w) # B,dimxkxk,H*W
out = F.fold(out, output_size=(H, W), kernel_size=self.kernel_size,
padding=self.padding, stride=self.stride) # B,C,H,W
out = self.proj(out.permute(0, 2, 3, 1)) # B,H,W,C
out = self.proj_drop(out)
return out
# input: B, H, W, C; output: B, H, W, C
if __name__ == '__main__':
block = OutlookAttention(dim=256).cuda()
# input = torch.rand(1, 64, 64, 512).cuda()
input = torch.rand(1, 128, 256, 256).cuda()
output = block(input)
print(input.size(), output.size())

View File

@ -0,0 +1,73 @@
# https://github.com/imankgoyal/NonDeepNetworks
"""
The module consists of the following components:
sse (Squeeze-and-Excitation branch):
the input is pooled to 1x1 by adaptive average pooling,
a 1x1 convolution with the same number of channels then produces attention weights, scaled by a Sigmoid activation,
and these weights re-weight the input features to emphasise the important channels.
conv1x1 and conv3x3 branches:
conv1x1 is a 1x1 convolution that mixes information across channels,
conv3x3 is a 3x3 convolution that captures local spatial information,
both are followed by batch normalisation to stabilise training.
silu activation:
SiLU (also called Swish) is a smooth non-linear activation function.
In the forward pass the input x flows through these branches and their sum is passed through SiLU to give the output y. The module is designed to improve the feature representation by fusing features at different scales and applying channel attention.
"""
import numpy as np
import torch
from torch import nn
from torch.nn import init
from einops import rearrange
def to_3d(x):
return rearrange(x, 'b c h w -> b (h w) c')
def to_4d(x,h,w):
return rearrange(x, 'b (h w) c -> b c h w',h=h,w=w)
class ParNetAttention(nn.Module):
def __init__(self, channel=512):
super().__init__()
self.sse = nn.Sequential(
nn.AdaptiveAvgPool2d(1),
nn.Conv2d(channel, channel, kernel_size=1),
nn.Sigmoid()
)
self.conv1x1 = nn.Sequential(
nn.Conv2d(channel, channel, kernel_size=1),
nn.BatchNorm2d(channel)
)
self.conv3x3 = nn.Sequential(
nn.Conv2d(channel, channel, kernel_size=3, padding=1),
nn.BatchNorm2d(channel)
)
self.silu = nn.SiLU()
def forward(self, x):
b, c, _, _ = x.size()
x1 = self.conv1x1(x)
x2 = self.conv3x3(x)
x3 = self.sse(x) * x
y = self.silu(x1 + x2 + x3)
return y
# input: N C H W, output: N C H W
if __name__ == '__main__':
# input = torch.randn(3, 512, 7, 7).cuda()
input = torch.randn(1, 128, 256, 256).cuda()
pna = ParNetAttention(channel=128).cuda()
output = pna(input)
print(output.shape)

View File

@ -0,0 +1,70 @@
# https://github.com/JierunChen/FasterNet
"""
This code implements a custom convolution module named Partial_conv3 whose behaviour depends on its arguments. Its main characteristics are:
Partial convolution: an nn.Conv2d is applied to only part of the channels; dim_conv3 is the number of convolved channels, usually a fraction of the input channels dim, and the convolution acts only on those channels.
Forward strategies: two strategies are available, selected by the forward argument:
'slicing': the partial convolution is applied in place on a slice of the input channels; intended for inference only.
'split_cat': the input is split into two parts, the first part is convolved, and the two parts are concatenated again; usable for both training and inference.
Selective convolution: only a subset of the channels is convolved while the rest pass through unchanged, letting the model apply convolution selectively to specific channels and control feature extraction flexibly.
Identity path: the untouched channels are kept and concatenated back, so the number of input and output channels stays the same and the module can be dropped into existing architectures.
Overall, Partial_conv3 provides a custom convolution strategy that convolves only selected channels, which is useful for feature selection and channel interaction while keeping computation light.
"""
from torch import nn
import torch
from einops.einops import rearrange
def to_3d(x):
return rearrange(x, 'b c h w -> b (h w) c')
def to_4d(x, h, w):
return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
class Partial_conv3(nn.Module):
def __init__(self, dim, n_div, forward):
super().__init__()
self.dim_conv3 = dim // n_div
self.dim_untouched = dim - self.dim_conv3
self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)
if forward == 'slicing':
self.forward = self.forward_slicing
elif forward == 'split_cat':
self.forward = self.forward_split_cat
else:
raise NotImplementedError
def forward_slicing(self, x):
# only for inference
x = x.clone() # !!! Keep the original input intact for the residual connection later
x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :])
return x
def forward_split_cat(self, x):
# for training/inference
x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
x1 = self.partial_conv3(x1)
x = torch.cat((x1, x2), 1)
return x
if __name__ == '__main__':
block = Partial_conv3(128, 2, 'split_cat')
input = torch.rand(32, 784, 128)
input = to_4d(input, 28, 28)
output = block(input)
output = to_3d(output)
print(input.size())
print(output.size())
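# Rough cost sketch (illustrative): only dim // n_div channels go through the 3x3 convolution,
# so its multiply-add cost drops by roughly a factor of n_div**2 compared with a full 3x3
# convolution over all channels.
dim, n_div = 128, 2
full_macs = dim * dim * 9                 # per spatial position, full 3x3 conv
partial_macs = (dim // n_div) ** 2 * 9
print(full_macs / partial_macs)           # 4.0 for n_div=2, 16.0 for n_div=4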

View File

@ -0,0 +1,95 @@
# https://paperswithcode.com/paper/s-2-mlpv2-improved-spatial-shift-mlp
"""
SplitAttention
This is a split-attention module used to strengthen the feature representation of a network.
Parameters: channel (number of channels) and k (number of attention splits).
In the forward pass the input tensor x_all is reshaped to (b, k, h*w, c), where b is the batch size, k the number of splits, h and w the height and width, and c the number of channels.
Attention weights are then computed: an MLP produces hat_a, and a softmax over the split dimension turns it into bar_a.
Finally bar_a is multiplied with x_all and the results are summed over the splits to give the output.
S2Attention
This is an attention module built on SplitAttention for processing feature maps.
Parameter: channels (number of channels).
In the forward pass the input is first expanded by a linear layer and split into three parts, x1, x2 and x3 (the first two are spatially shifted).
These three parts are passed to the SplitAttention module, which computes attention weights and enhances the features.
A final linear layer merges the attention-enhanced representation and returns it.
These modules can be used in different layers of a network to improve the quality and generalisation of the learned features.
"""
import numpy as np
import torch
from torch import nn
from torch.nn import init
def spatial_shift1(x):
b, w, h, c = x.size()
x[:, 1:, :, :c // 4] = x[:, :w - 1, :, :c // 4]
x[:, :w - 1, :, c // 4:c // 2] = x[:, 1:, :, c // 4:c // 2]
x[:, :, 1:, c // 2:c * 3 // 4] = x[:, :, :h - 1, c // 2:c * 3 // 4]
x[:, :, :h - 1, 3 * c // 4:] = x[:, :, 1:, 3 * c // 4:]
return x
def spatial_shift2(x):
b, w, h, c = x.size()
x[:, :, 1:, :c // 4] = x[:, :, :h - 1, :c // 4]
x[:, :, :h - 1, c // 4:c // 2] = x[:, :, 1:, c // 4:c // 2]
x[:, 1:, :, c // 2:c * 3 // 4] = x[:, :w - 1, :, c // 2:c * 3 // 4]
x[:, :w - 1, :, 3 * c // 4:] = x[:, 1:, :, 3 * c // 4:]
return x
class SplitAttention(nn.Module):
def __init__(self, channel=32, k=3):
super().__init__()
self.channel = channel
self.k = k
self.mlp1 = nn.Linear(channel, channel, bias=False)
self.gelu = nn.GELU()
self.mlp2 = nn.Linear(channel, channel * k, bias=False)
self.softmax = nn.Softmax(1)
def forward(self, x_all):
b, k, h, w, c = x_all.shape
x_all = x_all.reshape(b, k, -1, c) # bs,k,n,c
a = torch.sum(torch.sum(x_all, 1), 1) # bs,c
hat_a = self.mlp2(self.gelu(self.mlp1(a))) # bs,kc
hat_a = hat_a.reshape(b, self.k, c) # bs,k,c
bar_a = self.softmax(hat_a) # bs,k,c
attention = bar_a.unsqueeze(-2) # #bs,k,1,c
out = attention * x_all # #bs,k,n,c
out = torch.sum(out, 1).reshape(b, h, w, c)
return out
class S2Attention(nn.Module):
def __init__(self, channels=32):
super().__init__()
self.mlp1 = nn.Linear(channels, channels * 3)
self.mlp2 = nn.Linear(channels, channels)
self.split_attention = SplitAttention()
def forward(self, x):
b, c, w, h = x.size()
x = x.permute(0, 2, 3, 1)
x = self.mlp1(x)
x1 = spatial_shift1(x[:, :, :, :c])
x2 = spatial_shift2(x[:, :, :, c:c * 2])
x3 = x[:, :, :, c * 2:]
x_all = torch.stack([x1, x2, x3], 1)
a = self.split_attention(x_all)
x = self.mlp2(a)
x = x.permute(0, 3, 1, 2)
return x
# input: N C H W, output: N C H W
if __name__ == '__main__':
input = torch.randn(64, 32, 7, 7)
s2att = S2Attention(channels=32)
output = s2att(input)
print(output.shape)
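# Illustrative check (hypothetical, not from the upstream repo): spatial_shift1 / spatial_shift2
# move the four channel groups of a (b, w, h, c) tensor by one pixel in four different
# directions. They write into their argument via slice assignment, so pass a .clone() when the
# original tensor is still needed afterwards.
toy = torch.randn(1, 5, 5, 8)        # (b, w, h, c); c must be divisible by 4
shifted = spatial_shift1(toy.clone())
print(shifted.shape)                 # torch.Size([1, 5, 5, 8])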

View File

@ -0,0 +1,85 @@
# https://github.com/implus/SKNet
"""
The module applies a set of convolutions with different kernel sizes to the input, computes attention weights over the kernels, and fuses the branches with those weights to produce the output. Its main components and steps are:
Initialisation: the module takes the following arguments:
channel: number of input channels;
kernels: list of kernel sizes used for the convolution branches;
reduction: channel reduction ratio used to shrink the channel dimension;
group: number of groups for the grouped convolutions;
L: lower bound used when computing the reduced dimension d.
During initialisation the module builds the convolution branches, the linear layers and the Softmax used later.
Forward pass: the module performs the following steps:
each kernel size is applied to the input with its own convolution and the results are collected in the list conv_outs;
all branch outputs are summed to form U, a fused representation of the input;
U is average-pooled spatially and projected by a linear layer down to d channels;
per-branch linear layers then produce the attention logits, collected in the list weights;
a Softmax over the branch dimension normalises these attention weights;
the weights are applied to the corresponding branch features and summed to give the final output tensor V.
The module returns V as its output.
The core idea is to compute attention over convolution kernels of different sizes so that multi-scale information is captured, and to fuse the branches by this weighting, which strengthens the model's ability to perceive objects at different scales.
"""
import torch
from torch import nn
from collections import OrderedDict
class SKAttention(nn.Module):
def __init__(self, channel=512, kernels=[1, 3, 5, 7], reduction=16, group=1, L=32):
super().__init__()
self.d = max(L, channel // reduction)
self.convs = nn.ModuleList([])
for k in kernels:
self.convs.append(
nn.Sequential(OrderedDict([
('conv', nn.Conv2d(channel, channel, kernel_size=k, padding=k // 2, groups=group)),
('bn', nn.BatchNorm2d(channel)),
('relu', nn.ReLU())
]))
)
self.fc = nn.Linear(channel, self.d)
self.fcs = nn.ModuleList([])
for i in range(len(kernels)):
self.fcs.append(nn.Linear(self.d, channel))
self.softmax = nn.Softmax(dim=0)
def forward(self, x):
bs, c, _, _ = x.size()
conv_outs = []
### split
for conv in self.convs:
conv_outs.append(conv(x))
feats = torch.stack(conv_outs, 0) # k,bs,channel,h,w
### fuse
U = sum(conv_outs) # bs,c,h,w
### reduction channel
S = U.mean(-1).mean(-1) # bs,c
Z = self.fc(S) # bs,d
### calculate attention weight
weights = []
for fc in self.fcs:
weight = fc(Z)
weights.append(weight.view(bs, c, 1, 1)) # bs,channel
attention_weughts = torch.stack(weights, 0) # k,bs,channel,1,1
attention_weughts = self.softmax(attention_weughts) # k,bs,channel,1,1
### fuse
V = (attention_weughts * feats).sum(0)
return V
# input: N C H W, output: N C H W
if __name__ == '__main__':
input = torch.randn(50, 512, 7, 7)
se = SKAttention(channel=512, reduction=8)
output = se(input)
print(output.shape)
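# Note (added): the branch weights are softmax-normalised over the kernel dimension (dim=0 of
# the stacked weights), so for every (batch, channel) pair the contributions of the different
# kernel sizes sum to 1 before the weighted fusion into V.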

Binary file not shown.

View File

@ -0,0 +1,189 @@
# https://github.com/hhb072/SViT
""""
以下是该模块的主要组件和功能
Unfold 操作Unfold 类定义了一个卷积操作用于将输入图像进行解展开unfolding具体来说它将输入图像划分成不重叠的局部块并将这些块展平成向量这有助于在局部区域之间建立联系
Fold 操作Fold 类定义了一个卷积转置操作用于将展开的局部块还原为原始的图像形状这有助于将局部特征重新组合成图像
Attention 操作Attention 类定义了一个加性注意力机制用于计算局部块之间的关联权重通过对展开的局部块执行注意力操作可以确定不同块之间的相关性从而更好地捕获局部特征
Stoken 操作StokenAttention 类将图像划分为多个小块并在这些小块之间执行加性注意力操作它还包括对块之间的关系进行迭代更新的逻辑以更好地捕获图像中的局部特征
直接传递操作direct_forward 方法用于直接传递输入图像而不进行块划分和注意力操作这对于某些情况下不需要局部特征建模的情况很有用
Stoken 操作和直接传递操作的选择根据 self.stoken_size 参数的设置模块可以选择执行 Stoken 操作或直接传递操作如果 self.stoken_size 的值大于 1则执行 Stoken 操作否则执行直接传递操作
总的来说这个模块提供了一种有效的方式来处理图像数据并在图像的不同局部区域之间建立关联以捕获局部特征这对于许多计算机视觉任务如目标检测和图像分割都具有重要意义
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
class Unfold(nn.Module):
def __init__(self, kernel_size=3):
super().__init__()
self.kernel_size = kernel_size
weights = torch.eye(kernel_size ** 2)
weights = weights.reshape(kernel_size ** 2, 1, kernel_size, kernel_size)
self.weights = nn.Parameter(weights, requires_grad=False)
def forward(self, x):
b, c, h, w = x.shape
x = F.conv2d(x.reshape(b * c, 1, h, w), self.weights, stride=1, padding=self.kernel_size // 2)
return x.reshape(b, c * 9, h * w)
class Fold(nn.Module):
def __init__(self, kernel_size=3):
super().__init__()
self.kernel_size = kernel_size
weights = torch.eye(kernel_size ** 2)
weights = weights.reshape(kernel_size ** 2, 1, kernel_size, kernel_size)
self.weights = nn.Parameter(weights, requires_grad=False)
def forward(self, x):
b, _, h, w = x.shape
x = F.conv_transpose2d(x, self.weights, stride=1, padding=self.kernel_size // 2)
return x
class Attention(nn.Module):
def __init__(self, dim, window_size=None, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
super().__init__()
self.dim = dim
self.num_heads = num_heads
head_dim = dim // num_heads
self.window_size = window_size
self.scale = qk_scale or head_dim ** -0.5
self.qkv = nn.Conv2d(dim, dim * 3, 1, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Conv2d(dim, dim, 1)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x):
B, C, H, W = x.shape
N = H * W
q, k, v = self.qkv(x).reshape(B, self.num_heads, C // self.num_heads * 3, N).chunk(3,
dim=2) # (B, num_heads, head_dim, N)
attn = (k.transpose(-1, -2) @ q) * self.scale
attn = attn.softmax(dim=-2) # (B, h, N, N)
attn = self.attn_drop(attn)
x = (v @ attn).reshape(B, C, H, W)
x = self.proj(x)
x = self.proj_drop(x)
return x
class StokenAttention(nn.Module):
def __init__(self, dim, stoken_size, n_iter=1, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
proj_drop=0.):
super().__init__()
self.n_iter = n_iter
self.stoken_size = stoken_size
self.scale = dim ** - 0.5
self.unfold = Unfold(3)
self.fold = Fold(3)
self.stoken_refine = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
attn_drop=attn_drop, proj_drop=proj_drop)
def stoken_forward(self, x):
'''
x: (B, C, H, W)
'''
B, C, H0, W0 = x.shape
h, w = self.stoken_size
pad_l = pad_t = 0
pad_r = (w - W0 % w) % w
pad_b = (h - H0 % h) % h
if pad_r > 0 or pad_b > 0:
x = F.pad(x, (pad_l, pad_r, pad_t, pad_b))
_, _, H, W = x.shape
hh, ww = H // h, W // w
stoken_features = F.adaptive_avg_pool2d(x, (hh, ww)) # (B, C, hh, ww)
pixel_features = x.reshape(B, C, hh, h, ww, w).permute(0, 2, 4, 3, 5, 1).reshape(B, hh * ww, h * w, C)
with torch.no_grad():
for idx in range(self.n_iter):
stoken_features = self.unfold(stoken_features) # (B, C*9, hh*ww)
stoken_features = stoken_features.transpose(1, 2).reshape(B, hh * ww, C, 9)
affinity_matrix = pixel_features @ stoken_features * self.scale # (B, hh*ww, h*w, 9)
affinity_matrix = affinity_matrix.softmax(-1) # (B, hh*ww, h*w, 9)
affinity_matrix_sum = affinity_matrix.sum(2).transpose(1, 2).reshape(B, 9, hh, ww)
affinity_matrix_sum = self.fold(affinity_matrix_sum)
if idx < self.n_iter - 1:
stoken_features = pixel_features.transpose(-1, -2) @ affinity_matrix # (B, hh*ww, C, 9)
stoken_features = self.fold(stoken_features.permute(0, 2, 3, 1).reshape(B * C, 9, hh, ww)).reshape(
B, C, hh, ww)
stoken_features = stoken_features / (affinity_matrix_sum + 1e-12) # (B, C, hh, ww)
stoken_features = pixel_features.transpose(-1, -2) @ affinity_matrix # (B, hh*ww, C, 9)
stoken_features = self.fold(stoken_features.permute(0, 2, 3, 1).reshape(B * C, 9, hh, ww)).reshape(B, C, hh, ww)
stoken_features = stoken_features / (affinity_matrix_sum.detach() + 1e-12) # (B, C, hh, ww)
stoken_features = self.stoken_refine(stoken_features)
stoken_features = self.unfold(stoken_features) # (B, C*9, hh*ww)
stoken_features = stoken_features.transpose(1, 2).reshape(B, hh * ww, C, 9) # (B, hh*ww, C, 9)
pixel_features = stoken_features @ affinity_matrix.transpose(-1, -2) # (B, hh*ww, C, h*w)
pixel_features = pixel_features.reshape(B, hh, ww, C, h, w).permute(0, 3, 1, 4, 2, 5).reshape(B, C, H, W)
if pad_r > 0 or pad_b > 0:
pixel_features = pixel_features[:, :, :H0, :W0]
return pixel_features
def direct_forward(self, x):
B, C, H, W = x.shape
stoken_features = x
stoken_features = self.stoken_refine(stoken_features)
return stoken_features
def forward(self, x):
if self.stoken_size[0] > 1 or self.stoken_size[1] > 1:
return self.stoken_forward(x)
else:
return self.direct_forward(x)
# input: N C H W, output: N C H W
if __name__ == '__main__':
input = torch.randn(3, 64, 32, 64).cuda()
se = StokenAttention(64, stoken_size=[8,8]).cuda()
output = se(input)
print(output.shape)

View File

@ -0,0 +1,163 @@
# https://github.com/cheng-haha/ScConv
"""
GroupBatchnorm2d
This is a custom normalisation module.
It splits the channels into groups, with each group sharing its statistics.
Parameters: c_num (number of channels), group_num (number of groups) and eps (a small value that prevents division by zero).
In the forward pass the input is reshaped by group, the mean and standard deviation are computed within each group, and the input is standardised with them before a learnable per-channel scale and shift are applied.
SRU (Spatial Reconstruction Unit)
This is a module used to strengthen the feature representation of a network.
Parameters: oup_channels (output channels), group_num (number of groups), gate_treshold (gating threshold) and torch_gn (whether to use PyTorch's GroupNorm).
In the forward pass it first applies group normalisation and then reconstructs the input features through a gating mechanism.
The gate decides, based on the normalised feature statistics and weights, which information is kept and which is suppressed.
CRU (Channel Reconstruction Unit)
This is a custom module that reorganises the channels of the feature map.
Parameters: op_channel (output channels), alpha (channel split ratio), squeeze_radio (squeeze ratio), group_size (group size) and group_kernel_size (group convolution kernel size).
In the forward pass the channels are split into two parts, each part is squeezed and processed by group and point-wise convolutions, and the results are fused.
ScConv (Spatial and Channel reconstruction Convolution)
This module combines SRU and CRU to enhance the feature representation and reorganise the channels.
Parameters: those of the SRU and CRU modules.
In the forward pass SRU is applied first and CRU afterwards, improving the feature representation and reorganising the channels.
These building blocks can be inserted into larger networks to suit specific tasks; their operations and mechanisms can help improve performance and generalisation.
"""
import torch
import torch.nn.functional as F
import torch.nn as nn
class GroupBatchnorm2d(nn.Module):
def __init__(self, c_num: int,
group_num: int = 16,
eps: float = 1e-10
):
super(GroupBatchnorm2d, self).__init__()
assert c_num >= group_num
self.group_num = group_num
self.weight = nn.Parameter(torch.randn(c_num, 1, 1))
self.bias = nn.Parameter(torch.zeros(c_num, 1, 1))
self.eps = eps
def forward(self, x):
N, C, H, W = x.size()
x = x.view(N, self.group_num, -1)
mean = x.mean(dim=2, keepdim=True)
std = x.std(dim=2, keepdim=True)
x = (x - mean) / (std + self.eps)
x = x.view(N, C, H, W)
return x * self.weight + self.bias
class SRU(nn.Module):
def __init__(self,
oup_channels: int,
group_num: int = 16,
gate_treshold: float = 0.5,
torch_gn: bool = False
):
super().__init__()
self.gn = nn.GroupNorm(num_channels=oup_channels, num_groups=group_num) if torch_gn else GroupBatchnorm2d(
c_num=oup_channels, group_num=group_num)
self.gate_treshold = gate_treshold
self.sigomid = nn.Sigmoid()
def forward(self, x):
gn_x = self.gn(x)
w_gamma = self.gn.weight / torch.sum(self.gn.weight)
w_gamma = w_gamma.view(1, -1, 1, 1)
reweigts = self.sigomid(gn_x * w_gamma)
# Gate
info_mask = reweigts >= self.gate_treshold
noninfo_mask = reweigts < self.gate_treshold
x_1 = info_mask * gn_x
x_2 = noninfo_mask * gn_x
x = self.reconstruct(x_1, x_2)
return x
def reconstruct(self, x_1, x_2):
x_11, x_12 = torch.split(x_1, x_1.size(1) // 2, dim=1)
x_21, x_22 = torch.split(x_2, x_2.size(1) // 2, dim=1)
return torch.cat([x_11 + x_22, x_12 + x_21], dim=1)
class CRU(nn.Module):
'''
alpha: 0<alpha<1
'''
def __init__(self,
op_channel: int,
alpha: float = 1 / 2,
squeeze_radio: int = 2,
group_size: int = 2,
group_kernel_size: int = 3,
):
super().__init__()
self.up_channel = up_channel = int(alpha * op_channel)
self.low_channel = low_channel = op_channel - up_channel
self.squeeze1 = nn.Conv2d(up_channel, up_channel // squeeze_radio, kernel_size=1, bias=False)
self.squeeze2 = nn.Conv2d(low_channel, low_channel // squeeze_radio, kernel_size=1, bias=False)
# up
self.GWC = nn.Conv2d(up_channel // squeeze_radio, op_channel, kernel_size=group_kernel_size, stride=1,
padding=group_kernel_size // 2, groups=group_size)
self.PWC1 = nn.Conv2d(up_channel // squeeze_radio, op_channel, kernel_size=1, bias=False)
# low
self.PWC2 = nn.Conv2d(low_channel // squeeze_radio, op_channel - low_channel // squeeze_radio, kernel_size=1,
bias=False)
self.advavg = nn.AdaptiveAvgPool2d(1)
def forward(self, x):
# Split
up, low = torch.split(x, [self.up_channel, self.low_channel], dim=1)
up, low = self.squeeze1(up), self.squeeze2(low)
# Transform
Y1 = self.GWC(up) + self.PWC1(up)
Y2 = torch.cat([self.PWC2(low), low], dim=1)
# Fuse
out = torch.cat([Y1, Y2], dim=1)
out = F.softmax(self.advavg(out), dim=1) * out
out1, out2 = torch.split(out, out.size(1) // 2, dim=1)
return out1 + out2
class ScConv(nn.Module):
def __init__(self,
op_channel: int,
group_num: int = 4,
gate_treshold: float = 0.5,
alpha: float = 1 / 2,
squeeze_radio: int = 2,
group_size: int = 2,
group_kernel_size: int = 3,
):
super().__init__()
self.SRU = SRU(op_channel,
group_num=group_num,
gate_treshold=gate_treshold)
self.CRU = CRU(op_channel,
alpha=alpha,
squeeze_radio=squeeze_radio,
group_size=group_size,
group_kernel_size=group_kernel_size)
def forward(self, x):
x = self.SRU(x)
x = self.CRU(x)
return x
# input: N C H W, output: N C H W
if __name__ == '__main__':
# x = torch.randn(1, 32, 16, 16)
x = torch.randn(1, 128, 256, 256)
model = ScConv(128)
x = model(x)
# x = torch.unsqueeze(x[:, 0], 1)
# print(type(x))
print(x.shape)

View File

@ -0,0 +1,57 @@
import numpy as np
import torch
from torch import nn
from torch.nn import init
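# Added summary (this file has no docstring): Spatial Group-wise Enhance splits the channels
# into groups, compares each position in a group with the group's global average-pooled
# descriptor, normalises the resulting similarity map per group, scales it with a learnable
# weight/bias, and passes it through a sigmoid to spatially re-weight that group's features.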
class SpatialGroupEnhance(nn.Module):
def __init__(self, groups):
super().__init__()
self.groups = groups
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.weight = nn.Parameter(torch.zeros(1, groups, 1, 1))
self.bias = nn.Parameter(torch.zeros(1, groups, 1, 1))
self.sig = nn.Sigmoid()
self.init_weights()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, x):
b, c, h, w = x.shape
x = x.view(b * self.groups, -1, h, w) # bs*g,dim//g,h,w
xn = x * self.avg_pool(x) # bs*g,dim//g,h,w
xn = xn.sum(dim=1, keepdim=True) # bs*g,1,h,w
t = xn.view(b * self.groups, -1) # bs*g,h*w
t = t - t.mean(dim=1, keepdim=True) # bs*g,h*w
std = t.std(dim=1, keepdim=True) + 1e-5
t = t / std # bs*g,h*w
t = t.view(b, self.groups, h, w) # bs,g,h*w
t = t * self.weight + self.bias # bs,g,h*w
t = t.view(b * self.groups, 1, h, w) # bs*g,1,h*w
x = x * self.sig(t)
x = x.view(b, c, h, w)
return x
# input: N C H W, output: N C H W
if __name__ == '__main__':
input = torch.randn(50, 512, 7, 7)
sge = SpatialGroupEnhance(groups=4)
output = sge(input)
print(output.shape)

View File

@ -0,0 +1,97 @@
# https://github.com/mindspore-courses/External-Attention-MindSpore/blob/main/model/attention/TripletAttention.py
"""
The main characteristics and roles of these modules are:
BasicConv
A basic convolution block consisting of a convolution, optional batch normalisation and an optional ReLU activation.
Whether batch normalisation and ReLU are used is controlled by its arguments.
ZPool
A custom pooling operation that max-pools and average-pools the input along the channel dimension and concatenates the two results.
AttentionGate
This module implements an attention gate that learns spatial attention weights for a feature map.
The input is first compressed by the ZPool operation.
A convolution then produces an attention map, which is normalised with a Sigmoid activation.
Finally the input feature map is multiplied by this attention map to obtain the gated features.
TripletAttention
This module implements a triplet attention mechanism that captures both global and local interactions in the feature map.
It contains three AttentionGate branches operating on permuted views of the input, covering the channel-height, channel-width and (optionally) height-width interactions.
The no_spatial flag controls whether the spatial (height-width) branch is used.
The outputs of the branches are averaged to form the final feature map.
"""
import torch
import torch.nn as nn
class BasicConv(nn.Module):
def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True,
bn=True, bias=False):
super(BasicConv, self).__init__()
self.out_channels = out_planes
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding,
dilation=dilation, groups=groups, bias=bias)
self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
self.relu = nn.ReLU() if relu else None
def forward(self, x):
x = self.conv(x)
if self.bn is not None:
x = self.bn(x)
if self.relu is not None:
x = self.relu(x)
return x
class ZPool(nn.Module):
def forward(self, x):
return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1)
class AttentionGate(nn.Module):
def __init__(self):
super(AttentionGate, self).__init__()
kernel_size = 7
self.compress = ZPool()
self.conv = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False)
def forward(self, x):
x_compress = self.compress(x)
x_out = self.conv(x_compress)
scale = torch.sigmoid_(x_out)
return x * scale
class TripletAttention(nn.Module):
def __init__(self, no_spatial=False):
super(TripletAttention, self).__init__()
self.cw = AttentionGate()
self.hc = AttentionGate()
self.no_spatial = no_spatial
if not no_spatial:
self.hw = AttentionGate()
def forward(self, x):
x_perm1 = x.permute(0, 2, 1, 3).contiguous()
x_out1 = self.cw(x_perm1)
x_out11 = x_out1.permute(0, 2, 1, 3).contiguous()
x_perm2 = x.permute(0, 3, 2, 1).contiguous()
x_out2 = self.hc(x_perm2)
x_out21 = x_out2.permute(0, 3, 2, 1).contiguous()
if not self.no_spatial:
x_out = self.hw(x)
x_out = 1 / 3 * (x_out + x_out11 + x_out21)
else:
x_out = 1 / 2 * (x_out11 + x_out21)
return x_out
if __name__ == '__main__':
input = torch.randn(50, 512, 7, 7)
triplet = TripletAttention()
output = triplet(input)
print(output.shape)

View File

@ -0,0 +1,97 @@
# https://github.com/mindspore-courses/External-Attention-MindSpore/blob/main/model/attention/UFOAttention.py
"""
The main characteristics and roles of this module are:
Multi-head self-attention: the input is projected by separate linear layers and split into several heads; the parameter h is the number of heads.
Linear projections: the linear layers fc_q, fc_k, fc_v and fc_o project the input into the query (Q), key (K) and value (V) spaces and back to the model dimension.
Weight initialisation: the layers are initialised for stable training, using He initialisation for convolutions and a small-variance normal distribution for linear layers.
Attention computation: instead of a softmax over the Q-K dot product, the module first multiplies K^T with V and normalises both this product and Q with a custom XNorm function.
Multi-head fusion: the outputs of the heads are concatenated and passed through a final linear layer to produce the output.
Dropout regularisation: dropout is available to reduce the risk of overfitting.
Learnable scaling factor: a learnable parameter gamma scales the XNorm normalisation.
Overall, UFOAttention provides a self-attention variant that builds queries, keys and values from the input, normalises them without a softmax, and fuses the heads into the final feature representation; such modules are typically used on sequence data, e.g. in Transformer-style attention layers.
"""
import numpy as np
import torch
from torch import nn
from torch.functional import norm
from torch.nn import init
def XNorm(x, gamma):
norm_tensor = torch.norm(x, 2, -1, True)
return x * gamma / norm_tensor
class UFOAttention(nn.Module):
'''
Scaled dot-product attention
'''
def __init__(self, d_model, d_k, d_v, h, dropout=.1):
'''
:param d_model: Output dimensionality of the model
:param d_k: Dimensionality of queries and keys
:param d_v: Dimensionality of values
:param h: Number of heads
'''
super(UFOAttention, self).__init__()
self.fc_q = nn.Linear(d_model, h * d_k)
self.fc_k = nn.Linear(d_model, h * d_k)
self.fc_v = nn.Linear(d_model, h * d_v)
self.fc_o = nn.Linear(h * d_v, d_model)
self.dropout = nn.Dropout(dropout)
self.gamma = nn.Parameter(torch.randn((1, h, 1, 1)))
self.d_model = d_model
self.d_k = d_k
self.d_v = d_v
self.h = h
self.init_weights()
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, queries, keys, values):
b_s, nq = queries.shape[:2]
nk = keys.shape[1]
q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3) # (b_s, h, nq, d_k)
k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1) # (b_s, h, d_k, nk)
v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3) # (b_s, h, nk, d_v)
kv = torch.matmul(k, v) # bs,h,c,c
kv_norm = XNorm(kv, self.gamma) # bs,h,c,c
q_norm = XNorm(q, self.gamma) # bs,h,n,c
out = torch.matmul(q_norm, kv_norm).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v)
out = self.fc_o(out) # (b_s, nq, d_model)
return out
if __name__ == '__main__':
block = UFOAttention(d_model=512, d_k=512, d_v=512, h=8).cuda()
input = torch.rand(64, 64, 512).cuda()
output = block(input, input, input)
print(input.size(), output.size())
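# Note (added): because kv = K^T V is a (d_k x d_v) matrix computed before it is multiplied by Q,
# the cost of this attention grows linearly with the sequence length n (roughly O(n * d^2))
# instead of the O(n^2 * d) of standard softmax attention; XNorm(x, gamma) = gamma * x / ||x||_2
# takes the place of the softmax normalisation.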

View File

@ -0,0 +1,98 @@
# https://github.com/Andrew-Qibin/VisionPermutator
"""
MLP (Multi-Layer Perceptron) module
MLP is a multi-layer perceptron that applies linear projections and an activation to the input in order to learn and extract features.
Its constructor (__init__) takes the following arguments:
in_features: dimensionality of the input features;
hidden_features: dimensionality of the hidden layer;
out_features: dimensionality of the output layer;
act_layer: activation function, GELU by default;
drop: dropout probability, 0.1 by default.
The MLP consists of two linear layers (fc1 and fc2), an activation (act_layer) and a dropout layer (drop).
Its forward method passes the input x through the first linear layer and the activation, applies dropout, and then applies the second linear layer (followed by dropout) to produce the output.
WeightedPermuteMLP module
WeightedPermuteMLP is an attention-like module that transforms the input tensor along different axes and recombines the results with learned weights.
Its constructor (__init__) takes the following arguments:
dim: dimensionality of the input features;
seg_dim: number of segments, 8 by default;
qkv_bias: whether the projections use a bias term, False by default;
proj_drop: dropout probability after the output projection, 0 by default.
The module first transforms the input with three linear layers (mlp_c, mlp_h and mlp_w) acting along the channel, height and width directions respectively.
For the spatial branches the input is split into segments along the channel dimension, permuted so that a spatial axis is mixed with the segment features, and then projected.
A small MLP computes one weight per branch, and the three branch outputs are combined as a weighted average.
The combined features are finally post-processed by the output projection and dropout.
These modules are typically used in different parts of a network for feature extraction and modelling: the MLP handles per-token transformations, while WeightedPermuteMLP recombines features along different axes to strengthen the global representation.
"""
import torch
from torch import nn
class MLP(nn.Module):
def __init__(self,in_features,hidden_features,out_features,act_layer=nn.GELU,drop=0.1):
super().__init__()
self.fc1=nn.Linear(in_features,hidden_features)
self.act=act_layer()
self.fc2=nn.Linear(hidden_features,out_features)
self.drop=nn.Dropout(drop)
def forward(self, x) :
return self.drop(self.fc2(self.drop(self.act(self.fc1(x)))))
class WeightedPermuteMLP(nn.Module):
def __init__(self,dim,seg_dim=8, qkv_bias=False, proj_drop=0.):
super().__init__()
self.seg_dim=seg_dim
self.mlp_c=nn.Linear(dim,dim,bias=qkv_bias)
self.mlp_h=nn.Linear(dim,dim,bias=qkv_bias)
self.mlp_w=nn.Linear(dim,dim,bias=qkv_bias)
self.reweighting=MLP(dim,dim//4,dim*3)
self.proj=nn.Linear(dim,dim)
self.proj_drop=nn.Dropout(proj_drop)
def forward(self,x) :
B,H,W,C=x.shape
c_embed=self.mlp_c(x)
S=C//self.seg_dim
h_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,2,1,4).reshape(B,self.seg_dim,W,H*S)
h_embed=self.mlp_h(h_embed).reshape(B,self.seg_dim,W,H,S).permute(0,3,2,1,4).reshape(B,H,W,C)
w_embed=x.reshape(B,H,W,self.seg_dim,S).permute(0,3,1,2,4).reshape(B,self.seg_dim,H,W*S)
w_embed=self.mlp_w(w_embed).reshape(B,self.seg_dim,H,W,S).permute(0,2,3,1,4).reshape(B,H,W,C)
weight=(c_embed+h_embed+w_embed).permute(0,3,1,2).flatten(2).mean(2)
weight=self.reweighting(weight).reshape(B,C,3).permute(2,0,1).softmax(0).unsqueeze(2).unsqueeze(2)
x=c_embed*weight[0]+w_embed*weight[1]+h_embed*weight[2]
x=self.proj_drop(self.proj(x))
return x
if __name__ == '__main__':
input=torch.randn(64,8,8,512)
seg_dim=8
vip=WeightedPermuteMLP(512,seg_dim)
out=vip(input)
print(out.shape)

View File

@ -0,0 +1,131 @@
import torch
from torch import nn
from torch.nn import init
from einops.einops import rearrange
def to_3d(x):
return rearrange(x, 'b c h w -> b (h w) c')
def to_4d(x, h, w):
return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
class Partial_conv3(nn.Module):
def __init__(self, dim, n_div, forward):
super().__init__()
self.dim_conv3 = dim // n_div
self.dim_untouched = dim - self.dim_conv3
self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)
if forward == 'slicing':
self.forward = self.forward_slicing
elif forward == 'split_cat':
self.forward = self.forward_split_cat
else:
raise NotImplementedError
def forward_slicing(self, x):
# only for inference
x = x.clone() # !!! Keep the original input intact for the residual connection later
x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :])
return x
def forward_split_cat(self, x):
x = to_4d(x, 28, 28)
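        # note (added): this reshape hard-codes a 28x28 map, i.e. exactly 784 tokens;
        # a different sequence length would need different (h, w) values here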
# for training/inference
x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
x1 = self.partial_conv3(x1)
x = torch.cat((x1, x2), 1)
x = to_3d(x)
return x
class ExternalAttention(nn.Module):
def __init__(self, d_model, S=64):
super().__init__()
self.mk = nn.Linear(d_model, S, bias=False)
self.mv = nn.Linear(S, d_model, bias=False)
self.softmax = nn.Softmax(dim=1)
self.init_weights()
self.pa = Partial_conv3(128, 2, 'split_cat')
def init_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
init.constant_(m.bias, 0)
elif isinstance(m, nn.BatchNorm2d):
init.constant_(m.weight, 1)
init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
init.normal_(m.weight, std=0.001)
if m.bias is not None:
init.constant_(m.bias, 0)
def forward(self, queries): # torch.Size([32, 784, 128])
queries = self.pa(queries)
attn = self.mk(queries) # torch.Size([32, 784, 8])
attn = self.softmax(attn) # torch.Size([32, 784, 8])
attn = attn / torch.sum(attn, dim=2, keepdim=True) # torch.Size([32, 784, 8])
out = self.mv(attn) # torch.Size([32, 784, 128])
return out
# class Paex(nn.Module):  # serial combination: Partial_conv3 followed by ExternalAttention
# def __init__(self):
# super(Paex, self).__init__()
# self.pa = Partial_conv3(128, 2, 'split_cat')
# self.ex = ExternalAttention(d_model=128, S=8)
#
# def forward(self, x):
# x1 = self.pa(x)
# x2 = self.ex(x1)
# return x2
#
# class Paex(nn.Module):  # parallel combination: the outputs of the two branches are added
# def __init__(self):
# super(Paex, self).__init__()
# self.pa = Partial_conv3(128, 2, 'split_cat')
# self.ex = ExternalAttention(d_model=128, S=8)
#
# def forward(self, x):
# x1 = self.pa(x)
# x2 = self.ex(x)
# x3 = x1 + x2
# return x3
#
#
# input: B N C, output: B N C
# if __name__ == '__main__':
# block = Paex()
# input = torch.rand(32, 784, 128)
# output = block(input)
# print(input.size())
# print(output.size())
# input: B N C, output: B N C
if __name__ == '__main__':
block = ExternalAttention(d_model=128, S=8)
input = torch.rand(32, 784, 128)
output = block(input)
print(input.size())
print(output.size())

View File

@ -0,0 +1,157 @@
import torch
from torch import nn
import math
from einops.einops import rearrange
def to_3d(x):
return rearrange(x, 'b c h w -> b (h w) c')
def to_4d(x, h, w):
return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
class AKConv(nn.Module):
def __init__(self, inc, outc, num_param, stride=1, bias=None):
super(AKConv, self).__init__()
self.num_param = num_param
self.stride = stride
self.conv = nn.Sequential(nn.Conv2d(inc, outc, kernel_size=(num_param, 1), stride=(num_param, 1), bias=bias)
,nn.BatchNorm2d(outc)
,nn.SiLU())  # this conv block includes BN and SiLU, matching the original Conv block in YOLOv5
self.p_conv = nn.Conv2d(inc, 2 * num_param, kernel_size=3, padding=1, stride=stride)
nn.init.constant_(self.p_conv.weight, 0)
self.p_conv.register_full_backward_hook(self._set_lr)
@staticmethod
def _set_lr(module, grad_input, grad_output):
        # scale the gradients flowing through the offset branch by 0.1; the hook must return
        # the new grad_input tuple for the scaling to take effect
        return tuple(g * 0.1 if g is not None else None for g in grad_input)
def forward(self, x):
# N is num_param.
offset = self.p_conv(x)
dtype = offset.data.type()
N = offset.size(1) // 2
# (b, 2N, h, w)
p = self._get_p(offset, dtype)
# (b, h, w, 2N)
p = p.contiguous().permute(0, 2, 3, 1)
q_lt = p.detach().floor()
q_rb = q_lt + 1
q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2) - 1), torch.clamp(q_lt[..., N:], 0, x.size(3) - 1)],
dim=-1).long()
q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2) - 1), torch.clamp(q_rb[..., N:], 0, x.size(3) - 1)],
dim=-1).long()
q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1)
q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1)
# clip p
p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2) - 1), torch.clamp(p[..., N:], 0, x.size(3) - 1)], dim=-1)
# bilinear kernel (b, h, w, N)
g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))
# resampling the features based on the modified coordinates.
x_q_lt = self._get_x_q(x, q_lt, N)
x_q_rb = self._get_x_q(x, q_rb, N)
x_q_lb = self._get_x_q(x, q_lb, N)
x_q_rt = self._get_x_q(x, q_rt, N)
# bilinear
x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
g_rb.unsqueeze(dim=1) * x_q_rb + \
g_lb.unsqueeze(dim=1) * x_q_lb + \
g_rt.unsqueeze(dim=1) * x_q_rt
x_offset = self._reshape_x_offset(x_offset, self.num_param)
out = self.conv(x_offset)
return out
# generating the initial sampled shapes for the AKConv with different sizes.
def _get_p_n(self, N, dtype):
base_int = round(math.sqrt(self.num_param))
row_number = self.num_param // base_int
mod_number = self.num_param % base_int
p_n_x ,p_n_y = torch.meshgrid(
torch.arange(0, row_number),
torch.arange(0 ,base_int))
p_n_x = torch.flatten(p_n_x)
p_n_y = torch.flatten(p_n_y)
if mod_number > 0:
mod_p_n_x ,mod_p_n_y = torch.meshgrid(
torch.arange(row_number ,row_number +1),
torch.arange(0 ,mod_number))
mod_p_n_x = torch.flatten(mod_p_n_x)
mod_p_n_y = torch.flatten(mod_p_n_y)
p_n_x ,p_n_y = torch.cat((p_n_x ,mod_p_n_x)) ,torch.cat((p_n_y ,mod_p_n_y))
p_n = torch.cat([p_n_x ,p_n_y], 0)
p_n = p_n.view(1, 2 * N, 1, 1).type(dtype)
return p_n
# no zero-padding
def _get_p_0(self, h, w, N, dtype):
p_0_x, p_0_y = torch.meshgrid(
torch.arange(0, h * self.stride, self.stride),
torch.arange(0, w * self.stride, self.stride))
p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1)
p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)
p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype)
return p_0
def _get_p(self, offset, dtype):
N, h, w = offset.size(1) // 2, offset.size(2), offset.size(3)
# (1, 2N, 1, 1)
p_n = self._get_p_n(N, dtype)
# (1, 2N, h, w)
p_0 = self._get_p_0(h, w, N, dtype)
p = p_0 + p_n + offset
return p
def _get_x_q(self, x, q, N):
b, h, w, _ = q.size()
padded_w = x.size(3)
c = x.size(1)
# (b, c, h*w)
x = x.contiguous().view(b, c, -1)
# (b, h, w, N)
index = q[..., :N] * padded_w + q[..., N:] # offset_x*w + offset_y
# (b, c, h*w*N)
index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)
x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)
return x_offset
# Stacking resampled features in the row direction.
@staticmethod
def _reshape_x_offset(x_offset, num_param):
b, c, h, w, n = x_offset.size()
# using Conv3d
# x_offset = x_offset.permute(0,1,4,2,3), then Conv3d(c,c_out, kernel_size =(num_param,1,1),stride=(num_param,1,1),bias= False)
# using 1 × 1 Conv
# x_offset = x_offset.permute(0,1,4,2,3), then, x_offset.view(b,c×num_param,h,w) finally, Conv2d(c×num_param,c_out, kernel_size =1,stride=1,bias= False)
# using the column conv as follow then, Conv2d(inc, outc, kernel_size=(num_param, 1), stride=(num_param, 1), bias=bias)
x_offset = rearrange(x_offset, 'b c h w n -> b c (h n) w')
return x_offset
if __name__ == '__main__':
block = AKConv(inc=32,outc=32,num_param=3)
input = torch.rand(64,32,15,15)
output = block(input)
print(input.size())
print(output.size())

View File

@ -0,0 +1,430 @@
import torch
import torch.nn as nn
class BatchNorm2D(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
super(BatchNorm2D, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.rescale = rescale
if (self.rescale == True):
# define parameters gamma, beta which are learnable
# dimension of gamma and beta should be (num_channels) ie its a one dimensional vector
# initializing gamma as ones vector and beta as zeros vector (implies no scaling/shifting at the start)
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
# define parameters running mean and variance which is not learnable
# keep track of mean and variance(but donot learn them), momentum is used which weighs current batch-mean and
# variance with the running mean and variance using (momentum*runningmean+(1-momentum)*currentmean)
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
assert x.shape[1] == self.num_channels
assert len(x.shape) == 4 # 4 because (batchsize, numchannels, height, width)
if (self.training):
# calculate mean and variance along the dimensions other than the channel dimension
# variance calculation is using the biased formula during training
variance = torch.var(x, dim=[0, 2, 3], unbiased=False)
mean = torch.mean(x, dim=[0, 2, 3])
self.runningmean.mul_(self.momentum).add_((1 - self.momentum) * mean.detach())
self.runningvar.mul_(self.momentum).add_((1 - self.momentum) * variance.detach())
out = (x - mean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
variance.view([1, self.num_channels, 1, 1]) + self.epsilon)
else:
m = x.shape[0] * x.shape[2] * x.shape[3]
out = (x - self.runningmean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
(m / (m - 1)) * self.runningvar.view([1, self.num_channels, 1, 1]) + self.epsilon)
# during testing just use the running mean and (UnBiased) variance
if (self.rescale == True):
out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
return out
class BatchNormm2D(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
super(BatchNormm2D, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.rescale = rescale
if (self.rescale == True):
# define parameters gamma, beta which are learnable
# dimension of gamma and beta should be (num_channels) ie its a one dimensional vector
# initializing gamma as ones vector and beta as zeros vector (implies no scaling/shifting at the start)
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
# define parameters running mean and variance which is not learnable
# keep track of mean and variance(but donot learn them), momentum is used which weighs current batch-mean and
# variance with the running mean and variance using (momentum*runningmean+(1-momentum)*currentmean)
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
assert x.shape[1] == self.num_channels
assert len(x.shape) == 4 # 4 because (batchsize, numchannels, height, width)
if (self.training):
# calculate mean and variance along the dimensions other than the channel dimension
# variance calculation is using the biased formula during training
variance = torch.var(x, dim=[0, 2, 3], unbiased=False)
mean = torch.mean(x, dim=[0, 2, 3])
self.runningmean = (1 - self.momentum) * mean + (self.momentum) * self.runningmean
self.runningvar = (1 - self.momentum) * variance + (self.momentum) * self.runningvar
out = (x - mean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
variance.view([1, self.num_channels, 1, 1]) + self.epsilon)
else:
m = x.shape[0] * x.shape[2] * x.shape[3]
out = (x - self.runningmean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
(m / (m - 1)) * self.runningvar.view([1, self.num_channels, 1, 1]) + self.epsilon)
# during testing just use the running mean and (UnBiased) variance
# this variant applies no affine rescaling of its own; always return the normalised output
return out
class BatchNormm2DViiT(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
super(BatchNormm2DViiT, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.rescale = rescale
if (self.rescale == True):
# define parameters gamma, beta which are learnable
# dimension of gamma and beta should be (num_channels) ie its a one dimensional vector
# initializing gamma as ones vector and beta as zeros vector (implies no scaling/shifting at the start)
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
# define parameters running mean and variance which is not learnable
# keep track of mean and variance(but donot learn them), momentum is used which weighs current batch-mean and
# variance with the running mean and variance using (momentum*runningmean+(1-momentum)*currentmean)
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
if (self.training):
# calculate mean and variance along the dimensions other than the channel dimension
# variance calculation is using the biased formula during training
mean = x.mean(-1, keepdim=True) # mean: [bsz, max_len, 1]
std = x.std(-1, keepdim=True) # std: [bsz, max_len, 1]
self.runningmean = (1 - self.momentum) * mean + (self.momentum) * self.runningmean
self.runningvar = (1 - self.momentum) * std + (self.momentum) * self.runningvar
out=(x - mean) / (std + self.epsilon)
else:
m = x.shape[0] * x.shape[2] * x.shape[3]
out = (x - self.runningmean) / torch.sqrt(
(m / (m - 1))* self.runningvar + self.epsilon)
# during testing just use the running mean and (UnBiased) variance
if (self.rescale == True):
    out = self.gamma * out + self.beta
return out
class BatchNormm2DViTC(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
super(BatchNormm2DViTC, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.rescale = rescale
if (self.rescale == True):
# define parameters gamma, beta which are learnable
# dimension of gamma and beta should be (num_channels) ie its a one dimensional vector
# initializing gamma as ones vector and beta as zeros vector (implies no scaling/shifting at the start)
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
# define parameters running mean and variance which is not learnable
# keep track of mean and variance(but donot learn them), momentum is used which weighs current batch-mean and
# variance with the running mean and variance using (momentum*runningmean+(1-momentum)*currentmean)
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
if (self.training):
# calculate mean and variance along the dimensions other than the channel dimension
# variance calculation is using the biased formula during training
mean = x.mean(-1, keepdim=True) # mean: [bsz, max_len, 1]
std = x.std(-1, keepdim=True) # std: [bsz, max_len, 1]
self.runningmean = (1 - self.momentum) * mean + (self.momentum) * self.runningmean
self.runningvar = (1 - self.momentum) * std + (self.momentum) * self.runningvar
out=(x - mean) / (std + self.epsilon)
else:
m = x.shape[0] * x.shape[2] * x.shape[3]
out = (x - self.runningmean) / torch.sqrt(
(m / (m - 1))* self.runningvar + self.epsilon)
# during testing just use the running mean and (UnBiased) variance
# this variant applies no affine rescaling of its own; always return the normalised output
return out
class InstanceNorm2D(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
super(InstanceNorm2D, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.rescale = rescale
if (self.rescale == True):
# define parameters gamma, beta which are learnable
# dimension of gamma and beta should be (num_channels) ie its a one dimensional vector
# initializing gamma as ones vector and beta as zeros vector (implies no scaling/shifting at the start)
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
# running mean and variance should have the same dimension as in batchnorm
# ie, a vector of size num_channels because while testing, when we get one
# sample at a time, we should be able to use this.
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
assert x.shape[1] == self.num_channels
assert len(x.shape) == 4 # 4 because len((batchsize, numchannels, height, width)) = 4
if (self.training):
# calculate mean and variance along the dimensions other than the channel dimension
# variance calculation is using the biased formula during training
variance, mean = torch.var(x, dim=[2, 3], unbiased=False), torch.mean(x, dim=[2, 3])
out = (x - mean.view([-1, self.num_channels, 1, 1])) / torch.sqrt(
variance.view([-1, self.num_channels, 1, 1]) + self.epsilon)
else:
variance, mean = torch.var(x, dim=[2, 3], unbiased=False), torch.mean(x, dim=[2, 3])
out = (x - mean.view([-1, self.num_channels, 1, 1])) / torch.sqrt(
variance.view([-1, self.num_channels, 1, 1]) + self.epsilon)
if (self.rescale == True):
out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
return out
class LayerNormViT(nn.Module):
def __init__(self, features, eps=1e-6):
super(LayerNormViT, self).__init__()
self.a_2 = nn.Parameter(torch.ones(features))
self.b_2 = nn.Parameter(torch.zeros(features))
self.eps = eps
def forward(self, x):
mean = x.mean(-1, keepdim=True) # mean: [bsz, max_len, 1]
std = x.std(-1, keepdim=True) # std: [bsz, max_len, 1]
return self.a_2 * (x - mean) / (std + self.eps) + self.b_2
class LayerNormViTC(nn.Module):
def __init__(self, features, eps=1e-6):
super(LayerNormViTC, self).__init__()
self.a_2 = nn.Parameter(torch.ones(features))
self.b_2 = nn.Parameter(torch.zeros(features))
self.eps = eps
def forward(self, x):
mean = x.mean(-1, keepdim=True) # mean: [bsz, max_len, 1]
std = x.std(-1, keepdim=True) # std: [bsz, max_len, 1]
return(x - mean) / (std + self.eps)
class LayerNorm2D(nn.Module):
def __init__(self, num_channels, epsilon = 1e-5):
super(LayerNorm2D, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
def forward(self, x):
# assert list(x.shape)[1] == self.num_channels
# assert len(x.shape) == 4 # 4 because len((batchsize, numchannels, height, width)) = 4
variance, mean = torch.var(x, dim = [1,2, 3], unbiased=False), torch.mean(x, dim = [1,2, 3])
out = (x-mean.view([-1, 1, 1, 1]))/torch.sqrt(variance.view([-1, 1, 1, 1])+self.epsilon)
out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
return out
class LayerNormm2D(nn.Module):
def __init__(self, num_channels, epsilon=1e-5):
super(LayerNormm2D, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
def forward(self, x):
assert list(x.shape)[1] == self.num_channels
assert len(x.shape) == 4 # 4 because len((batchsize, numchannels, height, width)) = 4
variance, mean = torch.var(x, dim=[1, 2, 3], unbiased=False), torch.mean(x, dim=[1, 2, 3])
out = (x - mean.view([-1, 1, 1, 1])) / torch.sqrt(variance.view([-1, 1, 1, 1]) + self.epsilon)
return out
class GroupNorm2D(nn.Module):
def __init__(self, num_channels, num_groups=4, epsilon=1e-5):
super(GroupNorm2D, self).__init__()
self.num_channels = num_channels
# self.num_groups = num_groups
self.num_groups = num_channels // 4
self.epsilon = epsilon
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
def forward(self, x):
assert x.shape[1] == self.num_channels
assert len(x.shape) == 4 # 4 because (batchsize, numchannels, height, width)
[N, C, H, W] = list(x.shape)
out = torch.reshape(x, (N, self.num_groups, self.num_channels // self.num_groups, H, W))
variance, mean = torch.var(out, dim=[2, 3, 4], unbiased=False, keepdim=True), torch.mean(out, dim=[2, 3, 4],
keepdim=True)
out = (out - mean) / torch.sqrt(variance + self.epsilon)
out = out.view(N, self.num_channels, H, W)
out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
return out
class BatchNorm_ByoL(nn.Module):
def __init__(self, bn, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
super(BatchNorm_ByoL, self).__init__()
self.num_channels = num_channels
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
self.eps = epsilon
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
std = self.runningvar.add(self.eps).sqrt()
return x.sub(self.runningmean).div(std).mul(self.gamma).add(self.beta)
class LaychNorm_ByoL(nn.Module):
def __init__(self, bn, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
super(LaychNorm_ByoL, self).__init__()
self.num_channels = num_channels
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
self.eps = epsilon
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
std = self.runningvar.add(self.eps).sqrt()
return x.sub(self.runningmean).div(std).mul(self.gamma).add(self.beta)
class BatchNorm_Byol(nn.Module):
def __init__(self, bn, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
super(BatchNorm_Byol, self).__init__()
self.num_channels = num_channels
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
self.eps = epsilon
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
std = self.runningvar.add(self.eps).sqrt()
return x.sub(self.runningmean).div(std)
class LaychNorm_Byol(nn.Module):
def __init__(self, bn, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
super(LaychNorm_Byol, self).__init__()
self.num_channels = num_channels
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
self.eps = epsilon
self.register_buffer('runningmean', torch.zeros(num_channels))
self.register_buffer('runningvar', torch.ones(num_channels))
def forward(self, x):
std = self.runningvar.add(self.eps).sqrt()
return x.sub(self.runningmean).div(std)
class BatchChannelNorm_Byol(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9):
super(BatchChannelNorm_Byol, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
# BatchNorm_Byol / LaychNorm_Byol take an unused `bn` argument first, so pass
# num_channels explicitly by keyword instead of binding it to `bn`
self.Batchh = BatchNorm_Byol(bn=None, num_channels=self.num_channels, epsilon=self.epsilon)
self.layeer = LaychNorm_Byol(bn=None, num_channels=self.num_channels, epsilon=self.epsilon)
# The BCN variable to be learnt
self.BCN_var = nn.Parameter(torch.ones(self.num_channels))
# Gamma and Beta for rescaling
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
def forward(self, x):
X = self.Batchh(x)
Y = self.layeer(x)
out = self.BCN_var * X + (1 - self.BCN_var) * Y  # parentheses so (1 - BCN_var) weights the layer-norm branch
out = self.gamma * out + self.beta
return out
class BatchChannelNorm(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9):
super(BatchChannelNorm, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.Batchh = BatchNormm2D(self.num_channels, epsilon=self.epsilon)
self.layeer = LayerNormm2D(self.num_channels, epsilon=self.epsilon)
# The BCN variable to be learnt
self.BCN_var = nn.Parameter(torch.ones(self.num_channels))
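# BCN_var acts as a per-channel mixing weight in forward(): 1 keeps only the batch-norm
# branch X, 0 keeps only the layer-norm branch Y, and intermediate values blend the two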
# Gamma and Beta for rescaling
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
def forward(self, x):
X = self.Batchh(x)
Y = self.layeer(x)
out = self.BCN_var.view([1, self.num_channels, 1, 1]) * X + (
1 - self.BCN_var.view([1, self.num_channels, 1, 1])) * Y
out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
return out
class BatchChannelNormvit(nn.Module):
def __init__(self, num_channels, epsilon=1e-5, momentum=0.9):
super(BatchChannelNormvit, self).__init__()
self.num_channels = num_channels
self.epsilon = epsilon
self.momentum = momentum
self.Batchh = BatchNormm2DViTC(self.num_channels, epsilon=self.epsilon)
self.layeer = LayerNormViTC(self.num_channels)
# The BCN variable to be learnt
self.BCN_var = nn.Parameter(torch.ones(self.num_channels))
# Gamma and Beta for rescaling
self.gamma = nn.Parameter(torch.ones(num_channels))
self.beta = nn.Parameter(torch.zeros(num_channels))
def forward(self, x):
X = self.Batchh(x)
Y = self.layeer(x)
out = self.BCN_var * X + (1 - self.BCN_var) * Y
out = self.gamma * out + self.beta
return out
if __name__ == '__main__':
block = BatchChannelNorm(num_channels=64)
input = torch.rand(64, 64, 9, 9)
output = block(input)
print(input.size())
print(output.size())
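# A minimal usage sketch: BatchChannelNorm dropped in where nn.BatchNorm2d would normally sit
# inside a conv block; block_bcn is a hypothetical name and the shapes below are illustrative only.
block_bcn = nn.Sequential(
    nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
    BatchChannelNorm(num_channels=128),
    nn.ReLU(inplace=True),
)
print(block_bcn(torch.rand(2, 64, 32, 32)).size())  # expected: torch.Size([2, 128, 32, 32])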

View File

@ -0,0 +1,31 @@
import torch
import torch.nn as nn
from pytorch_wavelets import DWTForward
class Down_wt(nn.Module):
def __init__(self, in_ch, out_ch):
super(Down_wt, self).__init__()
self.wt = DWTForward(J=1, mode='zero', wave='haar')
self.conv_bn_relu = nn.Sequential(
nn.Conv2d(in_ch * 4, out_ch, kernel_size=1, stride=1),
nn.BatchNorm2d(out_ch),
nn.ReLU(inplace=True),
)
def forward(self, x):
yL, yH = self.wt(x)
y_HL = yH[0][:, :, 0, ::]
y_LH = yH[0][:, :, 1, ::]
y_HH = yH[0][:, :, 2, ::]
x = torch.cat([yL, y_HL, y_LH, y_HH], dim=1)
x = self.conv_bn_relu(x)
return x
# Input: N C H W, Output: N C H W
if __name__ == '__main__':
block = Down_wt(64, 64)  # in_channels, out_channels
input = torch.rand(3, 64, 64, 64)
output = block(input)
print(output.size())
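# A minimal sketch: Down_wt acts as a stride-2 downsampling layer, so it can stand in for
# nn.MaxPool2d(2) or a stride-2 conv in an encoder stage; the name `stage` is hypothetical.
stage = nn.Sequential(
    nn.Conv2d(3, 64, kernel_size=3, padding=1),
    nn.ReLU(inplace=True),
    Down_wt(64, 128),  # halves H and W (64x64 -> 32x32) while mapping 64 -> 128 channels
)
print(stage(torch.rand(2, 3, 64, 64)).size())  # expected: torch.Size([2, 128, 32, 32])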

View File

@ -0,0 +1,128 @@
import torch
from torch import nn
import math
"""
This module implements MCALayer (Multi-modal Channel Attention Layer), an attention mechanism that strengthens interaction and information fusion across the channels of a network. Its key components and properties are:
MCAGate module: MCALayer is built from MCAGate, a multi-modal attention gate that uses pooling (average, max and standard-deviation pooling) to extract inter-channel statistics; the different pooling types capture different statistical properties of the channels.
Cross-dimension interaction: MCALayer performs three kinds of interaction, height-channel (h_cw), width-channel (w_hc) and channel-spatial (c_hw); each acts along a different dimension and helps the model understand and integrate information across channels.
Spatial interaction switch: the no_spatial flag controls whether the channel-spatial path is used; if True only the two channel interactions are applied, if False the spatial interaction is added as well.
Weight fusion: after the interactions, the outputs of the different pooling branches are fused with learned weights, so the model decides how much each pooling type contributes.
Adaptive kernel size: the kernel size used in the channel branch is derived automatically from the number of input channels, which makes the module adapt to different widths.
Overall, MCALayer uses multi-modal attention with several kinds of channel interaction and pooling to better capture relations between features, improving representation power for vision tasks such as image classification, object detection and semantic segmentation; its modular design makes it easy to plug into existing networks.
"""
__all__ = ['MCALayer', 'MCAGate']
class StdPool(nn.Module):
def __init__(self):
super(StdPool, self).__init__()
def forward(self, x):
b, c, _, _ = x.size()
std = x.view(b, c, -1).std(dim=2, keepdim=True)
std = std.reshape(b, c, 1, 1)
return std
class MCAGate(nn.Module):
def __init__(self, k_size, pool_types=['avg', 'std']):
"""Constructs a MCAGate module.
Args:
k_size: kernel size
pool_types: pooling type. 'avg': average pooling, 'max': max pooling, 'std': standard deviation pooling.
"""
super(MCAGate, self).__init__()
self.pools = nn.ModuleList([])
for pool_type in pool_types:
if pool_type == 'avg':
self.pools.append(nn.AdaptiveAvgPool2d(1))
elif pool_type == 'max':
self.pools.append(nn.AdaptiveMaxPool2d(1))
elif pool_type == 'std':
self.pools.append(StdPool())
else:
raise NotImplementedError
self.conv = nn.Conv2d(1, 1, kernel_size=(1, k_size), stride=1, padding=(0, (k_size - 1) // 2), bias=False)
self.sigmoid = nn.Sigmoid()
self.weight = nn.Parameter(torch.rand(2))
def forward(self, x):
feats = [pool(x) for pool in self.pools]
if len(feats) == 1:
out = feats[0]
elif len(feats) == 2:
weight = torch.sigmoid(self.weight)
out = 1 / 2 * (feats[0] + feats[1]) + weight[0] * feats[0] + weight[1] * feats[1]
else:
assert False, "Feature Extraction Exception!"
out = out.permute(0, 3, 2, 1).contiguous()
out = self.conv(out)
out = out.permute(0, 3, 2, 1).contiguous()
out = self.sigmoid(out)
out = out.expand_as(x)
return x * out
class MCALayer(nn.Module):
def __init__(self, inp, no_spatial=False):
"""Constructs a MCA module.
Args:
inp: Number of channels of the input feature maps
no_spatial: whether to build channel dimension interactions
"""
super(MCALayer, self).__init__()
lambd = 1.5
gamma = 1
temp = round(abs((math.log2(inp) - gamma) / lambd))
kernel = temp if temp % 2 else temp - 1
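# e.g. inp = 64: |log2(64) - 1| / 1.5 ~ 3.33, round -> temp = 3, already odd, so kernel = 3;
# an even temp is decremented by one so the 1D conv kernel used by the c_hw gate stays odd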
self.h_cw = MCAGate(3)
self.w_hc = MCAGate(3)
self.no_spatial = no_spatial
if not no_spatial:
self.c_hw = MCAGate(kernel)
def forward(self, x):
x_h = x.permute(0, 2, 1, 3).contiguous()
x_h = self.h_cw(x_h)
x_h = x_h.permute(0, 2, 1, 3).contiguous()
x_w = x.permute(0, 3, 2, 1).contiguous()
x_w = self.w_hc(x_w)
x_w = x_w.permute(0, 3, 2, 1).contiguous()
if not self.no_spatial:
x_c = self.c_hw(x)
x_out = 1 / 3 * (x_c + x_h + x_w)
else:
x_out = 1 / 2 * (x_h + x_w)
return x_out
if __name__ == '__main__':
block = MCALayer(inp=64)
input = torch.rand(64, 64, 9, 9)
output = block(input)
print(input.size())
print(output.size())

View File

@ -0,0 +1,81 @@
import torch
import torch.nn as nn
from mmengine.model import BaseModule
"""
This code implements MSCAAttention (Multi-Scale Channel Attention), an attention module whose main purpose is to strengthen the network's perception along specific channels and spatial positions so that richer and more useful features can be extracted.
Its main properties are:
Multi-scale feature extraction: several depth-wise convolutions with different kernel sizes and paddings extract features at different scales, starting with an initial convolution with a larger kernel (self.conv0) followed by the strip convolutions self.conv0_1, self.conv0_2, self.conv1_1, self.conv1_2, self.conv2_1 and self.conv2_2, each with its own kernel size and padding.
Channel mixing: after the multi-scale features are extracted, they are fused across channels by the final convolution self.conv3.
Convolutional attention: the mixed features are multiplied element-wise with the input, which implements a convolutional attention mechanism; the module assigns different weights to different channels and can therefore selectively emphasize or suppress input features.
Overall, MSCAAttention strengthens the representation of the feature map: it automatically learns the importance of particular channels and spatial positions, captures the key information in images or feature maps, and helps improve performance on computer-vision tasks such as image classification, object detection and semantic segmentation.
"""
class MSCAAttention(BaseModule):
def __init__(self,
channels,
kernel_sizes=[5, [1, 7], [1, 11], [1, 21]],
paddings=[2, [0, 3], [0, 5], [0, 10]]):
super().__init__()
self.conv0 = nn.Conv2d(
channels,
channels,
kernel_size=kernel_sizes[0],
padding=paddings[0],
groups=channels)
for i, (kernel_size,
padding) in enumerate(zip(kernel_sizes[1:], paddings[1:])):
kernel_size_ = [kernel_size, kernel_size[::-1]]
padding_ = [padding, padding[::-1]]
conv_name = [f'conv{i}_1', f'conv{i}_2']
for i_kernel, i_pad, i_conv in zip(kernel_size_, padding_,
conv_name):
self.add_module(
i_conv,
nn.Conv2d(
channels,
channels,
tuple(i_kernel),
padding=i_pad,
groups=channels))
self.conv3 = nn.Conv2d(channels, channels, 1)
def forward(self, x):
"""Forward function."""
u = x.clone()
attn = self.conv0(x)
# Multi-Scale Feature extraction
attn_0 = self.conv0_1(attn)
attn_0 = self.conv0_2(attn_0)
attn_1 = self.conv1_1(attn)
attn_1 = self.conv1_2(attn_1)
attn_2 = self.conv2_1(attn)
attn_2 = self.conv2_2(attn_2)
attn = attn + attn_0 + attn_1 + attn_2
# Channel Mixing
attn = self.conv3(attn)
# Convolutional Attention
x = attn * u
return x
if __name__ == '__main__':
block = MSCAAttention(channels=64)
input = torch.rand(64, 64, 9, 9)
output = block(input)
print(input.size())
print(output.size())
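# A hedged sketch of how this module is commonly wrapped in a SegNeXt-style attention block
# (1x1 projection -> GELU -> MSCAAttention -> 1x1 projection, with a residual connection);
# the class name MSCABlockSketch is hypothetical and not taken from mmsegmentation.
class MSCABlockSketch(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.proj_1 = nn.Conv2d(channels, channels, 1)
        self.act = nn.GELU()
        self.gate = MSCAAttention(channels)
        self.proj_2 = nn.Conv2d(channels, channels, 1)
    def forward(self, x):
        shortcut = x
        x = self.proj_2(self.gate(self.act(self.proj_1(x))))
        return x + shortcut
print(MSCABlockSketch(64)(torch.rand(2, 64, 32, 32)).size())  # expected: torch.Size([2, 64, 32, 32])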

View File

@ -0,0 +1,68 @@
import torch
from torch import nn
"""
This code implements TalkingHeadAttn, a self-attention module that strengthens feature representation and modelling over an input sequence. Its key parts are:
Multi-head self-attention: the input is projected in several different ways to build multiple attention heads; num_heads specifies the number of heads and each head learns to capture different relations in the input sequence.
Query-Key-Value projection: a linear layer (nn.Linear) projects the input x into the query (Q), key (K) and value (V) spaces; this is done by self.qkv, which produces all three projections in a single pass for efficiency.
Attention computation: the dot product of Q and K followed by a softmax yields the attention matrix, which describes how strongly each position in the sequence attends to every other position; this is computed by attn = q @ k.transpose(-2, -1) and attn = attn.softmax(dim=-1).
Multi-head integration: the per-head attention maps are mixed across heads by the linear layers self.proj_l and self.proj_w (before and after the softmax), multiplied with the value (V) matrix, and projected back so that the results of all heads are combined.
Dropout regularisation: Dropout is applied after the attention computation and the projection to reduce the risk of over-fitting.
Output: the final output is produced by self.proj and self.proj_drop.
Overall, TalkingHeadAttn uses multi-head self-attention to consider relations between different positions and different feature relations at the same time, which improves feature extraction and modelling for sequence data; it performs well in NLP and other sequence tasks and is typically used as a sub-module inside larger networks.
"""
class TalkingHeadAttn(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
self.scale = head_dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_l = nn.Linear(num_heads, num_heads)
self.proj_w = nn.Linear(num_heads, num_heads)
self.proj_drop = nn.Dropout(proj_drop)
def forward(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]
attn = q @ k.transpose(-2, -1)
attn = self.proj_l(attn.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
attn = attn.softmax(dim=-1)
attn = self.proj_w(attn.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
if __name__ == '__main__':
block = TalkingHeadAttn(dim=128)
input = torch.rand(32, 784, 128)
output = block(input)
print(input.size())
print(output.size())
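# A minimal sketch of plugging TalkingHeadAttn into a pre-norm transformer block
# (LayerNorm -> attention -> residual, then LayerNorm -> MLP -> residual);
# the class name TinyBlock is hypothetical and only illustrates where the module fits.
class TinyBlock(nn.Module):
    def __init__(self, dim, num_heads=8, mlp_ratio=4):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim)
        self.attn = TalkingHeadAttn(dim, num_heads=num_heads)
        self.norm2 = nn.LayerNorm(dim)
        self.mlp = nn.Sequential(
            nn.Linear(dim, dim * mlp_ratio),
            nn.GELU(),
            nn.Linear(dim * mlp_ratio, dim),
        )
    def forward(self, x):
        x = x + self.attn(self.norm1(x))
        x = x + self.mlp(self.norm2(x))
        return x
print(TinyBlock(dim=128)(torch.rand(2, 196, 128)).size())  # expected: torch.Size([2, 196, 128])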

View File

@ -0,0 +1,60 @@
# https://github.com/damo-cv/KVT
"""
The main components and operations of this module are:
qkv: a linear layer that maps the input features x to three linear projections corresponding to the query, key and value; the projections split the feature channels into multiple heads.
attn_drop and proj_drop: Dropout layers applied to the attention matrix and to the output features.
topk: a hyper-parameter giving the number of most relevant keys to keep for each query; it controls the behaviour of the k-nearest-neighbour attention.
In the forward pass the module first projects x to queries, keys and values, then computes the attention matrix with a matrix multiplication, but the computation is modified: torch.topk selects the top-k most relevant keys for each query and the remaining attention weights are set to negative infinity, which implements k-NN attention; a softmax then gives the final attention matrix, which is used to take a weighted average of the values and produce the output features.
The core idea is to consider only the k keys most relevant to each query when computing attention, reducing computational complexity and improving efficiency; this is especially useful for large-scale data or models with long sequences.
"""
import torch
import torch.nn as nn
class kNNAttention(nn.Module):
def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., topk=100):
super().__init__()
self.num_heads = num_heads
head_dim = dim // num_heads
# NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
self.scale = qk_scale or head_dim ** -0.5
self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
self.attn_drop = nn.Dropout(attn_drop)
self.proj = nn.Linear(dim, dim)
self.proj_drop = nn.Dropout(proj_drop)
self.topk = topk
def forward(self, x):
B, N, C = x.shape
qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple)
attn = (q @ k.transpose(-2, -1)) * self.scale
# the core code block
mask = torch.zeros(B, self.num_heads, N, N, device=x.device, requires_grad=False)
index = torch.topk(attn, k=self.topk, dim=-1, largest=True)[1]
mask.scatter_(-1, index, 1.)
attn = torch.where(mask > 0, attn, torch.full_like(attn, float('-inf')))
# end of the core code block
attn = attn.softmax(dim=-1)
attn = self.attn_drop(attn)
x = (attn @ v).transpose(1, 2).reshape(B, N, C)
x = self.proj(x)
x = self.proj_drop(x)
return x
if __name__ == '__main__':
block = kNNAttention(dim=128)
input = torch.rand(32,784,128)
output = block(input)
print(input.size())
print(output.size())
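# torch.topk requires k <= N, so the default topk=100 would fail on sequences shorter than
# 100 tokens; a simple hypothetical guard is to clamp topk to the sequence length when
# constructing the layer, as sketched below.
short_seq = torch.rand(4, 49, 128)  # N = 49 tokens
knn_short = kNNAttention(dim=128, topk=min(100, short_seq.shape[1]))
print(knn_short(short_seq).size())  # expected: torch.Size([4, 49, 128])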

View File

@ -0,0 +1,59 @@
# https://github.com/ZjjConan/SimAM
"""
The purpose of this module is to strengthen the relations between image features and thereby improve model performance.
Its main components and behaviour are:
Initialisation: the module takes one argument, e_lambda, a small positive number (default 1e-4) used to keep the denominator away from zero and ensure numerical stability; it also creates a Sigmoid activation, act.
Forward pass: the module performs the following steps:
Read the shape of the input tensor x: batch size b, number of channels c, height h and width w.
Compute the number of pixels n = h * w - 1 (one is subtracted because one pixel is excluded when estimating the variance).
Compute the squared deviation of each pixel from the mean, (x - x.mean(dim=[2, 3], keepdim=True)).pow(2), giving a matrix of squared differences.
Compute the denominator (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda), adding the small constant e_lambda so the denominator is never zero.
Compute y by dividing the squared deviations by the denominator and adding 0.5; the result is passed through the Sigmoid, which maps it into the range (0, 1).
Finally, multiply the input tensor x by the Sigmoid-activated y to produce the output.
The key idea of SimAM is to measure the relation between each pixel's value and the mean and to adjust it through the Sigmoid activation, enhancing the interaction between features and helping to capture relations between different positions in the image, which improves model performance.
"""
import torch
import torch.nn as nn
from thop import profile
from einops import rearrange
def to_3d(x):
return rearrange(x, 'b c h w -> b (h w) c')
def to_4d(x,h,w):
return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)
class Simam_module(torch.nn.Module):
def __init__(self, e_lambda=1e-4):
super(Simam_module, self).__init__()
self.act = nn.Sigmoid()
self.e_lambda = e_lambda
def forward(self, x):
b, c, h, w = x.size()
n = w * h - 1
x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2)
y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5
return x * self.act(y)
# Input: N C H W, Output: N C H W
if __name__ == '__main__':
model = Simam_module().cuda()
# x = torch.randn(1, 3, 64, 64).cuda()
x = torch.randn(32, 784, 128).cuda()
x = to_4d(x,h=28,w=28)
y = model(x)
y = to_3d(y)
print(y.shape)
flops, params = profile(model, (x,))
print(flops / 1e9)
print(params)
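# The to_3d/to_4d helpers above are only needed for token-shaped (B, N, C) inputs; for an
# ordinary CNN feature map, SimAM can be applied directly to the N C H W tensor, as sketched below.
feat = torch.randn(8, 64, 56, 56).cuda()
print(Simam_module()(feat).shape)  # expected: torch.Size([8, 64, 56, 56])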

Binary file not shown.

View File

@ -0,0 +1,9 @@
from attention.A2Atttention import DoubleAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
a2 = DoubleAttention(512,128,128,True)
output=a2(input)
print(output.shape)

View File

@ -0,0 +1,8 @@
from model.attention.ACmix import ACmix
import torch
if __name__ == '__main__':
input = torch.randn(50, 256, 7, 7)
acmix = ACmix(in_planes=256, out_planes=256)
output = acmix(input)
print(output.shape)

View File

@ -0,0 +1,9 @@
from attention.AFT import AFT_FULL
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,49,512)
aft_full = AFT_FULL(d_model=512, n=49)
output=aft_full(input)
print(output.shape)

View File

@ -0,0 +1,12 @@
from model.attention.Axial_attention import AxialImageTransformer
import torch
if __name__ == '__main__':
input = torch.randn(3, 128, 7, 7)
model = AxialImageTransformer(
dim=128,
depth=12,
reversible=True
)
outputs = model(input)
print(outputs.shape)

View File

@ -0,0 +1,18 @@
"""
BAM: Bottleneck Attention Module --- BMVC 2018
Paper: https://arxiv.org/pdf/1807.06514.pdf
This work is by the same authors as CBAM and from the same period, and it is very similar: it is also a dual attention, but where CBAM applies the two attentions in sequence, BAM simply adds the two attention maps together.
The channel attention has essentially the same structure as SE. The spatial attention pools along the channel dimension, applies two 3x3 dilated convolutions, and finally a 1x1 convolution to obtain the spatial attention map.
Finally the channel attention and spatial attention maps are added (using broadcasting) and normalised, giving an attention map that combines spatial and channel information.
"""
from attention.BAM import BAMBlock
import torch
input = torch.randn(50, 512, 7, 7)
bam = BAMBlock(channel=512, reduction=16, dia_val=2)
output = bam(input)
print(output.shape)

View File

@ -0,0 +1,20 @@
"""
CBAM: Convolutional Block Attention Module --- ECCV 2018
Paper: https://openaccess.thecvf.com/content_ECCV_2018/papers/Sanghyun_Woo_Convolutional_Block_Attention_ECCV_2018_paper.pdf
This ECCV 2018 paper uses both channel attention and spatial attention and applies them in sequence; the paper also reports ablations for the parallel variant and for the two possible orderings.
The channel attention is roughly the same as SE, except that the authors argue AvgPool and MaxPool capture different information, so the input is pooled over the spatial dimensions with both AvgPool and MaxPool;
the pooled features then go through an SE-style structure with shared parameters to produce the channel attention, the two results are added, and a normalisation gives the channel attention map.
The spatial attention is similar: the feature map is pooled along the channel dimension with both poolings, the results are concatenated, and a 7x7 convolution extracts the spatial attention.
A 7x7 kernel is used because spatial attention requires a sufficiently large receptive field; a final normalisation then gives the spatial attention map.
from attention.CBAM import CBAMBlock
import torch
input = torch.randn(50, 512, 7, 7)
kernel_size = input.shape[2]
cbam = CBAMBlock(channel=512, reduction=16, kernel_size=kernel_size)
output = cbam(input)
print(output.shape)

View File

@ -0,0 +1,10 @@
from attention.CoAtNet import CoAtNet
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(1,3,224,224)
mbconv=CoAtNet(in_ch=3,image_size=224)
out=mbconv(input)
print(out.shape)

View File

@ -0,0 +1,12 @@
from attention.CoTAttention import CoTAttention
import torch
from torch import nn
from torch.nn import functional as F
input=torch.randn(50,512,7,7)
cot = CoTAttention(dim=512,kernel_size=3)
output=cot(input)
print(output.shape)

Some files were not shown because too many files have changed in this diff.