"Commit project" (提交项目)
commit 63abdae2a9
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
@@ -0,0 +1,119 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
    <serverData>
      <paths name="root@123.125.240.150:45809">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@124.16.151.196:10341">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@124.16.151.196:10341 password">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@222.187.226.110:28961">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:15907">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:26749">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:26749 (2)">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:26749 (3)">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:26749 (4)">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:26749 (5)">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@connect.east.seetacloud.com:26749 password">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:12154">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:14975">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:34252">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-8.seetacloud.com:35693">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-8.seetacloud.com:35693 (2)">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
    </serverData>
  </component>
</project>
@@ -0,0 +1,27 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="12">
            <item index="0" class="java.lang.String" itemvalue="sklearn" />
            <item index="1" class="java.lang.String" itemvalue="tqdm" />
            <item index="2" class="java.lang.String" itemvalue="scipy" />
            <item index="3" class="java.lang.String" itemvalue="h5py" />
            <item index="4" class="java.lang.String" itemvalue="matplotlib" />
            <item index="5" class="java.lang.String" itemvalue="torch" />
            <item index="6" class="java.lang.String" itemvalue="numpy" />
            <item index="7" class="java.lang.String" itemvalue="torchvision" />
            <item index="8" class="java.lang.String" itemvalue="opencv_python" />
            <item index="9" class="java.lang.String" itemvalue="Pillow" />
            <item index="10" class="java.lang.String" itemvalue="lxml" />
            <item index="11" class="java.lang.String" itemvalue="requests" />
          </list>
        </value>
      </option>
    </inspection_tool>
    <inspection_tool class="PyPep8NamingInspection" enabled="false" level="WEAK WARNING" enabled_by_default="false" />
  </profile>
</component>
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9" project-jdk-type="Python SDK" />
</project>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/pytorch_segmentation.iml" filepath="$PROJECT_DIR$/.idea/pytorch_segmentation.iml" />
    </modules>
  </component>
</project>
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="GOOGLE" />
    <option name="myDocStringFormat" value="Google" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="py.test" />
  </component>
</module>
@@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.9 (pytorch)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="PLAIN" />
    <option name="myDocStringFormat" value="Plain" />
  </component>
</module>
@@ -0,0 +1,140 @@
import matplotlib.pyplot as plt
import numpy as np


class ActivateFunc():
    def __init__(self, x, b=1, lamb=2, alpha=1, a=2):
        super(ActivateFunc, self).__init__()
        self.x = x
        self.b = b
        self.lamb = lamb
        self.alpha = alpha
        self.a = a

    def Sigmoid(self):
        y = np.exp(self.x) / (np.exp(self.x) + 1)
        y_grad = y * (1 - y)
        return [y, y_grad]

    def Tanh(self):
        y = np.tanh(self.x)
        y_grad = 1 - y * y
        return [y, y_grad]

    def Swish(self):  # b is a constant; set b before calling
        y = self.x * (np.exp(self.b*self.x) / (np.exp(self.b*self.x) + 1))
        y_grad = np.exp(self.b*self.x)/(1+np.exp(self.b*self.x)) + self.x * (self.b*np.exp(self.b*self.x) / ((1+np.exp(self.b*self.x))*(1+np.exp(self.b*self.x))))
        return [y, y_grad]

    def ELU(self):  # alpha is a constant; set alpha before calling
        y = np.where(self.x > 0, self.x, self.alpha * (np.exp(self.x) - 1))
        y_grad = np.where(self.x > 0, 1, self.alpha * np.exp(self.x))
        return [y, y_grad]

    def SELU(self):  # lamb is greater than 1; set lamb and alpha before calling
        y = np.where(self.x > 0, self.lamb * self.x, self.lamb * self.alpha * (np.exp(self.x) - 1))
        y_grad = np.where(self.x > 0, self.lamb * 1, self.lamb * self.alpha * np.exp(self.x))
        return [y, y_grad]

    def ReLU(self):
        y = np.where(self.x < 0, 0, self.x)
        y_grad = np.where(self.x < 0, 0, 1)
        return [y, y_grad]

    def PReLU(self):  # a is greater than 1; set a before calling
        y = np.where(self.x < 0, self.x / self.a, self.x)
        y_grad = np.where(self.x < 0, 1 / self.a, 1)
        return [y, y_grad]

    def LeakyReLU(self):  # a is greater than 1; set a before calling
        y = np.where(self.x < 0, self.x / self.a, self.x)
        y_grad = np.where(self.x < 0, 1 / self.a, 1)
        return [y, y_grad]

    def Mish(self):
        f = 1 + np.exp(self.x)
        y = self.x * ((f*f - 1) / (f*f + 1))
        y_grad = (f*f - 1) / (f*f + 1) + self.x * (4*f*(f-1)) / ((f*f+1)*(f*f+1))
        return [y, y_grad]

    def ReLU6(self):
        y = np.where(np.where(self.x < 0, 0, self.x) > 6, 6, np.where(self.x < 0, 0, self.x))
        y_grad = np.where(self.x > 6, 0, np.where(self.x < 0, 0, 1))
        return [y, y_grad]

    def Hard_Swish(self):
        f = self.x + 3
        relu6 = np.where(np.where(f < 0, 0, f) > 6, 6, np.where(f < 0, 0, f))
        relu6_grad = np.where(f > 6, 0, np.where(f < 0, 0, 1))
        y = self.x * relu6 / 6
        y_grad = relu6 / 6 + self.x * relu6_grad / 6
        return [y, y_grad]

    def Hard_Sigmoid(self):
        f = (2 * self.x + 5) / 10
        y = np.where(np.where(f > 1, 1, f) < 0, 0, np.where(f > 1, 1, f))
        y_grad = np.where(f > 0, np.where(f >= 1, 0, 1 / 5), 0)
        return [y, y_grad]


def PlotActiFunc(x, y, title):
    plt.grid(which='minor', alpha=0.2)
    plt.grid(which='major', alpha=0.5)
    plt.plot(x, y)
    plt.title(title)
    plt.show()


def PlotMultiFunc(x, y):
    plt.grid(which='minor', alpha=0.2)
    plt.grid(which='major', alpha=0.5)
    plt.plot(x, y)


if __name__ == '__main__':
    x = np.arange(-10, 10, 0.01)
    activateFunc = ActivateFunc(x)
    activateFunc.b = 1

    PlotActiFunc(x, activateFunc.Sigmoid()[0], title='Sigmoid')
    PlotActiFunc(x, activateFunc.Tanh()[0], title='Tanh')
    PlotActiFunc(x, activateFunc.ReLU()[0], title='ReLU')
    PlotActiFunc(x, activateFunc.LeakyReLU()[0], title='LeakyReLU')
    PlotActiFunc(x, activateFunc.ReLU6()[0], title='ReLU6')
    PlotActiFunc(x, activateFunc.Swish()[0], title='Swish')
    PlotActiFunc(x, activateFunc.Mish()[0], title='Mish')
    PlotActiFunc(x, activateFunc.ELU()[0], title='ELU')
    PlotActiFunc(x, activateFunc.Hard_Swish()[0], title='Hard_Swish')
    PlotActiFunc(x, activateFunc.Hard_Sigmoid()[0], title='Hard_Sigmoid')

    plt.figure(1)
    PlotMultiFunc(x, activateFunc.Swish()[0])
    PlotMultiFunc(x, activateFunc.Mish()[0])
    plt.legend(['Swish', 'Mish'])

    plt.figure(2)
    PlotMultiFunc(x, activateFunc.Swish()[0])
    PlotMultiFunc(x, activateFunc.Hard_Swish()[0])
    plt.legend(['Swish', 'Hard-Swish'])

    plt.figure(3)
    PlotMultiFunc(x, activateFunc.Sigmoid()[0])
    PlotMultiFunc(x, activateFunc.Hard_Sigmoid()[0])
    plt.legend(['Sigmoid', 'Hard-Sigmoid'])

    plt.figure(4)
    PlotMultiFunc(x, activateFunc.ReLU()[0])
    PlotMultiFunc(x, activateFunc.ReLU6()[0])
    plt.legend(['ReLU', 'ReLU6'])

    plt.show()
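A quick numerical cross-check of the analytic gradients above (a sketch, not part of the commit; it assumes it runs in the same module so ActivateFunc is in scope): compare y_grad against a central finite difference for one of the functions.

h = 1e-5
x = np.linspace(-4, 4, 9)
y, y_grad = ActivateFunc(x).Sigmoid()
numeric = (ActivateFunc(x + h).Sigmoid()[0] - ActivateFunc(x - h).Sigmoid()[0]) / (2 * h)
print(np.max(np.abs(y_grad - numeric)))  # ~1e-10: analytic and numeric gradients agree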
@@ -0,0 +1,31 @@
"""
The ReLU function:

1. ReLU is linear for positive inputs, so it converges quickly, is cheap to compute, and behaves like the identity there. For positive inputs the derivative is 1, so the gradient passes through intact and there is no vanishing-gradient (gradient-saturation) problem.

2. It is fast to compute. ReLU involves only a linear relation, and neither the function nor its derivative requires complex mathematical operations, so it evaluates faster than sigmoid and tanh.

3. When the input is greater than 0 the gradient is 1, which effectively avoids the vanishing and exploding gradients caused by multiplying gradients under the chain rule; the computational cost is low.

4. It keeps the biological inspiration of the step function (a neuron fires only when its input exceeds a threshold), but with a nonzero derivative for positive inputs it permits gradient-based learning (although the derivative is undefined at x = 0). For negative inputs, learning can become very slow or the neuron can die outright: with the input below zero the gradient is zero, so the weights never update and the unit stays silent for the rest of training.

Shortcomings of ReLU:

1. When the input is negative, the output is always 0 and so is the first derivative, so the neuron's parameters stop updating and the neuron stops learning; this is called a "dead neuron". To address it, a small leak is introduced on the negative half-axis, giving the Leaky ReLU function.

2. Like sigmoid, its output is not zero-centered (ReLU outputs are 0 or positive).

3. For inputs below 0 the gradient is zero, so some neurons are permanently suppressed and features are learned incompletely; this is the classic dead-ReLU problem, so random initialization should be improved to avoid feeding too many negative values into ReLU.
"""
import torch
import torch.nn as nn

# The ReLU function
print('*' * 25 + "ReLU" + "*" * 25)
m = nn.ReLU()
input = torch.randn(2)
print("Input:", input)
print("Output:", m(input))
print('*' * 50)
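To make the dead-neuron point above concrete, here is a minimal sketch (assuming the same torch imports) comparing gradients through ReLU and Leaky ReLU at a negative input:

x1 = torch.tensor([-2.0, 3.0], requires_grad=True)
nn.ReLU()(x1).sum().backward()
print(x1.grad)   # tensor([0., 1.]): the negative input receives no gradient ("dead")

x2 = torch.tensor([-2.0, 3.0], requires_grad=True)
nn.LeakyReLU(0.01)(x2).sum().backward()
print(x2.grad)   # tensor([0.0100, 1.0000]): the leak keeps a small gradient flowing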
@@ -0,0 +1,31 @@
import torch
import torch.nn as nn

# The Sigmoid function
print('*' * 25 + "Sigmoid" + "*" * 25)
m = nn.Sigmoid()
input = torch.randn(2)
print("Input:", input)
print("Output:", m(input))
print('*' * 50)

"""
Advantages of sigmoid:

1. Its range is [0, 1], which makes it well suited as an output function producing a probability in (0, 1), e.g. for models that output a predicted probability, such as the class in binary classification or a confidence score.

2. The output of the sigmoid function lies between 0 and 1. Because the output is bounded to this interval, it normalizes the output of each neuron.

3. The function is continuously differentiable, providing very smooth gradients and preventing abrupt gradient changes during training (i.e. avoiding "jumpy" outputs).

Shortcomings of sigmoid:

1. As the plot of its derivative shows, the derivative's maximum is only 0.25, and outside roughly [-5, 5] it is already almost 0. Neurons therefore saturate during training, their weights barely update during backpropagation, and the model becomes hard to train; this is known as the vanishing-gradient problem.

2. Its output is not zero-centered but always positive (which reduces the efficiency of weight updates), so the next layer receives an all-positive signal from the previous layer; for this reason the sigmoid activation is unsuitable for the early layers of a network and is usually placed only in the final output layer.

3. It requires exponentiation (slow for a computer), so it is computationally expensive and complex and slows training; the larger the exponent, the smaller the derivative, which readily produces vanishing gradients.

Copyright notice: this text is an original article by the CSDN blogger 小wu学cv, licensed under CC 4.0 BY-SA; please attach the original source link and this notice when reposting.
Original link: https://blog.csdn.net/caip12999203000/article/details/127067360
"""
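The two numeric claims in the notes (peak derivative 0.25, near-zero gradient outside [-5, 5]) can be checked directly; a small sketch using numpy:

import numpy as np

x = np.linspace(-10, 10, 2001)
s = 1 / (1 + np.exp(-x))
grad = s * (1 - s)
print(grad.max())                   # 0.25, attained at x = 0
print(grad[np.abs(x) >= 5].max())   # ~0.0066: effectively vanished outside [-5, 5]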
@@ -0,0 +1,31 @@
"""
Advantages of tanh:

1. In classification tasks, the hyperbolic tangent (tanh) gradually replaced sigmoid as the standard activation function; it has many properties that neural networks favor. It is fully differentiable and antisymmetric, with its center of symmetry at the origin.

2. Its output is an S-shaped curve that breaks the linearity between layers, mapping a layer's output nonlinearly into the interval (-1, 1). Negative inputs map strongly negative and zero inputs map near zero; tanh has an output span of 1 and a zero-centered range of [-1, 1] (which fixes sigmoid's non-zero-centered output problem).

3. In typical binary classification problems, tanh is used for the hidden layers and sigmoid for the output layer, but this is not fixed and should be tuned to the specific problem.

Shortcomings of tanh:

1. For large or small inputs, the output is almost flat and the gradient is small, which hinders weight updates.

2. Tanh also requires exponentiation, so it likewise suffers from high computational complexity and cost.

3. As the number of layers grows, chain-rule backpropagation multiplies many terms; once the function enters its saturated region (where the derivative is close to zero), the effect propagates layer by layer, a phenomenon known as vanishing gradients.

Copyright notice: this text is an original article by the CSDN blogger 小wu学cv, licensed under CC 4.0 BY-SA; please attach the original source link and this notice when reposting.
Original link: https://blog.csdn.net/caip12999203000/article/details/127067360
"""
import torch
import torch.nn as nn

# The Tanh function
print('*' * 25 + "Tanh" + "*" * 25)
m = nn.Tanh()
input = torch.randn(2)
print("Input:", input)
print("Output:", m(input))
print('*' * 50)
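For comparison with the sigmoid notes above, a sketch showing that tanh has a 4x larger peak gradient but the same saturating tails:

import numpy as np

x = np.linspace(-10, 10, 2001)
grad = 1 - np.tanh(x) ** 2
print(grad.max())                   # 1.0 at x = 0, versus 0.25 for sigmoid
print(grad[np.abs(x) >= 5].max())   # ~0.0007: saturated, so deep stacks still lose gradient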
@@ -0,0 +1,325 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2

def load_data(pix, use_type='train'):
    datasets = list()
    file_list = [x for x in os.listdir(f"./out_mat/{pix}/{use_type}/") if x.endswith('.npy')][:3000]
    for file in file_list:
        file_img = np.load(f"./out_mat/{pix}/{use_type}/{file}")[:, :, :1]
        datasets.append(file_img)
    return np.asarray(datasets)

train_set = load_data(96, 'train')
val_set = load_data(96, 'valid')
test_set = load_data(96, 'test')


def load_mask(mask_rate):
    mask_files = os.listdir(f'./out_mat/96/mask/{mask_rate}')
    masks = list()
    for file in mask_files:
        d = cv2.imread(f'./out_mat/96/mask/{mask_rate}/{file}', cv2.IMREAD_GRAYSCALE)
        d = (d > 0) * 1
        masks.append(d)
    return np.asarray(masks)


masks = load_mask(20)

maxs = train_set.max(axis=0)
mins = train_set.min(axis=0)

len(train_set)

norm_train = (train_set - mins) / (maxs - mins)
del train_set
norm_valid = (val_set - mins) / (maxs - mins)
del val_set
norm_test = (test_set - mins) / (maxs - mins)
del test_set
norm_train.shape

trans_train = np.transpose(norm_train, (0, 3, 1, 2))
trans_val = np.transpose(norm_valid, (0, 3, 1, 2))
trans_test = np.transpose(norm_test, (0, 3, 1, 2))
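One caveat with the per-pixel min-max normalization above: wherever a pixel is constant across the training set, maxs - mins is 0 and the division produces NaN or inf. A defensive variant (an assumption, not in the commit) clamps the range before dividing, in place of the three normalization lines:

rng = np.maximum(maxs - mins, 1e-8)  # avoid division by zero for constant pixels
norm_train = (train_set - mins) / rng
norm_valid = (val_set - mins) / rng
norm_test = (test_set - mins) / rng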
# Helper to visualize a specific feature
def visualize_feature(input_feature, masked_feature, output_feature, title):
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 3, 1)
    plt.imshow(input_feature[0].cpu().numpy())
    plt.title(title + " Input")
    plt.subplot(1, 3, 2)
    plt.imshow(masked_feature[0].cpu().numpy())
    plt.title(title + " Masked")
    plt.subplot(1, 3, 3)
    plt.imshow(output_feature[0].detach().cpu().numpy())
    plt.title(title + " Recovery")
    plt.show()


# Set the random seeds for reproducibility
torch.manual_seed(0)
np.random.seed(0)

# Data preparation
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
# Convert the numpy arrays to PyTorch tensors
tensor_train = torch.tensor(trans_train.astype(np.float32), device=device)
tensor_valid = torch.tensor(trans_val.astype(np.float32), device=device)
tensor_test = torch.tensor(trans_test.astype(np.float32), device=device)

# Build datasets and data loaders
train_set = TensorDataset(tensor_train, tensor_train)  # inputs and targets are identical because this is an autoencoder
val_set = TensorDataset(tensor_valid, tensor_valid)
test_set = TensorDataset(tensor_test, tensor_test)
batch_size = 64
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)


def mask_data(data, device, masks):
    # Sample one random mask per example and zero out channel 0 where the mask is 0
    mask_inds = np.random.choice(masks.shape[0], data.shape[0])
    mask = torch.from_numpy(masks[mask_inds]).to(device)
    tmp_first_channel = data[:, 0, :, :] * mask
    masked_data = torch.clone(data)
    masked_data[:, 0, :, :] = tmp_first_channel
    return masked_data
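A toy check of mask_data's semantics (a sketch; the tensors are made up): positions where the sampled mask is 0 are zeroed in channel 0, everything else passes through.

toy = torch.ones(1, 1, 2, 2)
toy_masks = np.array([[[1, 0], [0, 0]]])   # one mask that keeps only the top-left pixel
print(mask_data(toy, torch.device("cpu"), toy_masks)[0, 0])
# tensor([[1., 0.],
#         [0., 0.]])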
class SEBlock(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SEBlock, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return x * self.se(x)


class Conv(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False):
        super(Conv, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
                      dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2)
        )


class ConvBNReLU(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False):
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
                      dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2),
            norm_layer(out_channels),
            nn.ReLU()
        )


class SeparableBNReLU(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, norm_layer=nn.BatchNorm2d):
        super(SeparableBNReLU, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, dilation=dilation,
                      padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, groups=in_channels, bias=False),
            norm_layer(out_channels),
            nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False),
            nn.ReLU6()
        )


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        if in_channels != out_channels or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)
        return out


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True)
        self.drop = nn.Dropout(drop, inplace=True)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class MultiHeadAttentionBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super(MultiHeadAttentionBlock, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, C, H, W = x.shape
        x = x.view(B, C, H * W).permute(2, 0, 1)  # (B, C, H, W) -> (HW, B, C)
        attn_output, _ = self.attention(x, x, x)
        attn_output = self.norm(attn_output)
        attn_output = self.dropout(attn_output)
        attn_output = attn_output.permute(1, 2, 0).view(B, C, H, W)
        return attn_output


class SpatialAttentionBlock(nn.Module):
    def __init__(self):
        super(SpatialAttentionBlock, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

    def forward(self, x):                               # (B, 64, H, W)
        avg_out = torch.mean(x, dim=1, keepdim=True)    # (B, 1, H, W)
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # (B, 1, H, W)
        out = torch.cat([avg_out, max_out], dim=1)      # (B, 2, H, W)
        out = torch.sigmoid(self.conv(out))             # (B, 1, H, W)
        return x * out                                  # (B, C, H, W)


class DecoderAttentionBlock(nn.Module):
    def __init__(self, in_channels):
        super(DecoderAttentionBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)
        self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1)
        self.spatial_attention = SpatialAttentionBlock()

    def forward(self, x):
        # Channel attention
        b, c, h, w = x.size()
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.conv1(avg_pool)
        max_out = self.conv1(max_pool)

        out = avg_out + max_out
        out = torch.sigmoid(self.conv2(out))

        # Then apply spatial attention
        out = x * out
        out = self.spatial_attention(out)
        return out


class MaskedAutoencoder(nn.Module):
    def __init__(self):
        super(MaskedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            Conv(1, 32, kernel_size=3, stride=2),
            nn.ReLU(),
            SEBlock(32, 32),
            ConvBNReLU(32, 64, kernel_size=3, stride=2),
            ResidualBlock(64, 64),
            SeparableBNReLU(64, 128, kernel_size=3, stride=2),
            MultiHeadAttentionBlock(embed_dim=128, num_heads=4),
            SEBlock(128, 128)
        )
        self.mlp = Mlp(in_features=128, hidden_features=256, out_features=128, act_layer=nn.ReLU6, drop=0.1)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            DecoderAttentionBlock(32),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            DecoderAttentionBlock(16),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),  # output_padding=1 keeps sizes aligned
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        print("Encoded size:", encoded.size())
        decoded = self.decoder(encoded)
        print("Decoded size:", decoded.size())
        return decoded
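A shape sanity check for the model above (assuming 96x96 single-channel inputs, matching load_data(96, ...)): three stride-2 encoder stages give 96 -> 48 -> 24 -> 12, and the three stride-2 transposed convolutions invert that.

with torch.no_grad():
    probe = MaskedAutoencoder()
    out = probe(torch.randn(1, 1, 96, 96))  # forward prints the intermediate sizes
print(out.shape)  # torch.Size([1, 1, 96, 96])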
# Instantiate the model, loss function, and optimizer
model = MaskedAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training function
def train_epoch(model, device, data_loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    for batch_idx, (data, _) in enumerate(data_loader):
        masked_data = mask_data(data, device, masks)
        optimizer.zero_grad()
        reconstructed = model(masked_data)
        loss = criterion(reconstructed, data)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / (batch_idx + 1)

# Evaluation function
def evaluate(model, device, data_loader, criterion):
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for batch_idx, (data, _) in enumerate(data_loader):
            data = data.to(device)
            masked_data = mask_data(data, device, masks)
            reconstructed = model(masked_data)
            if batch_idx == 8:
                rand_ind = np.random.randint(0, len(data))
                visualize_feature(data[rand_ind], masked_data[rand_ind], reconstructed[rand_ind], title='NO_2')
            loss = criterion(reconstructed, data)
            running_loss += loss.item()
    return running_loss / (batch_idx + 1)

# Test function
def test(model, device, data_loader):
    model.eval()
    with torch.no_grad():
        for batch_idx, (data, _) in enumerate(data_loader):
            data = data.to(device)
            masked_data = mask_data(data, device, masks)
            masked_ind = np.argwhere(masked_data[0][0] == 0)
            reconstructed = model(masked_data)
            recon_no2 = reconstructed[0][0]
            ori_no2 = data[0][0]
            return

model = model.to(device)

num_epochs = 100
train_losses = list()
val_losses = list()
for epoch in range(num_epochs):
    train_loss = train_epoch(model, device, train_loader, criterion, optimizer)
    train_losses.append(train_loss)
    val_loss = evaluate(model, device, val_loader, criterion)
    val_losses.append(val_loss)
    print(f'Epoch {epoch+1}, Train Loss: {train_loss}, Val Loss: {val_loss}')

# Evaluate on the test set
test_loss = evaluate(model, device, test_loader, criterion)
print(f'Test Loss: {test_loss}')
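The loop above collects train_losses and val_losses but never plots them; a minimal follow-up using the matplotlib import already in the script:

plt.plot(train_losses, label='train')
plt.plot(val_losses, label='val')
plt.xlabel('epoch')
plt.ylabel('MSE loss')
plt.legend()
plt.show()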
@@ -0,0 +1,100 @@
import torch
import torch.nn as nn


class MLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.act = nn.GELU()  # use the GELU activation
        self.fc2 = nn.Linear(output_dim, input_dim)

    def forward(self, x):
        return self.fc2(self.act(self.fc1(x)))


class Attention(nn.Module):
    def __init__(self, dim, heads):
        super(Attention, self).__init__()
        self.heads = heads
        self.dim = dim
        self.scale = dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3)
        self.attn_drop = nn.Dropout(0.1)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(0.1)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.heads, C // self.heads).permute(2, 0, 3, 1, 4)  # (3, B, heads, N, head_dim)
        q, k, v = qkv[0], qkv[1], qkv[2]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        out = (attn @ v).transpose(1, 2).reshape(B, N, C)
        return self.proj_drop(self.proj(out))


class ViTEncoder(nn.Module):
    def __init__(self, img_size=96, patch_size=8, dim=128, depth=4, heads=4, mlp_dim=256):
        super(ViTEncoder, self).__init__()
        self.patch_size = patch_size
        self.dim = dim
        self.patch_embedding = nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size)

        self.attention_layers = nn.ModuleList([
            nn.Sequential(
                Attention(dim, heads),
                MLP(dim, mlp_dim)
            ) for _ in range(depth)
        ])

    def forward(self, x):
        x = self.patch_embedding(x)       # shape becomes (batch_size, dim, num_patches_h, num_patches_w)
        x = x.flatten(2).transpose(1, 2)  # shape becomes (batch_size, num_patches, dim)

        for attention_layer in self.attention_layers:
            x = attention_layer[0](x) + x  # self-attention with a residual connection
            x = attention_layer[1](x) + x  # MLP with a residual connection

        return x


class ConvDecoder(nn.Module):
    def __init__(self, dim=128, patch_size=8, img_size=96):
        super(ConvDecoder, self).__init__()
        self.dim = dim
        self.patch_size = patch_size
        self.img_size = img_size
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(dim, 128, kernel_size=patch_size, stride=patch_size),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 1, kernel_size=3, stride=1, padding=1)
        )

    def forward(self, x):
        x = x.transpose(1, 2).view(-1, self.dim, self.img_size // self.patch_size, self.img_size // self.patch_size)
        x = self.decoder(x)
        return x


class MAEModel(nn.Module):
    def __init__(self, encoder, decoder):
        super(MAEModel, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, x, mask):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded * mask


model = MAEModel(ViTEncoder(), ConvDecoder())  # the constructor requires an encoder and a decoder
x = torch.randn(1, 1, 96, 96)                  # 96x96 matches the encoder/decoder defaults
mask = torch.ones_like(x)
output = model(x, mask)
print(output.shape)  # torch.Size([1, 1, 96, 96])
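Since MAEModel.forward returns decoded * mask, a matching training signal compares only the region the mask selects. A sketch (the tensors and the ~20% hole rate are assumptions, not part of the commit):

import torch.nn.functional as F

target = torch.randn(1, 1, 96, 96)
hole = (torch.rand_like(target) < 0.2).float()  # 1 marks hidden pixels (~20%)
recon = model(target * (1 - hole), hole)        # encode the occluded image; output is zeroed outside the hole
loss = F.mse_loss(recon, target * hole)
print(loss.item())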
@@ -0,0 +1,90 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class MLP(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.act = nn.GELU()  # use the GELU activation
        self.fc2 = nn.Linear(output_dim, input_dim)

    def forward(self, x):
        return self.fc2(self.act(self.fc1(x)))


class Attention(nn.Module):
    def __init__(self, dim, heads):
        super(Attention, self).__init__()
        self.heads = heads
        self.dim = dim
        self.scale = dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3)
        self.attn_drop = nn.Dropout(0.1)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(0.1)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.heads, C // self.heads).permute(2, 0, 3, 1, 4)  # (3, B, heads, N, head_dim)
        q, k, v = qkv[0], qkv[1], qkv[2]

        attn = (q @ k.transpose(-2, -1)) * self.scale
        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        out = (attn @ v).transpose(1, 2).reshape(B, N, C)
        return self.proj_drop(self.proj(out))


class ViTEncoder(nn.Module):
    def __init__(self, img_size=96, patch_size=8, dim=128, depth=4, heads=4, mlp_dim=256):
        super(ViTEncoder, self).__init__()
        self.patch_size = patch_size
        self.dim = dim
        self.patch_embedding = nn.Conv2d(1, dim, kernel_size=patch_size, stride=patch_size)

        self.attention_layers = nn.ModuleList([
            nn.Sequential(
                Attention(dim, heads),
                MLP(dim, mlp_dim)
            ) for _ in range(depth)
        ])

    def forward(self, x):
        x = self.patch_embedding(x)       # shape becomes (batch_size, dim, num_patches_h, num_patches_w)
        x = x.flatten(2).transpose(1, 2)  # shape becomes (batch_size, num_patches, dim)

        for attention_layer in self.attention_layers:
            x = attention_layer[0](x) + x  # self-attention with a residual connection
            x = attention_layer[1](x) + x  # MLP with a residual connection

        return x


class ConvDecoder(nn.Module):
    def __init__(self, dim=128, patch_size=8, img_size=96):
        super(ConvDecoder, self).__init__()
        self.dim = dim
        self.patch_size = patch_size
        self.img_size = img_size
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(dim, 128, kernel_size=patch_size, stride=patch_size),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 1, kernel_size=3, stride=1, padding=1)
        )

    def forward(self, x):
        x = x.transpose(1, 2).view(-1, self.dim, self.img_size // self.patch_size, self.img_size // self.patch_size)
        x = self.decoder(x)
        return x


model = ConvDecoder()
tokens = ViTEncoder()(torch.randn(1, 1, 96, 96))  # (1, 144, 128): the decoder consumes patch tokens, not raw images
output = model(tokens)
print(output.shape)  # torch.Size([1, 1, 96, 96])
@@ -0,0 +1,215 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class SEBlock(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SEBlock, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),  # global average pooling
            nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
            nn.Sigmoid()  # sigmoid, because the per-channel weights must be normalized
        )

    def forward(self, x):
        return x * self.se(x)

# Building blocks for the Masked Autoencoder model
class Conv(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False):
        super(Conv, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
                      dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2)
        )

class ConvBNReLU(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d,
                 bias=False):
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
                      dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2),
            norm_layer(out_channels),
            nn.ReLU()
        )

class SeparableBNReLU(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, norm_layer=nn.BatchNorm2d):
        super(SeparableBNReLU, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, dilation=dilation,
                      padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, groups=in_channels, bias=False),
            norm_layer(out_channels),
            nn.Conv2d(out_channels, out_channels, kernel_size=1, bias=False),  # pointwise conv; its input is out_channels
            nn.ReLU6()
        )


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        self.downsample = downsample
        if in_channels != out_channels or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)
        return out


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True)
        self.drop = nn.Dropout(drop, inplace=True)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class MultiHeadAttentionBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super(MultiHeadAttentionBlock, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # (B, C, H, W) -> (HW, B, C) for MultiheadAttention compatibility
        B, C, H, W = x.shape
        x = x.view(B, C, H * W).permute(2, 0, 1)  # (B, C, H, W) -> (HW, B, C)

        # Apply multihead attention
        attn_output, _ = self.attention(x, x, x)

        # Apply normalization and dropout
        attn_output = self.norm(attn_output)
        attn_output = self.dropout(attn_output)

        # Reshape back to (B, C, H, W)
        attn_output = attn_output.permute(1, 2, 0).view(B, C, H, W)

        return attn_output


class SpatialAttentionBlock(nn.Module):
    def __init__(self):
        super(SpatialAttentionBlock, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

    def forward(self, x):
        avg_out = torch.mean(x, dim=1, keepdim=True)
        max_out, _ = torch.max(x, dim=1, keepdim=True)
        out = torch.cat([avg_out, max_out], dim=1)
        out = torch.sigmoid(self.conv(out))
        return x * out

class DecoderAttentionBlock(nn.Module):
    def __init__(self, in_channels):
        super(DecoderAttentionBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)
        self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1)
        self.spatial_attention = SpatialAttentionBlock()

    def forward(self, x):
        # Channel attention
        b, c, h, w = x.size()
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.conv1(avg_pool)
        max_out = self.conv1(max_pool)

        out = avg_out + max_out
        out = torch.sigmoid(self.conv2(out))

        # Then apply spatial attention
        out = x * out
        out = self.spatial_attention(out)
        return out


class MaskedAutoencoder(nn.Module):
    def __init__(self):
        super(MaskedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            Conv(1, 32, kernel_size=3, stride=2),
            nn.ReLU(),
            SEBlock(32, 32),
            ConvBNReLU(32, 64, kernel_size=3, stride=2),
            ResidualBlock(64, 64),
            SeparableBNReLU(64, 128, kernel_size=3, stride=2),
            MultiHeadAttentionBlock(embed_dim=128, num_heads=4),
            SEBlock(128, 128)
        )
        self.mlp = Mlp(in_features=128, hidden_features=256, out_features=128, act_layer=nn.ReLU6, drop=0.1)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 128, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            DecoderAttentionBlock(128),  # attention module after the first layer
            nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            DecoderAttentionBlock(32),  # attention module before the last layer
            nn.ReLU(),
            nn.ConvTranspose2d(32, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid()  # sigmoid output
        )

    def forward(self, x):
        # Forward pass (an assumption: the committed file defines no forward; this mirrors
        # the other variants in the commit). Note the decoder upsamples x16 while the
        # encoder downsamples x8, so the output is twice the input resolution.
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


# class MaskedAutoencoder(nn.Module):
#     def __init__(self):
#         super(MaskedAutoencoder, self).__init__()
#         self.encoder = nn.Sequential(
#             nn.Conv2d(1, 32, kernel_size=3, stride=2, padding=1),
#             nn.ReLU(),
#             nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1),
#             nn.ReLU(),
#             nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1),
#             nn.ReLU(),
#             SEBlock(128, 128)
#         )
#         self.decoder = nn.Sequential(
#             nn.ConvTranspose2d(128, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
#             nn.ReLU(),
#             nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
#             nn.ReLU(),
#             nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),
#             nn.Sigmoid()  # sigmoid, because the input data lies in [0, 1]
#         )
#
#     def forward(self, x):
#         encoded = self.encoder(x)
#         decoded = self.decoder(encoded)
#         return decoded

# Instantiate the model, loss function, and optimizer
model = MaskedAutoencoder()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
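A shape probe for this variant (assuming 96x96 inputs, as elsewhere in the commit): the encoder halves the resolution three times while this decoder doubles it four times, so the reconstruction comes out at twice the input resolution.

with torch.no_grad():
    z = model.encoder(torch.randn(1, 1, 96, 96))
    print(z.shape)                 # torch.Size([1, 128, 12, 12])
    print(model.decoder(z).shape)  # torch.Size([1, 1, 192, 192])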
@@ -0,0 +1,190 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

class SEBlock(nn.Module):
    def __init__(self, in_channels, reduced_dim):
        super(SEBlock, self).__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, reduced_dim, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(reduced_dim, in_channels, kernel_size=1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return x * self.se(x)

class Conv(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, bias=False):
        super(Conv, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
                      dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2)
        )

class ConvBNReLU(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, dilation=1, stride=1, norm_layer=nn.BatchNorm2d, bias=False):
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, bias=bias,
                      dilation=dilation, stride=stride, padding=((stride - 1) + dilation * (kernel_size - 1)) // 2),
            norm_layer(out_channels),
            nn.ReLU()
        )


class SeparableBNReLU(nn.Sequential):
    def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, norm_layer=nn.BatchNorm2d):
        super(SeparableBNReLU, self).__init__(
            nn.Conv2d(in_channels, in_channels, kernel_size=kernel_size, stride=stride, dilation=dilation,
                      padding=((stride - 1) + dilation * (kernel_size - 1)) // 2, groups=in_channels, bias=False),
            # depthwise convolution: adjusts spatial information only
            norm_layer(in_channels),  # normalize over the input channels
            nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False),  # pointwise convolution: raises the channel count
            nn.ReLU6()
        )


class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Downsample when the input and output channels (or the stride) differ
        self.downsample = downsample
        if in_channels != out_channels or stride != 1:
            self.downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        identity = x
        if self.downsample is not None:
            identity = self.downsample(x)

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += identity
        out = self.relu(out)
        return out


class Mlp(nn.Module):
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.ReLU6, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Conv2d(in_features, hidden_features, 1, 1, 0, bias=True)
        self.act = act_layer()
        self.fc2 = nn.Conv2d(hidden_features, out_features, 1, 1, 0, bias=True)
        self.drop = nn.Dropout(drop, inplace=True)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x

class MultiHeadAttentionBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout=0.1):
        super(MultiHeadAttentionBlock, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout)
        self.norm = nn.LayerNorm(embed_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        B, C, H, W = x.shape
        x = x.view(B, C, H * W).permute(2, 0, 1)  # (B, C, H, W) -> (HW, B, C)
        attn_output, _ = self.attention(x, x, x)
        attn_output = self.norm(attn_output)
        attn_output = self.dropout(attn_output)
        attn_output = attn_output.permute(1, 2, 0).view(B, C, H, W)
        return attn_output

class SpatialAttentionBlock(nn.Module):
    def __init__(self):
        super(SpatialAttentionBlock, self).__init__()
        self.conv = nn.Conv2d(2, 1, kernel_size=7, padding=3, bias=False)

    def forward(self, x):                               # (B, 64, H, W)
        avg_out = torch.mean(x, dim=1, keepdim=True)    # (B, 1, H, W)
        max_out, _ = torch.max(x, dim=1, keepdim=True)  # (B, 1, H, W)
        out = torch.cat([avg_out, max_out], dim=1)      # (B, 2, H, W)
        out = torch.sigmoid(self.conv(out))             # (B, 1, H, W)
        return x * out                                  # (B, C, H, W)

class DecoderAttentionBlock(nn.Module):
    def __init__(self, in_channels):
        super(DecoderAttentionBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, in_channels // 2, kernel_size=1)
        self.conv2 = nn.Conv2d(in_channels // 2, in_channels, kernel_size=1)
        self.spatial_attention = SpatialAttentionBlock()

    def forward(self, x):
        # Channel attention
        b, c, h, w = x.size()
        avg_pool = F.adaptive_avg_pool2d(x, 1)
        max_pool = F.adaptive_max_pool2d(x, 1)

        avg_out = self.conv1(avg_pool)
        max_out = self.conv1(max_pool)

        out = avg_out + max_out
        out = torch.sigmoid(self.conv2(out))

        # Then apply spatial attention
        out = x * out
        out = self.spatial_attention(out)
        return out


class MaskedAutoencoder(nn.Module):
    def __init__(self):
        super(MaskedAutoencoder, self).__init__()
        self.encoder = nn.Sequential(
            Conv(1, 32, kernel_size=3, stride=2),
            nn.ReLU(),
            SEBlock(32, 32),
            ConvBNReLU(32, 64, kernel_size=3, stride=2),
            ResidualBlock(64, 64),
            SeparableBNReLU(64, 128, kernel_size=3, stride=2),
            MultiHeadAttentionBlock(embed_dim=128, num_heads=4),
            SEBlock(128, 128)
        )
        self.mlp = Mlp(in_features=128, hidden_features=256, out_features=128, act_layer=nn.ReLU6, drop=0.1)
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(128, 32, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            DecoderAttentionBlock(32),
            nn.ConvTranspose2d(32, 16, kernel_size=3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            DecoderAttentionBlock(16),
            nn.ReLU(),
            nn.ConvTranspose2d(16, 1, kernel_size=3, stride=2, padding=1, output_padding=1),  # output_padding=1 keeps sizes aligned
            nn.Sigmoid()
        )

    def forward(self, x):
        encoded = self.encoder(x)
        print("Encoded size:", encoded.size())
        decoded = self.decoder(encoded)
        print("Decoded size:", decoded.size())
        return decoded


model = MaskedAutoencoder()
x = torch.randn(1, 1, 256, 256)
output = model(x)
print(output.shape)
@@ -0,0 +1,8 @@
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP client requests
/httpRequests/
@@ -0,0 +1,182 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PublishConfigData" remoteFilesAllowedToDisappearOnAutoupload="false">
    <serverData>
      <paths name="root@connect.westa.seetacloud.com:41442">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-3.seetacloud.com:47627">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-3.seetacloud.com:60211">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:10087">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:17758">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:18218">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:24544">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:26650">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:29425">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:30917">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:52181">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:56391">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:56529">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-41.seetacloud.com:59186">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:16236">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:18720">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:23687">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:26700">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:34775">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:35796">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:39635">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-42.seetacloud.com:46129">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-45.autodl.pro:45028">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-45.autodl.pro:48066">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
      <paths name="root@region-45.autodl.pro:54865">
        <serverdata>
          <mappings>
            <mapping local="$PROJECT_DIR$" web="/" />
          </mappings>
        </serverdata>
      </paths>
    </serverData>
  </component>
</project>
@@ -0,0 +1,46 @@
<component name="InspectionProjectProfileManager">
  <profile version="1.0">
    <option name="myName" value="Project Default" />
    <inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
      <option name="ignoredPackages">
        <value>
          <list size="33">
            <item index="0" class="java.lang.String" itemvalue="scikit-image" />
            <item index="1" class="java.lang.String" itemvalue="protobuf" />
            <item index="2" class="java.lang.String" itemvalue="torchmetrics" />
            <item index="3" class="java.lang.String" itemvalue="scikit-learn" />
            <item index="4" class="java.lang.String" itemvalue="PyYAML" />
            <item index="5" class="java.lang.String" itemvalue="dgl" />
            <item index="6" class="java.lang.String" itemvalue="opencv-python-headless" />
            <item index="7" class="java.lang.String" itemvalue="imagecodecs" />
            <item index="8" class="java.lang.String" itemvalue="histocartography" />
            <item index="9" class="java.lang.String" itemvalue="wandb" />
            <item index="10" class="java.lang.String" itemvalue="mmcv-full" />
            <item index="11" class="java.lang.String" itemvalue="tifffile" />
            <item index="12" class="java.lang.String" itemvalue="timm" />
            <item index="13" class="java.lang.String" itemvalue="opencv-python" />
            <item index="14" class="java.lang.String" itemvalue="h5py" />
            <item index="15" class="java.lang.String" itemvalue="loguru" />
            <item index="16" class="java.lang.String" itemvalue="addict" />
            <item index="17" class="java.lang.String" itemvalue="omegaconf" />
            <item index="18" class="java.lang.String" itemvalue="albumentations" />
            <item index="19" class="java.lang.String" itemvalue="tqdm" />
            <item index="20" class="java.lang.String" itemvalue="pytorch-lightning" />
            <item index="21" class="java.lang.String" itemvalue="tensorboard" />
            <item index="22" class="java.lang.String" itemvalue="pytorch-toolbelt" />
            <item index="23" class="java.lang.String" itemvalue="openslide-python" />
            <item index="24" class="java.lang.String" itemvalue="einops" />
            <item index="25" class="java.lang.String" itemvalue="Pillow" />
            <item index="26" class="java.lang.String" itemvalue="pandas" />
            <item index="27" class="java.lang.String" itemvalue="scipy" />
            <item index="28" class="java.lang.String" itemvalue="matplotlib" />
            <item index="29" class="java.lang.String" itemvalue="segmentation-models-pytorch" />
            <item index="30" class="java.lang.String" itemvalue="torch" />
            <item index="31" class="java.lang.String" itemvalue="numpy" />
            <item index="32" class="java.lang.String" itemvalue="torchvision" />
          </list>
        </value>
      </option>
    </inspection_tool>
  </profile>
</component>
@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>
@@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (yolov8) (20)" project-jdk-type="Python SDK" />
</project>
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/模块缝合库.iml" filepath="$PROJECT_DIR$/.idea/模块缝合库.iml" />
    </modules>
  </component>
</project>
@@ -0,0 +1,15 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.9 (yolov8) (20)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyDocumentationSettings">
    <option name="format" value="PLAIN" />
    <option name="myDocStringFormat" value="Plain" />
  </component>
  <component name="TestRunnerService">
    <option name="PROJECT_TEST_RUNNER" value="pytest" />
  </component>
</module>
Binary file not shown.
@@ -0,0 +1,80 @@
# https://tinyurl.com/5ft8v46w
"""
Main features and role of this module:

Linear projections: two linear layers, to_query and to_key, map the input features from in_dims to token_dim * num_heads; their outputs serve as the query and the key.

Learnable weight: a learnable vector w_g of shape (token_dim * num_heads, 1) is used to compute the additive attention weights.

Normalization: torch.nn.functional.normalize applies L2 normalization to the query and the key so that they have unit length.

Weight computation: the dot product of the query with w_g, scaled by scale_factor (token_dim ** -0.5), gives the additive attention weights A.

Normalization of A: A is normalized along the sequence-length dimension (note that the code uses L2 normalization here, not a softmax, so the weights have unit norm rather than summing to 1).

Weighted sum: multiplying A with the query and summing over the sequence length yields the global context vector G.

Broadcasting G: einops.repeat expands G to the same shape as the key.

Attention: the expanded G is multiplied with the key and the original query is added back, giving the attention-weighted output.

Projection: the linear layer Proj projects the attention-weighted output within token_dim * num_heads.

Final projection: the linear layer final maps the features from token_dim * num_heads down to token_dim, producing the final output.

Overall, this module implements an efficient additive attention mechanism that learns global context over the input sequence and fuses the weighted context back into the original features. Such modules are typically used as a self-attention component for sequence data, e.g. in Transformer-style models.
"""
import torch
import torch.nn as nn
import einops


class EfficientAdditiveAttnetion(nn.Module):  # (sic) spelling follows the upstream SwiftFormer code
    """
    Efficient Additive Attention module for SwiftFormer.
    Input: tensor in shape [B, N, D]
    Output: tensor in shape [B, N, token_dim] (equal to D only when token_dim == in_dims)
    """

    def __init__(self, in_dims=512, token_dim=256, num_heads=2):
        super().__init__()

        self.to_query = nn.Linear(in_dims, token_dim * num_heads)
        self.to_key = nn.Linear(in_dims, token_dim * num_heads)

        self.w_g = nn.Parameter(torch.randn(token_dim * num_heads, 1))
        self.scale_factor = token_dim ** -0.5
        self.Proj = nn.Linear(token_dim * num_heads, token_dim * num_heads)
        self.final = nn.Linear(token_dim * num_heads, token_dim)

    def forward(self, x):
        query = self.to_query(x)
        key = self.to_key(x)

        query = torch.nn.functional.normalize(query, dim=-1)  # BxNxD
        key = torch.nn.functional.normalize(key, dim=-1)  # BxNxD

        query_weight = query @ self.w_g  # BxNx1 (BxNxD @ Dx1)
        A = query_weight * self.scale_factor  # BxNx1

        A = torch.nn.functional.normalize(A, dim=1)  # BxNx1

        G = torch.sum(A * query, dim=1)  # BxD

        G = einops.repeat(
            G, "b d -> b repeat d", repeat=key.shape[1]
        )  # BxNxD

        out = self.Proj(G * key) + query  # BxNxD

        out = self.final(out)  # BxNxD

        return out


# Input B N C, output B N token_dim (here 64 in, 32 out)
if __name__ == '__main__':
    block = EfficientAdditiveAttnetion(64, 32).cuda()
    input = torch.rand(3, 64 * 64, 64).cuda()
    output = block(input)
    print(input.size(), output.size())  # (3, 4096, 64) (3, 4096, 32)
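    # Compactly, the forward pass computes (a sketch in the code's own names):
    #   A = normalize_N(scale_factor * (Q @ w_g)),  G = sum_i A_i * Q_i,
    #   out = final(Proj(G * K) + Q)
    # where normalize_N is F.normalize over the token axis (L2, not softmax)
    # and G is broadcast to every token before the elementwise product with K.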
Binary file not shown.
@@ -0,0 +1,57 @@
# https://github.com/zcablii/Large-Selective-Kernel-Network

"""
Main components and operations of this module:

conv0: a depthwise convolution with a 5x5 kernel (groups=dim puts each input channel in its own group), capturing local spatial features of the input.

conv_spatial: a second depthwise convolution with a 7x7 kernel, stride 1, padding 9 and dilation 3, capturing spatial features over a much larger range.

conv1 and conv2: two 1x1 convolutions that reduce the channel count of the conv0 and conv_spatial outputs to dim // 2.

conv_squeeze: a 7x7 convolution over the 2-channel stack of pooled attention maps (average and max), keeping 2 channels; a sigmoid squashes its output into (0, 1).

conv: a 1x1 convolution that restores the channel count from dim // 2 back to dim, so the output has the same number of channels as the input.

In the forward pass, the module weights the input feature map through this series of convolutions, using the sigmoid weights to balance the attention between the two branches. The final output is the input feature map multiplied by the attention weights.

The purpose of this LSKblock is to introduce spatial and channel attention so as to better capture the important information in the input feature map.
"""

import torch
import torch.nn as nn


class LSKblock(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.conv0 = nn.Conv2d(dim, dim, 5, padding=2, groups=dim)
        self.conv_spatial = nn.Conv2d(dim, dim, 7, stride=1, padding=9, groups=dim, dilation=3)
        self.conv1 = nn.Conv2d(dim, dim // 2, 1)
        self.conv2 = nn.Conv2d(dim, dim // 2, 1)
        self.conv_squeeze = nn.Conv2d(2, 2, 7, padding=3)
        self.conv = nn.Conv2d(dim // 2, dim, 1)

    def forward(self, x):
        attn1 = self.conv0(x)
        attn2 = self.conv_spatial(attn1)

        attn1 = self.conv1(attn1)
        attn2 = self.conv2(attn2)

        attn = torch.cat([attn1, attn2], dim=1)
        avg_attn = torch.mean(attn, dim=1, keepdim=True)
        max_attn, _ = torch.max(attn, dim=1, keepdim=True)
        agg = torch.cat([avg_attn, max_attn], dim=1)
        sig = self.conv_squeeze(agg).sigmoid()
        attn = attn1 * sig[:, 0, :, :].unsqueeze(1) + attn2 * sig[:, 1, :, :].unsqueeze(1)
        attn = self.conv(attn)
        return x * attn


# Input N C H W, output N C H W
if __name__ == '__main__':
    block = LSKblock(64).cuda()
    input = torch.rand(1, 64, 64, 64).cuda()
    output = block(input)
    print(input.size(), output.size())
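    # A worked check on the "larger spatial range" claim: the 7x7 kernel with
    # dilation 3 spans (7 - 1) * 3 + 1 = 19 pixels, and stacked after the 5x5
    # depthwise conv the combined receptive field is 5 + 19 - 1 = 23 pixels,
    # i.e. the two cheap depthwise convs decompose one 23x23 large kernel.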
Binary file not shown.
@@ -0,0 +1,110 @@
# https://www.haoranyou.com/castling-vit/

"""
Main components and operations of this module:

qkv: a linear layer that maps the input features x to the query, key and value, whose channels are split across several heads.

attn_drop and proj_drop: Dropout layers for the attention matrix and the output features.

kq_matmul, kqv_matmul and qk_matmul: small matrix-multiplication wrappers for the different parts of the attention computation. kq_matmul computes the key-value product, kqv_matmul multiplies the query with that product, and qk_matmul computes the query-key product (used only by the sparse branch).

dconv: a depthwise convolution applied to the value as a residual branch.

In the forward pass, the module first maps x to queries, keys and values, then computes the attention terms with the wrappers above. The queries and keys are L2-normalized, the values pass through the depthwise convolution, and the results are combined into the final output features.

This module implements a linear-angular attention mechanism and can be used for information mixing and feature extraction on sequence or image data. Parameters such as num_heads, qkv_bias and attn_drop can be tuned per task.
"""

import torch
import torch.nn as nn
import math


class MatMul(nn.Module):
    def __init__(self):
        super(MatMul, self).__init__()

    def forward(self, x, y):
        return torch.matmul(x, y)


class LinAngularAttention(nn.Module):
    def __init__(
        self,
        in_channels,
        num_heads=8,
        qkv_bias=False,
        attn_drop=0.0,
        proj_drop=0.0,
        res_kernel_size=9,
        sparse_reg=False,
    ):
        super().__init__()
        assert in_channels % num_heads == 0, "dim should be divisible by num_heads"
        self.num_heads = num_heads
        head_dim = in_channels // num_heads
        self.scale = head_dim ** -0.5
        self.sparse_reg = sparse_reg

        self.qkv = nn.Linear(in_channels, in_channels * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(in_channels, in_channels)
        self.proj_drop = nn.Dropout(proj_drop)

        self.kq_matmul = MatMul()
        self.kqv_matmul = MatMul()
        if self.sparse_reg:
            self.qk_matmul = MatMul()
            self.sv_matmul = MatMul()

        self.dconv = nn.Conv2d(
            in_channels=self.num_heads,
            out_channels=self.num_heads,
            kernel_size=(res_kernel_size, 1),
            padding=(res_kernel_size // 2, 0),
            bias=False,
            groups=self.num_heads,
        )

    def forward(self, x):
        N, L, C = x.shape
        qkv = (
            self.qkv(x)
            .reshape(N, L, 3, self.num_heads, C // self.num_heads)
            .permute(2, 0, 3, 1, 4)
        )
        q, k, v = qkv.unbind(0)  # make torchscript happy (cannot use tensor as tuple)

        if self.sparse_reg:
            attn = self.qk_matmul(q * self.scale, k.transpose(-2, -1))
            attn = attn.softmax(dim=-1)
            mask = attn > 0.02  # note that the threshold could be different; adapt to your codebase
            sparse = mask * attn

        q = q / q.norm(dim=-1, keepdim=True)
        k = k / k.norm(dim=-1, keepdim=True)
        dconv_v = self.dconv(v)

        attn = self.kq_matmul(k.transpose(-2, -1), v)

        if self.sparse_reg:
            x = (
                self.sv_matmul(sparse, v)
                + 0.5 * v
                + 1.0 / math.pi * self.kqv_matmul(q, attn)
            )
        else:
            x = 0.5 * v + 1.0 / math.pi * self.kqv_matmul(q, attn)
        x = x / x.norm(dim=-1, keepdim=True)
        x += dconv_v
        x = x.transpose(1, 2).reshape(N, L, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


if __name__ == '__main__':
    block = LinAngularAttention(in_channels=128)
    input = torch.rand(32, 784, 128)
    output = block(input)
    print(input.size(), output.size())
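    # Illustrative only: the demo above leaves sparse_reg=False; enabling it
    # adds the thresholded softmax branch on top of the linear-angular term.
    block_sparse = LinAngularAttention(in_channels=128, sparse_reg=True)
    out = block_sparse(torch.rand(2, 196, 128))
    print(out.size())  # torch.Size([2, 196, 128])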
Binary file not shown.
@@ -0,0 +1,119 @@
# https://github.com/lancopku/MUSE

"""
Main components and operations of this module:

Multi-head self-attention: the inputs queries, keys and values are mapped into subspaces by the linear layers fc_q, fc_k and fc_v; the multi-head attention scores are computed, normalized with a softmax, and used to weight the values for the attention output.

Convolutional fusion with dynamic parameters: depthwise-pointwise convolutions with kernel sizes 1, 3 and 5 are applied to the values, and a set of dynamic parameters (dy_paras) decides their mixing weights, so the contribution of each kernel size can be controlled dynamically.

Weight initialization: the init_weights method initializes the module's weights.

Forward pass: given queries, keys, values and the optional attention_mask and attention_weights, the module computes the multi-head self-attention output and adds the dynamically weighted convolutional branch to obtain the final output.
"""

import numpy as np
import torch
from torch import nn
from torch.nn import init


class Depth_Pointwise_Conv1d(nn.Module):
    def __init__(self, in_ch, out_ch, k):
        super().__init__()
        if k == 1:
            self.depth_conv = nn.Identity()
        else:
            self.depth_conv = nn.Conv1d(
                in_channels=in_ch,
                out_channels=in_ch,
                kernel_size=k,
                groups=in_ch,
                padding=k // 2
            )
        self.pointwise_conv = nn.Conv1d(
            in_channels=in_ch,
            out_channels=out_ch,
            kernel_size=1,
            groups=1
        )

    def forward(self, x):
        out = self.pointwise_conv(self.depth_conv(x))
        return out


class MUSEAttention(nn.Module):

    def __init__(self, d_model, d_k, d_v, h, dropout=.1):
        super(MUSEAttention, self).__init__()
        self.fc_q = nn.Linear(d_model, h * d_k)
        self.fc_k = nn.Linear(d_model, h * d_k)
        self.fc_v = nn.Linear(d_model, h * d_v)
        self.fc_o = nn.Linear(h * d_v, d_model)
        self.dropout = nn.Dropout(dropout)

        self.conv1 = Depth_Pointwise_Conv1d(h * d_v, d_model, 1)
        self.conv3 = Depth_Pointwise_Conv1d(h * d_v, d_model, 3)
        self.conv5 = Depth_Pointwise_Conv1d(h * d_v, d_model, 5)
        self.dy_paras = nn.Parameter(torch.ones(3))
        self.softmax = nn.Softmax(-1)

        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v
        self.h = h

        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, queries, keys, values, attention_mask=None, attention_weights=None):

        # Self attention
        b_s, nq = queries.shape[:2]
        nk = keys.shape[1]

        q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3)  # (b_s, h, nq, d_k)
        k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1)  # (b_s, h, d_k, nk)
        v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3)  # (b_s, h, nk, d_v)

        att = torch.matmul(q, k) / np.sqrt(self.d_k)  # (b_s, h, nq, nk)
        if attention_weights is not None:
            att = att * attention_weights
        if attention_mask is not None:
            att = att.masked_fill(attention_mask, -np.inf)
        att = torch.softmax(att, -1)
        att = self.dropout(att)

        out = torch.matmul(att, v).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v)  # (b_s, nq, h*d_v)
        out = self.fc_o(out)  # (b_s, nq, d_model)

        v2 = v.permute(0, 1, 3, 2).contiguous().view(b_s, -1, nk)  # bs,dim,n
        # Normalize the mixing weights on the fly; re-wrapping them in a new
        # nn.Parameter every forward pass would detach them from the optimizer.
        gates = self.softmax(self.dy_paras)
        out2 = gates[0] * self.conv1(v2) + gates[1] * self.conv3(v2) + gates[2] * self.conv5(v2)
        out2 = out2.permute(0, 2, 1)  # bs,n,dim

        out = out + out2
        return out


if __name__ == '__main__':
    block = MUSEAttention(d_model=256, d_k=256, d_v=256, h=8).cuda()
    # The module expects sequence-shaped input: (batch, tokens, d_model).
    input = torch.rand(1, 128, 256).cuda()
    output = block(input, input, input)
    print(input.size(), output.size())
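    # Illustrative only: forward also accepts a boolean attention_mask that is
    # broadcast against the (b_s, h, nq, nk) score tensor; True marks positions
    # to suppress (they are filled with -inf before the softmax).
    mask = torch.zeros(1, 1, 1, 128, dtype=torch.bool).cuda()
    mask[..., 100:] = True  # e.g. treat the last 28 tokens as padding
    output = block(input, input, input, attention_mask=mask)
    print(output.size())  # torch.Size([1, 128, 256])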
Binary file not shown.
@@ -0,0 +1,75 @@
# https://github.com/apple/ml-cvnets

"""
Main components and operations of this module:

Self-attention: the linear layers fc_i, fc_k, fc_v and fc_o map the input into different subspaces. The attention weights (weight_i) are computed by applying a softmax to the output of fc_i and are used to weight fc_k(input), giving context_score. Summing context_score over the tokens yields a context vector (context_vector), which then gates fc_v(input). Finally, fc_o produces the output.

Weight initialization: the init_weights method initializes the module's weights.

Forward pass: the self-attention computation is applied to the input and the resulting attention output is returned.
"""

import numpy as np
import torch
from torch import nn
from torch.nn import init


class MobileViTv2Attention(nn.Module):
    '''
    Separable self-attention from MobileViTv2
    '''

    def __init__(self, d_model):
        '''
        :param d_model: Output dimensionality of the model
        '''
        super(MobileViTv2Attention, self).__init__()
        self.fc_i = nn.Linear(d_model, 1)
        self.fc_k = nn.Linear(d_model, d_model)
        self.fc_v = nn.Linear(d_model, d_model)
        self.fc_o = nn.Linear(d_model, d_model)

        self.d_model = d_model
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, input):
        '''
        Computes separable self-attention
        :param input: queries (b_s, nq, d_model)
        :return: output tensor of the same shape
        '''
        i = self.fc_i(input)  # (bs,nq,1)
        weight_i = torch.softmax(i, dim=1)  # bs,nq,1
        context_score = weight_i * self.fc_k(input)  # bs,nq,d_model
        context_vector = torch.sum(context_score, dim=1, keepdim=True)  # bs,1,d_model
        v = self.fc_v(input) * context_vector  # bs,nq,d_model
        out = self.fc_o(v)  # bs,nq,d_model

        return out


if __name__ == '__main__':
    block = MobileViTv2Attention(d_model=256)
    # input = torch.rand(64, 64, 512).cuda()
    input = torch.rand(1, 128, 256, 256)
    output = block(input)
    print(input.size(), output.size())
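    # The module is written for sequence-shaped (b_s, nq, d_model) input, with
    # the softmax taken over dim=1 (the token axis); with the 4-D demo input
    # above, dim 1 is therefore treated as the token axis. A sequence-shaped
    # sketch with assumed sizes:
    seq = torch.rand(2, 196, 256)
    print(block(seq).size())  # torch.Size([2, 196, 256])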
Binary file not shown.
@@ -0,0 +1,89 @@
# https://github.com/sail-sg/volo

"""
Main components and operations of this module:

v_pj: a linear projection that maps the input features into a new feature space to produce v.

attn: a linear layer that maps each (pooled) spatial location to attention scores describing the importance of its local neighbourhood.

attn_drop: a dropout layer applied to the attention scores to prevent overfitting.

proj and proj_drop: the final linear projection and its dropout.

unfold: an nn.Unfold operation that extracts sliding local blocks of v with the given kernel_size, padding and stride.

pool: average pooling that shrinks the input before the attention scores are computed.

In the forward pass, the module first maps the local regions of the input into the v feature space, then computes the attention scores. The scores are applied to the unfolded values to obtain a weighted feature representation, which is folded back, projected and passed through dropout to produce the final output.

The main purpose of this module is to capture local information in the input image and weight the feature representation by the importance of each local region, which can help vision tasks such as image classification and segmentation.
"""

import numpy as np
import torch
from torch import nn
from torch.nn import init
import math
from torch.nn import functional as F


class OutlookAttention(nn.Module):

    def __init__(self, dim, num_heads=1, kernel_size=3, padding=1, stride=1, qkv_bias=False,
                 attn_drop=0.1):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.head_dim = dim // num_heads
        self.kernel_size = kernel_size
        self.padding = padding
        self.stride = stride
        self.scale = self.head_dim ** (-0.5)

        self.v_pj = nn.Linear(dim, dim, bias=qkv_bias)
        self.attn = nn.Linear(dim, kernel_size ** 4 * num_heads)

        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(attn_drop)

        # keyword arguments matter here: nn.Unfold's second positional
        # parameter is dilation, not padding
        self.unfold = nn.Unfold(kernel_size=kernel_size, padding=padding, stride=stride)  # im2col
        self.pool = nn.AvgPool2d(kernel_size=stride, stride=stride, ceil_mode=True)

    def forward(self, x):
        B, H, W, C = x.shape

        # project to the value features v
        v = self.v_pj(x).permute(0, 3, 1, 2)  # B,C,H,W
        h, w = math.ceil(H / self.stride), math.ceil(W / self.stride)
        v = self.unfold(v).reshape(B, self.num_heads, self.head_dim, self.kernel_size * self.kernel_size,
                                   h * w).permute(0, 1, 4, 3, 2)  # B,num_head,H*W,kxk,head_dim

        # generate the attention map
        attn = self.pool(x.permute(0, 3, 1, 2)).permute(0, 2, 3, 1)  # B,H,W,C
        attn = self.attn(attn).reshape(B, h * w, self.num_heads, self.kernel_size * self.kernel_size,
                                       self.kernel_size * self.kernel_size).permute(0, 2, 1, 3, 4)  # B,num_head,H*W,kxk,kxk
        attn = self.scale * attn
        attn = attn.softmax(-1)
        attn = self.attn_drop(attn)

        # gather the weighted features
        out = (attn @ v).permute(0, 1, 4, 3, 2).reshape(B, C * self.kernel_size * self.kernel_size,
                                                        h * w)  # B,dimxkxk,H*W
        out = F.fold(out, output_size=(H, W), kernel_size=self.kernel_size,
                     padding=self.padding, stride=self.stride)  # B,C,H,W
        out = self.proj(out.permute(0, 2, 3, 1))  # B,H,W,C
        out = self.proj_drop(out)

        return out


# Input B, H, W, C, output B, H, W, C
if __name__ == '__main__':
    block = OutlookAttention(dim=256).cuda()
    # input = torch.rand(1, 64, 64, 512).cuda()
    input = torch.rand(1, 128, 256, 256).cuda()
    output = block(input)
    print(input.size(), output.size())
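    # Unlike the NCHW modules in this repo, OutlookAttention expects
    # channels-last input; converting a standard NCHW feature map is a
    # permute (illustrative sizes):
    x_nchw = torch.rand(1, 256, 32, 32).cuda()   # N, C, H, W
    y = block(x_nchw.permute(0, 2, 3, 1))        # -> N, H, W, C
    print(y.permute(0, 3, 1, 2).size())          # back to N, C, H, W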
Binary file not shown.
@@ -0,0 +1,73 @@
# https://github.com/imankgoyal/NonDeepNetworks

"""
The module consists of the following components:

sse (Squeeze-and-Excitation) branch:

Adaptive average pooling squeezes the input tensor down to 1x1.
A 1x1 convolution with the same channel count then produces a set of attention weights, scaled into (0, 1) by a sigmoid.
These attention weights re-weight the input features to highlight the important ones.

conv1x1 and conv3x3 branches:

conv1x1 is a 1x1 convolution that captures cross-channel information at each position.
conv3x3 is a 3x3 convolution that captures local information.
Both are followed by batch normalization to stabilize training.

silu activation:

SiLU (also known as Swish) is a smooth non-linear activation.

In the forward pass, the input tensor x runs through these components and the summed result is activated to give the output feature tensor y. The module is designed to strengthen the network's feature representation by fusing features of different scales with attention weighting, capturing both global and local information.
"""

import numpy as np
import torch
from torch import nn
from torch.nn import init

from einops import rearrange


def to_3d(x):
    return rearrange(x, 'b c h w -> b (h w) c')


def to_4d(x, h, w):
    return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)


class ParNetAttention(nn.Module):

    def __init__(self, channel=512):
        super().__init__()
        self.sse = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(channel, channel, kernel_size=1),
            nn.Sigmoid()
        )

        self.conv1x1 = nn.Sequential(
            nn.Conv2d(channel, channel, kernel_size=1),
            nn.BatchNorm2d(channel)
        )
        self.conv3x3 = nn.Sequential(
            nn.Conv2d(channel, channel, kernel_size=3, padding=1),
            nn.BatchNorm2d(channel)
        )
        self.silu = nn.SiLU()

    def forward(self, x):
        b, c, _, _ = x.size()
        x1 = self.conv1x1(x)
        x2 = self.conv3x3(x)
        x3 = self.sse(x) * x
        y = self.silu(x1 + x2 + x3)
        return y


# Input N C H W, output N C H W
if __name__ == '__main__':
    # input = torch.randn(3, 512, 7, 7).cuda()
    input = torch.randn(1, 128, 256, 256).cuda()
    pna = ParNetAttention(channel=128).cuda()
    output = pna(input)
    print(output.shape)
Binary file not shown.
@@ -0,0 +1,70 @@
# https://github.com/JierunChen/FasterNet

"""
This code implements a custom convolution module named Partial_conv3 whose behaviour depends on its arguments. Its main characteristics:

Partial convolution: the module applies an nn.Conv2d to only part of the input channels; dim_conv3 is the number of channels convolved, usually a fraction of the input channel count dim.

Forward strategies: the forward parameter selects one of two strategies:

'slicing': convolve only the first dim_conv3 channels of the input (writing the result in place on a clone); this corresponds to using partial convolution at inference time only.
'split_cat': split the input into two parts, convolve one and concatenate the two back together; this works for both training and inference.

Selective application: the convolution touches only specific channels while the others pass through unchanged, which lets the model apply convolution selectively and control feature extraction and propagation flexibly.

Residual-friendly: after the partial convolution, the untouched channels are retained and the two parts are joined, so input and output have the same channel count and the block composes with other modules.

Overall, Partial_conv3 offers a custom convolution strategy that applies convolution only to selected channels of the input; it can be used for feature selection, channel interaction and similar tasks, adding flexibility to the network.
"""

from torch import nn
import torch

from einops.einops import rearrange


def to_3d(x):
    return rearrange(x, 'b c h w -> b (h w) c')


def to_4d(x, h, w):
    return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)


class Partial_conv3(nn.Module):

    def __init__(self, dim, n_div, forward):
        super().__init__()
        self.dim_conv3 = dim // n_div
        self.dim_untouched = dim - self.dim_conv3
        self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)

        if forward == 'slicing':
            self.forward = self.forward_slicing
        elif forward == 'split_cat':
            self.forward = self.forward_split_cat
        else:
            raise NotImplementedError

    def forward_slicing(self, x):
        # only for inference
        x = x.clone()  # !!! Keep the original input intact for the residual connection later
        x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :])

        return x

    def forward_split_cat(self, x):
        # for training/inference
        x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
        x1 = self.partial_conv3(x1)
        x = torch.cat((x1, x2), 1)

        return x


if __name__ == '__main__':
    block = Partial_conv3(128, 2, 'split_cat')
    input = torch.rand(32, 784, 128)
    input = to_4d(input, 28, 28)
    output = block(input)
    output = to_3d(output)
    print(input.size())
    print(output.size())
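    # Quick equivalence check (illustrative): with shared weights the two
    # strategies agree, since both convolve only the first dim // n_div
    # channels and pass the rest through unchanged.
    sliced = Partial_conv3(128, 2, 'slicing')
    sliced.load_state_dict(block.state_dict())
    x = torch.rand(2, 128, 28, 28)
    print(torch.allclose(block(x), sliced(x)))  # True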
Binary file not shown.
@@ -0,0 +1,95 @@
# https://paperswithcode.com/paper/s-2-mlpv2-improved-spatial-shift-mlp

"""
SplitAttention:

A split attention module used to strengthen the network's feature representation.
Parameters: channel (number of channels) and k (number of attention splits).
In the forward pass the input tensor x_all is reshaped to (b, k, h*w, c), where b is the batch size, k the number of splits, h and w the height and width, and c the channels.
The attention weights are then computed: an MLP produces hat_a, and a softmax over it gives bar_a.
Finally bar_a is multiplied with x_all and the k splits are summed to obtain the output.

S2Attention:

An attention module built on SplitAttention for processing the input tensor.
Parameter: channels (number of channels).
In the forward pass the input is first linearly expanded and split into three parts (x1, x2 and x3), two of which are spatially shifted.
The three parts are passed to the SplitAttention module, which computes attention weights and strengthens the representation.
Finally, another linear layer merges the attention-enhanced representation and returns it.

These modules can be used as layers in a network to improve the quality and generalization of the feature representation.
"""

import numpy as np
import torch
from torch import nn
from torch.nn import init


def spatial_shift1(x):
    # note: the shifts below write in place into slices (views) of x
    b, w, h, c = x.size()
    x[:, 1:, :, :c // 4] = x[:, :w - 1, :, :c // 4]
    x[:, :w - 1, :, c // 4:c // 2] = x[:, 1:, :, c // 4:c // 2]
    x[:, :, 1:, c // 2:c * 3 // 4] = x[:, :, :h - 1, c // 2:c * 3 // 4]
    x[:, :, :h - 1, 3 * c // 4:] = x[:, :, 1:, 3 * c // 4:]
    return x


def spatial_shift2(x):
    b, w, h, c = x.size()
    x[:, :, 1:, :c // 4] = x[:, :, :h - 1, :c // 4]
    x[:, :, :h - 1, c // 4:c // 2] = x[:, :, 1:, c // 4:c // 2]
    x[:, 1:, :, c // 2:c * 3 // 4] = x[:, :w - 1, :, c // 2:c * 3 // 4]
    x[:, :w - 1, :, 3 * c // 4:] = x[:, 1:, :, 3 * c // 4:]
    return x


class SplitAttention(nn.Module):
    def __init__(self, channel=32, k=3):
        super().__init__()
        self.channel = channel
        self.k = k
        self.mlp1 = nn.Linear(channel, channel, bias=False)
        self.gelu = nn.GELU()
        self.mlp2 = nn.Linear(channel, channel * k, bias=False)
        self.softmax = nn.Softmax(1)

    def forward(self, x_all):
        b, k, h, w, c = x_all.shape
        x_all = x_all.reshape(b, k, -1, c)  # bs,k,n,c
        a = torch.sum(torch.sum(x_all, 1), 1)  # bs,c
        hat_a = self.mlp2(self.gelu(self.mlp1(a)))  # bs,kc
        hat_a = hat_a.reshape(b, self.k, c)  # bs,k,c
        bar_a = self.softmax(hat_a)  # bs,k,c
        attention = bar_a.unsqueeze(-2)  # bs,k,1,c
        out = attention * x_all  # bs,k,n,c
        out = torch.sum(out, 1).reshape(b, h, w, c)
        return out


class S2Attention(nn.Module):

    def __init__(self, channels=32):
        super().__init__()
        self.mlp1 = nn.Linear(channels, channels * 3)
        self.mlp2 = nn.Linear(channels, channels)
        self.split_attention = SplitAttention()

    def forward(self, x):
        b, c, w, h = x.size()
        x = x.permute(0, 2, 3, 1)
        x = self.mlp1(x)
        x1 = spatial_shift1(x[:, :, :, :c])
        x2 = spatial_shift2(x[:, :, :, c:c * 2])
        x3 = x[:, :, :, c * 2:]
        x_all = torch.stack([x1, x2, x3], 1)
        a = self.split_attention(x_all)
        x = self.mlp2(a)
        x = x.permute(0, 3, 1, 2)
        return x


# Input N C H W, output N C H W
if __name__ == '__main__':
    input = torch.randn(64, 32, 7, 7)
    s2att = S2Attention(channels=32)
    output = s2att(input)
    print(output.shape)
Binary file not shown.
@@ -0,0 +1,85 @@
# https://github.com/implus/SKNet

"""
The module convolves the input with several kernel sizes, computes attention weights for the different kernels, and applies them to the corresponding parts of the input to produce the final output. Main components and steps:

Initialization: the module accepts the following parameters:

channel: number of input channels.
kernels: list of kernel sizes used for the convolution branches.
reduction: channel reduction ratio used to shrink the bottleneck.
group: number of groups for the convolutions.
L: lower bound used when computing the bottleneck width.
During initialization, the module creates the convolution branches, the linear layers and the softmax used in the later computation.

Forward pass: the module performs the following steps:

For each kernel size, convolve the input with the corresponding branch and collect the results in conv_outs.
Sum all branch outputs into U, a fused representation of the input.
Average-pool U and reduce the channels to d with a linear layer.
Compute per-branch attention weights with the linear layers and collect them in weights.
Normalize the attention weights with a softmax.
Apply the attention weights to the branch features and sum them into the final output tensor V.

The module returns the tensor V as its output.

The core idea is to compute attention weights over kernels of different scales so as to capture multi-scale information in the input, then fuse the scales by weighted summation. This strengthens the model's perception of objects at different scales.
"""

import torch
from torch import nn
from collections import OrderedDict


class SKAttention(nn.Module):

    def __init__(self, channel=512, kernels=[1, 3, 5, 7], reduction=16, group=1, L=32):
        super().__init__()
        self.d = max(L, channel // reduction)
        self.convs = nn.ModuleList([])
        for k in kernels:
            self.convs.append(
                nn.Sequential(OrderedDict([
                    ('conv', nn.Conv2d(channel, channel, kernel_size=k, padding=k // 2, groups=group)),
                    ('bn', nn.BatchNorm2d(channel)),
                    ('relu', nn.ReLU())
                ]))
            )
        self.fc = nn.Linear(channel, self.d)
        self.fcs = nn.ModuleList([])
        for i in range(len(kernels)):
            self.fcs.append(nn.Linear(self.d, channel))
        self.softmax = nn.Softmax(dim=0)

    def forward(self, x):
        bs, c, _, _ = x.size()
        conv_outs = []
        ### split
        for conv in self.convs:
            conv_outs.append(conv(x))
        feats = torch.stack(conv_outs, 0)  # k,bs,channel,h,w

        ### fuse
        U = sum(conv_outs)  # bs,c,h,w

        ### reduce channel
        S = U.mean(-1).mean(-1)  # bs,c
        Z = self.fc(S)  # bs,d

        ### calculate attention weight
        weights = []
        for fc in self.fcs:
            weight = fc(Z)
            weights.append(weight.view(bs, c, 1, 1))  # bs,channel
        attention_weights = torch.stack(weights, 0)  # k,bs,channel,1,1
        attention_weights = self.softmax(attention_weights)  # k,bs,channel,1,1

        ### fuse
        V = (attention_weights * feats).sum(0)
        return V


# Input N C H W, output N C H W
if __name__ == '__main__':
    input = torch.randn(50, 512, 7, 7)
    se = SKAttention(channel=512, reduction=8)
    output = se(input)
    print(output.shape)
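    # Worked bottleneck width for this demo: d = max(L, channel // reduction)
    # = max(32, 512 // 8) = 64, so the per-kernel branch weights are predicted
    # from a 64-dimensional squeeze of the fused features.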
Binary file not shown.
@@ -0,0 +1,189 @@
# https://github.com/hhb072/SViT

"""
Main components and functions of this module:

Unfold operation: the Unfold class defines a convolution that unfolds the input image into local blocks flattened into vectors, which helps relate neighbouring local regions to one another.

Fold operation: the Fold class defines a transposed convolution that folds the unfolded local blocks back into the original image shape, recombining local features into an image.

Attention operation: the Attention class defines the attention used to compute the affinity weights between local blocks; attending over the unfolded blocks determines how related different blocks are, capturing local features more effectively.

Stoken operation: the StokenAttention class partitions the image into many small blocks ("super tokens") and runs the attention between them, including an iterative update of the block affinities to better capture the local structure of the image.

Direct pass: the direct_forward method passes the input image straight to the refinement attention, without block partitioning; this is useful when local-feature modelling is not needed.

Choosing between the two: depending on the self.stoken_size parameter, the module runs either the Stoken path (when either entry of stoken_size is greater than 1) or the direct path.

Overall, this module provides an effective way to process image data and relate its different local regions, capturing local features. This matters for many vision tasks such as object detection and image segmentation.
"""


import torch
import torch.nn as nn
import torch.nn.functional as F


class Unfold(nn.Module):
    def __init__(self, kernel_size=3):
        super().__init__()

        self.kernel_size = kernel_size

        weights = torch.eye(kernel_size ** 2)
        weights = weights.reshape(kernel_size ** 2, 1, kernel_size, kernel_size)
        self.weights = nn.Parameter(weights, requires_grad=False)

    def forward(self, x):
        b, c, h, w = x.shape
        x = F.conv2d(x.reshape(b * c, 1, h, w), self.weights, stride=1, padding=self.kernel_size // 2)
        return x.reshape(b, c * 9, h * w)


class Fold(nn.Module):
    def __init__(self, kernel_size=3):
        super().__init__()

        self.kernel_size = kernel_size

        weights = torch.eye(kernel_size ** 2)
        weights = weights.reshape(kernel_size ** 2, 1, kernel_size, kernel_size)
        self.weights = nn.Parameter(weights, requires_grad=False)

    def forward(self, x):
        b, _, h, w = x.shape
        x = F.conv_transpose2d(x, self.weights, stride=1, padding=self.kernel_size // 2)
        return x


class Attention(nn.Module):
    def __init__(self, dim, window_size=None, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.):
        super().__init__()

        self.dim = dim
        self.num_heads = num_heads
        head_dim = dim // num_heads

        self.window_size = window_size

        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Conv2d(dim, dim * 3, 1, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Conv2d(dim, dim, 1)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, C, H, W = x.shape
        N = H * W

        q, k, v = self.qkv(x).reshape(B, self.num_heads, C // self.num_heads * 3, N).chunk(3, dim=2)  # (B, num_heads, head_dim, N)

        attn = (k.transpose(-1, -2) @ q) * self.scale

        attn = attn.softmax(dim=-2)  # (B, h, N, N)
        attn = self.attn_drop(attn)

        x = (v @ attn).reshape(B, C, H, W)

        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class StokenAttention(nn.Module):
    def __init__(self, dim, stoken_size, n_iter=1, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0.,
                 proj_drop=0.):
        super().__init__()

        self.n_iter = n_iter
        self.stoken_size = stoken_size

        self.scale = dim ** -0.5

        self.unfold = Unfold(3)
        self.fold = Fold(3)

        self.stoken_refine = Attention(dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale,
                                       attn_drop=attn_drop, proj_drop=proj_drop)

    def stoken_forward(self, x):
        '''
        x: (B, C, H, W)
        '''
        B, C, H0, W0 = x.shape
        h, w = self.stoken_size

        pad_l = pad_t = 0
        pad_r = (w - W0 % w) % w
        pad_b = (h - H0 % h) % h
        if pad_r > 0 or pad_b > 0:
            x = F.pad(x, (pad_l, pad_r, pad_t, pad_b))

        _, _, H, W = x.shape

        hh, ww = H // h, W // w

        stoken_features = F.adaptive_avg_pool2d(x, (hh, ww))  # (B, C, hh, ww)

        pixel_features = x.reshape(B, C, hh, h, ww, w).permute(0, 2, 4, 3, 5, 1).reshape(B, hh * ww, h * w, C)

        with torch.no_grad():
            for idx in range(self.n_iter):
                stoken_features = self.unfold(stoken_features)  # (B, C*9, hh*ww)
                stoken_features = stoken_features.transpose(1, 2).reshape(B, hh * ww, C, 9)
                affinity_matrix = pixel_features @ stoken_features * self.scale  # (B, hh*ww, h*w, 9)

                affinity_matrix = affinity_matrix.softmax(-1)  # (B, hh*ww, h*w, 9)

                affinity_matrix_sum = affinity_matrix.sum(2).transpose(1, 2).reshape(B, 9, hh, ww)

                affinity_matrix_sum = self.fold(affinity_matrix_sum)
                if idx < self.n_iter - 1:
                    stoken_features = pixel_features.transpose(-1, -2) @ affinity_matrix  # (B, hh*ww, C, 9)

                    stoken_features = self.fold(stoken_features.permute(0, 2, 3, 1).reshape(B * C, 9, hh, ww)).reshape(
                        B, C, hh, ww)

                    stoken_features = stoken_features / (affinity_matrix_sum + 1e-12)  # (B, C, hh, ww)

        stoken_features = pixel_features.transpose(-1, -2) @ affinity_matrix  # (B, hh*ww, C, 9)

        stoken_features = self.fold(stoken_features.permute(0, 2, 3, 1).reshape(B * C, 9, hh, ww)).reshape(B, C, hh, ww)

        stoken_features = stoken_features / (affinity_matrix_sum.detach() + 1e-12)  # (B, C, hh, ww)

        stoken_features = self.stoken_refine(stoken_features)

        stoken_features = self.unfold(stoken_features)  # (B, C*9, hh*ww)
        stoken_features = stoken_features.transpose(1, 2).reshape(B, hh * ww, C, 9)  # (B, hh*ww, C, 9)

        pixel_features = stoken_features @ affinity_matrix.transpose(-1, -2)  # (B, hh*ww, C, h*w)

        pixel_features = pixel_features.reshape(B, hh, ww, C, h, w).permute(0, 3, 1, 4, 2, 5).reshape(B, C, H, W)

        if pad_r > 0 or pad_b > 0:
            pixel_features = pixel_features[:, :, :H0, :W0]

        return pixel_features

    def direct_forward(self, x):
        B, C, H, W = x.shape
        stoken_features = x
        stoken_features = self.stoken_refine(stoken_features)
        return stoken_features

    def forward(self, x):
        if self.stoken_size[0] > 1 or self.stoken_size[1] > 1:
            return self.stoken_forward(x)
        else:
            return self.direct_forward(x)


# Input N C H W, output N C H W
if __name__ == '__main__':
    input = torch.randn(3, 64, 32, 64).cuda()
    se = StokenAttention(64, stoken_size=[8, 8]).cuda()
    output = se(input)
    print(output.shape)
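    # Per the forward dispatch above, stoken_size=[1, 1] skips the super-token
    # clustering entirely and runs direct_forward (a CPU-friendly sketch):
    direct = StokenAttention(64, stoken_size=[1, 1])
    print(direct(torch.randn(2, 64, 32, 32)).shape)  # torch.Size([2, 64, 32, 32])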
Binary file not shown.
@@ -0,0 +1,163 @@
# https://github.com/cheng-haha/ScConv

"""
GroupBatchnorm2d:

A custom normalization module.
It supports grouping the channels, i.e. splitting them into several groups that share normalization statistics.
Parameters: c_num (number of channels), group_num (number of groups) and eps (a small value to avoid division by zero).
In the forward pass the input tensor is first split into groups; the per-group mean and standard deviation are computed and used to standardize the input.

SRU (Spatial Reconstruction Unit):

A custom module for strengthening the network's feature representation.
Parameters: oup_channels (output channels), group_num (number of groups), gate_treshold (gate threshold) and torch_gn (whether to use PyTorch's GroupNorm).
In the forward pass it first applies group normalization and then reconstructs the input features through a gating mechanism.
The gate decides, based on the distribution of the features and the normalization weights, which information is kept and which is discarded.

CRU (Channel Reconstruction Unit):

A custom channel reorganization module.
Parameters: op_channel (output channels), alpha (channel split ratio), squeeze_radio (squeeze ratio), group_size (group size) and group_kernel_size (group convolution kernel size).
In the forward pass the input channels are split into two parts; each part is squeezed and transformed (a group convolution plus pointwise convolutions), and the results are fused.

ScConv (Spatial and Channel reconstruction Convolution):

A module combining SRU and CRU to strengthen the feature representation and reorganize the channels.
Parameters: those of the SRU and CRU modules.
In the forward pass it first applies the SRU module and then the CRU module.

These custom modules can be used to build more complex networks for specific tasks and requirements; their operations and mechanisms can help improve performance and generalization.
"""

import torch
import torch.nn.functional as F
import torch.nn as nn


class GroupBatchnorm2d(nn.Module):
    def __init__(self, c_num: int,
                 group_num: int = 16,
                 eps: float = 1e-10
                 ):
        super(GroupBatchnorm2d, self).__init__()
        assert c_num >= group_num
        self.group_num = group_num
        self.weight = nn.Parameter(torch.randn(c_num, 1, 1))
        self.bias = nn.Parameter(torch.zeros(c_num, 1, 1))
        self.eps = eps

    def forward(self, x):
        N, C, H, W = x.size()
        x = x.view(N, self.group_num, -1)
        mean = x.mean(dim=2, keepdim=True)
        std = x.std(dim=2, keepdim=True)
        x = (x - mean) / (std + self.eps)
        x = x.view(N, C, H, W)
        return x * self.weight + self.bias


class SRU(nn.Module):
    def __init__(self,
                 oup_channels: int,
                 group_num: int = 16,
                 gate_treshold: float = 0.5,
                 torch_gn: bool = False
                 ):
        super().__init__()

        self.gn = nn.GroupNorm(num_channels=oup_channels, num_groups=group_num) if torch_gn else GroupBatchnorm2d(
            c_num=oup_channels, group_num=group_num)
        self.gate_treshold = gate_treshold
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        gn_x = self.gn(x)
        w_gamma = self.gn.weight / torch.sum(self.gn.weight)
        w_gamma = w_gamma.view(1, -1, 1, 1)
        reweigts = self.sigmoid(gn_x * w_gamma)
        # Gate
        info_mask = reweigts >= self.gate_treshold
        noninfo_mask = reweigts < self.gate_treshold
        x_1 = info_mask * gn_x
        x_2 = noninfo_mask * gn_x
        x = self.reconstruct(x_1, x_2)
        return x

    def reconstruct(self, x_1, x_2):
        x_11, x_12 = torch.split(x_1, x_1.size(1) // 2, dim=1)
        x_21, x_22 = torch.split(x_2, x_2.size(1) // 2, dim=1)
        return torch.cat([x_11 + x_22, x_12 + x_21], dim=1)


class CRU(nn.Module):
    '''
    alpha: 0 < alpha < 1
    '''

    def __init__(self,
                 op_channel: int,
                 alpha: float = 1 / 2,
                 squeeze_radio: int = 2,
                 group_size: int = 2,
                 group_kernel_size: int = 3,
                 ):
        super().__init__()
        self.up_channel = up_channel = int(alpha * op_channel)
        self.low_channel = low_channel = op_channel - up_channel
        self.squeeze1 = nn.Conv2d(up_channel, up_channel // squeeze_radio, kernel_size=1, bias=False)
        self.squeeze2 = nn.Conv2d(low_channel, low_channel // squeeze_radio, kernel_size=1, bias=False)
        # up
        self.GWC = nn.Conv2d(up_channel // squeeze_radio, op_channel, kernel_size=group_kernel_size, stride=1,
                             padding=group_kernel_size // 2, groups=group_size)
        self.PWC1 = nn.Conv2d(up_channel // squeeze_radio, op_channel, kernel_size=1, bias=False)
        # low
        self.PWC2 = nn.Conv2d(low_channel // squeeze_radio, op_channel - low_channel // squeeze_radio, kernel_size=1,
                              bias=False)
        self.advavg = nn.AdaptiveAvgPool2d(1)

    def forward(self, x):
        # Split
        up, low = torch.split(x, [self.up_channel, self.low_channel], dim=1)
        up, low = self.squeeze1(up), self.squeeze2(low)
        # Transform
        Y1 = self.GWC(up) + self.PWC1(up)
        Y2 = torch.cat([self.PWC2(low), low], dim=1)
        # Fuse
        out = torch.cat([Y1, Y2], dim=1)
        out = F.softmax(self.advavg(out), dim=1) * out
        out1, out2 = torch.split(out, out.size(1) // 2, dim=1)
        return out1 + out2


class ScConv(nn.Module):
    def __init__(self,
                 op_channel: int,
                 group_num: int = 4,
                 gate_treshold: float = 0.5,
                 alpha: float = 1 / 2,
                 squeeze_radio: int = 2,
                 group_size: int = 2,
                 group_kernel_size: int = 3,
                 ):
        super().__init__()
        self.SRU = SRU(op_channel,
                       group_num=group_num,
                       gate_treshold=gate_treshold)
        self.CRU = CRU(op_channel,
                       alpha=alpha,
                       squeeze_radio=squeeze_radio,
                       group_size=group_size,
                       group_kernel_size=group_kernel_size)

    def forward(self, x):
        x = self.SRU(x)
        x = self.CRU(x)
        return x


# Input N C H W, output N C H W
if __name__ == '__main__':
    # x = torch.randn(1, 32, 16, 16)
    x = torch.randn(1, 128, 256, 256)
    model = ScConv(128)
    x = model(x)
    # x = torch.unsqueeze(x[:, 0], 1)
    # print(type(x))
    print(x.shape)
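    # Worked channel count for CRU with op_channel=128 and the defaults
    # (alpha=1/2, squeeze_radio=2): the split is 64/64, squeezing gives 32
    # each; Y1 = GWC + PWC1 has 128 channels, Y2 = cat(96, 32) has 128, the
    # concatenation is 256, and the final split-and-add returns 128.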
Binary file not shown.
@ -0,0 +1,57 @@
import numpy as np
import torch
from torch import nn
from torch.nn import init


class SpatialGroupEnhance(nn.Module):

    def __init__(self, groups):
        super().__init__()
        self.groups = groups
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.weight = nn.Parameter(torch.zeros(1, groups, 1, 1))
        self.bias = nn.Parameter(torch.zeros(1, groups, 1, 1))
        self.sig = nn.Sigmoid()
        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, x):
        b, c, h, w = x.shape
        x = x.view(b * self.groups, -1, h, w)  # bs*g, dim//g, h, w
        xn = x * self.avg_pool(x)  # bs*g, dim//g, h, w
        xn = xn.sum(dim=1, keepdim=True)  # bs*g, 1, h, w
        t = xn.view(b * self.groups, -1)  # bs*g, h*w

        t = t - t.mean(dim=1, keepdim=True)  # bs*g, h*w
        std = t.std(dim=1, keepdim=True) + 1e-5
        t = t / std  # bs*g, h*w
        t = t.view(b, self.groups, h, w)  # bs, g, h, w

        t = t * self.weight + self.bias  # bs, g, h, w
        t = t.view(b * self.groups, 1, h, w)  # bs*g, 1, h, w
        x = x * self.sig(t)
        x = x.view(b, c, h, w)

        return x


# Input: N C H W, output: N C H W
if __name__ == '__main__':
    input = torch.rand(50, 512, 7, 7)
    sge = SpatialGroupEnhance(groups=4)
    output = sge(input)
    print(output.shape)
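To make the grouping explicit, a small sanity-check sketch (toy sizes, assumed) of the similarity map SGE computes per group; the adaptive average pooling above is exactly a spatial mean:

import torch

b, c, g, h, w = 2, 8, 4, 3, 3
x = torch.randn(b, c, h, w)

xg = x.view(b * g, c // g, h, w)              # (8, 2, 3, 3): groups handled independently
sim = xg * xg.mean(dim=(2, 3), keepdim=True)  # dot each position with the group's global mean
sim = sim.sum(dim=1, keepdim=True)            # (8, 1, 3, 3): per-pixel similarity map
print(sim.shape)                              # torch.Size([8, 1, 3, 3])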
Binary file not shown.
@ -0,0 +1,97 @@
# https://github.com/mindspore-courses/External-Attention-MindSpore/blob/main/model/attention/TripletAttention.py

"""
Key features and roles of these modules:

BasicConv:
    A basic convolution block: convolution, optional batch normalization and an
    optional ReLU activation, each controlled by a constructor flag.

ZPool:
    A custom pooling op that max-pools and average-pools the input along the
    channel dimension and concatenates the two results.

AttentionGate:
    Learns attention weights for a feature map. The input is first pooled with
    ZPool, a convolution then produces an attention map that is normalized with
    a Sigmoid, and finally the input is multiplied by those weights.

TripletAttention:
    A triplet attention mechanism capturing both global and local information.
    Three AttentionGate branches attend over the channel (c), height (h) and
    width (w) rotations of the tensor; the no_spatial flag disables the spatial
    branch. The final output is the average of the attended feature maps.
"""

import torch
import torch.nn as nn


class BasicConv(nn.Module):
    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True,
                 bn=True, bias=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding,
                              dilation=dilation, groups=groups, bias=bias)
        self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class ZPool(nn.Module):
    def forward(self, x):
        return torch.cat((torch.max(x, 1)[0].unsqueeze(1), torch.mean(x, 1).unsqueeze(1)), dim=1)


class AttentionGate(nn.Module):
    def __init__(self):
        super(AttentionGate, self).__init__()
        kernel_size = 7
        self.compress = ZPool()
        self.conv = BasicConv(2, 1, kernel_size, stride=1, padding=(kernel_size - 1) // 2, relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.conv(x_compress)
        scale = torch.sigmoid_(x_out)
        return x * scale


class TripletAttention(nn.Module):
    def __init__(self, no_spatial=False):
        super(TripletAttention, self).__init__()
        self.cw = AttentionGate()
        self.hc = AttentionGate()
        self.no_spatial = no_spatial
        if not no_spatial:
            self.hw = AttentionGate()

    def forward(self, x):
        x_perm1 = x.permute(0, 2, 1, 3).contiguous()
        x_out1 = self.cw(x_perm1)
        x_out11 = x_out1.permute(0, 2, 1, 3).contiguous()
        x_perm2 = x.permute(0, 3, 2, 1).contiguous()
        x_out2 = self.hc(x_perm2)
        x_out21 = x_out2.permute(0, 3, 2, 1).contiguous()
        if not self.no_spatial:
            x_out = self.hw(x)
            x_out = 1 / 3 * (x_out + x_out11 + x_out21)
        else:
            x_out = 1 / 2 * (x_out11 + x_out21)
        return x_out


if __name__ == '__main__':
    input = torch.randn(50, 512, 7, 7)
    triplet = TripletAttention()
    output = triplet(input)
    print(output.shape)
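A small shape sketch (toy sizes, assumed) of the rotations TripletAttention performs, which may help when reading forward:

import torch

x = torch.randn(2, 16, 8, 8)   # (B, C, H, W)
cw = x.permute(0, 2, 1, 3)     # (B, H, C, W): the gate attends over the C-W plane
hc = x.permute(0, 3, 2, 1)     # (B, W, H, C): the gate attends over the H-C plane
print(cw.shape, hc.shape)
# After each gate the inverse permute restores (B, C, H, W), so the three
# branches can be averaged elementwise.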
@ -0,0 +1,97 @@
# https://github.com/mindspore-courses/External-Attention-MindSpore/blob/main/model/attention/UFOAttention.py

"""
Key features of this module:

Multi-head self-attention: the input is projected in several different ways and
split into h attention heads, each computing its own attention.

Linear projections: fc_q, fc_k and fc_v generate the query (Q), key (K) and
value (V) vectors; fc_o is the output projection.

Weight initialization: conv layers use He (Kaiming) initialization and linear
layers a small normal distribution, to help training converge well.

Attention computation: instead of a softmax over Q K^T, UFO attention first
multiplies K^T with V, then normalizes both that product and Q with the custom
XNorm function, scaled by a learnable gamma, before combining them.

Head fusion: the per-head outputs are concatenated and projected by fc_o.

Dropout regularization reduces the risk of overfitting, and the learnable
gamma acts as a parameterized scaling factor for the normalization.

Overall, UFOAttention is a drop-in self-attention layer that derives queries,
keys and values from its inputs and fuses the heads into a final feature
representation; it plays the same role as the attention layers in Transformer
models for sequence data.
"""

import torch
from torch import nn
from torch.nn import init


def XNorm(x, gamma):
    norm_tensor = torch.norm(x, 2, -1, True)
    return x * gamma / norm_tensor


class UFOAttention(nn.Module):
    '''
    Scaled dot-product attention
    '''

    def __init__(self, d_model, d_k, d_v, h, dropout=.1):
        '''
        :param d_model: Output dimensionality of the model
        :param d_k: Dimensionality of queries and keys
        :param d_v: Dimensionality of values
        :param h: Number of heads
        '''
        super(UFOAttention, self).__init__()
        self.fc_q = nn.Linear(d_model, h * d_k)
        self.fc_k = nn.Linear(d_model, h * d_k)
        self.fc_v = nn.Linear(d_model, h * d_v)
        self.fc_o = nn.Linear(h * d_v, d_model)
        self.dropout = nn.Dropout(dropout)
        self.gamma = nn.Parameter(torch.randn((1, h, 1, 1)))

        self.d_model = d_model
        self.d_k = d_k
        self.d_v = d_v
        self.h = h

        self.init_weights()

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, queries, keys, values):
        b_s, nq = queries.shape[:2]
        nk = keys.shape[1]

        q = self.fc_q(queries).view(b_s, nq, self.h, self.d_k).permute(0, 2, 1, 3)  # (b_s, h, nq, d_k)
        k = self.fc_k(keys).view(b_s, nk, self.h, self.d_k).permute(0, 2, 3, 1)  # (b_s, h, d_k, nk)
        v = self.fc_v(values).view(b_s, nk, self.h, self.d_v).permute(0, 2, 1, 3)  # (b_s, h, nk, d_v)

        kv = torch.matmul(k, v)  # (b_s, h, d_k, d_v)
        kv_norm = XNorm(kv, self.gamma)  # (b_s, h, d_k, d_v)
        q_norm = XNorm(q, self.gamma)  # (b_s, h, nq, d_k)
        out = torch.matmul(q_norm, kv_norm).permute(0, 2, 1, 3).contiguous().view(b_s, nq, self.h * self.d_v)
        out = self.fc_o(out)  # (b_s, nq, d_model)

        return out


if __name__ == '__main__':
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    block = UFOAttention(d_model=512, d_k=512, d_v=512, h=8).to(device)
    input = torch.rand(64, 64, 512, device=device)
    output = block(input, input, input)
    print(input.size(), output.size())
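For intuition, a minimal sketch (toy sizes, assumed) of why multiplying K^T with V before involving Q avoids the quadratic attention matrix:

import torch

b, h, n, d = 1, 2, 1024, 32
q = torch.randn(b, h, n, d)
k = torch.randn(b, h, d, n)
v = torch.randn(b, h, n, d)

kv = k @ v    # (b, h, d, d): cost grows linearly in n
out = q @ kv  # (b, h, n, d): again linear in n
print(out.shape)  # no (n x n) attention matrix is ever materialized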
Binary file not shown.
@ -0,0 +1,98 @@
# https://github.com/Andrew-Qibin/VisionPermutator

"""
MLP (Multi-Layer Perceptron) module:

    A two-layer perceptron used for linear feature transformation and
    extraction.

    Constructor (__init__) arguments:

    in_features: input feature dimension.
    hidden_features: hidden layer dimension.
    out_features: output feature dimension.
    act_layer: activation function, GELU by default.
    drop: dropout probability, 0.1 by default.

    The module consists of two linear layers (fc1 and fc2), an activation
    (act_layer) and a Dropout layer (drop).

    forward passes the input through fc1 and the activation, applies Dropout,
    then produces the output through fc2 followed by Dropout again.

WeightedPermuteMLP module:

    A permute-MLP block that transforms features along the channel, height and
    width axes and recombines them with learned weights.

    Constructor (__init__) arguments:

    dim: input feature dimension.
    seg_dim: number of segments, 8 by default.
    qkv_bias: whether the three projections use a bias, False by default.
    proj_drop: dropout probability after the output projection, 0 by default.

    The input is projected by three linear layers (mlp_c, mlp_h and mlp_w) for
    the channel, height and width directions respectively. For the spatial
    branches the tensor is split into segments and permuted so the linear
    layer mixes along that axis, then permuted back. A reweighting MLP
    computes a softmax weight per branch, the branches are combined as a
    weighted sum, and the result is post-processed by a projection layer and
    Dropout.

MLP mainly extracts local (per-position) features, while WeightedPermuteMLP
reweights and recombines them to strengthen the global representation.
"""

import torch
from torch import nn


class MLP(nn.Module):
    def __init__(self, in_features, hidden_features, out_features, act_layer=nn.GELU, drop=0.1):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        return self.drop(self.fc2(self.drop(self.act(self.fc1(x)))))


class WeightedPermuteMLP(nn.Module):
    def __init__(self, dim, seg_dim=8, qkv_bias=False, proj_drop=0.):
        super().__init__()
        self.seg_dim = seg_dim

        self.mlp_c = nn.Linear(dim, dim, bias=qkv_bias)
        self.mlp_h = nn.Linear(dim, dim, bias=qkv_bias)
        self.mlp_w = nn.Linear(dim, dim, bias=qkv_bias)

        self.reweighting = MLP(dim, dim // 4, dim * 3)

        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, H, W, C = x.shape

        c_embed = self.mlp_c(x)

        S = C // self.seg_dim
        h_embed = x.reshape(B, H, W, self.seg_dim, S).permute(0, 3, 2, 1, 4).reshape(B, self.seg_dim, W, H * S)
        h_embed = self.mlp_h(h_embed).reshape(B, self.seg_dim, W, H, S).permute(0, 3, 2, 1, 4).reshape(B, H, W, C)

        w_embed = x.reshape(B, H, W, self.seg_dim, S).permute(0, 3, 1, 2, 4).reshape(B, self.seg_dim, H, W * S)
        w_embed = self.mlp_w(w_embed).reshape(B, self.seg_dim, H, W, S).permute(0, 2, 3, 1, 4).reshape(B, H, W, C)

        weight = (c_embed + h_embed + w_embed).permute(0, 3, 1, 2).flatten(2).mean(2)
        weight = self.reweighting(weight).reshape(B, C, 3).permute(2, 0, 1).softmax(0).unsqueeze(2).unsqueeze(2)

        x = c_embed * weight[0] + w_embed * weight[1] + h_embed * weight[2]

        x = self.proj_drop(self.proj(x))

        return x


if __name__ == '__main__':
    input = torch.randn(64, 8, 8, 512)
    seg_dim = 8
    vip = WeightedPermuteMLP(512, seg_dim)
    out = vip(input)
    print(out.shape)
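Note that the block expects channels-last input (B, H, W, C) and that H times C // seg_dim must equal dim for the height branch. A small sketch (assumed sizes) of wrapping it for the usual NCHW layout:

import torch

x = torch.randn(4, 512, 8, 8)             # NCHW, as conv backbones produce
vip = WeightedPermuteMLP(512, 8)          # 8 * (512 // 8) == 512, as required
y = vip(x.permute(0, 2, 3, 1))            # to (B, H, W, C)
y = y.permute(0, 3, 1, 2).contiguous()    # back to NCHW
print(y.shape)                            # torch.Size([4, 512, 8, 8])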
@ -0,0 +1,131 @@
import torch
from torch import nn
from torch.nn import init

from einops.einops import rearrange


def to_3d(x):
    return rearrange(x, 'b c h w -> b (h w) c')


def to_4d(x, h, w):
    return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)


class Partial_conv3(nn.Module):

    def __init__(self, dim, n_div, forward):
        super().__init__()
        self.dim_conv3 = dim // n_div
        self.dim_untouched = dim - self.dim_conv3
        self.partial_conv3 = nn.Conv2d(self.dim_conv3, self.dim_conv3, 3, 1, 1, bias=False)

        if forward == 'slicing':
            self.forward = self.forward_slicing
        elif forward == 'split_cat':
            self.forward = self.forward_split_cat
        else:
            raise NotImplementedError

    def forward_slicing(self, x):
        # only for inference
        x = x.clone()  # !!! Keep the original input intact for the residual connection later
        x[:, :self.dim_conv3, :, :] = self.partial_conv3(x[:, :self.dim_conv3, :, :])

        return x

    def forward_split_cat(self, x):
        x = to_4d(x, 28, 28)  # assumes a 28x28 token grid (N = 784)
        # for training/inference
        x1, x2 = torch.split(x, [self.dim_conv3, self.dim_untouched], dim=1)
        x1 = self.partial_conv3(x1)
        x = torch.cat((x1, x2), 1)

        x = to_3d(x)
        return x


class ExternalAttention(nn.Module):
    def __init__(self, d_model, S=64):
        super().__init__()
        self.mk = nn.Linear(d_model, S, bias=False)
        self.mv = nn.Linear(S, d_model, bias=False)
        self.softmax = nn.Softmax(dim=1)
        self.init_weights()

        self.pa = Partial_conv3(128, 2, 'split_cat')

    def init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

    def forward(self, queries):  # torch.Size([32, 784, 128])
        queries = self.pa(queries)

        attn = self.mk(queries)  # torch.Size([32, 784, 8])

        attn = self.softmax(attn)  # torch.Size([32, 784, 8])
        attn = attn / torch.sum(attn, dim=2, keepdim=True)  # torch.Size([32, 784, 8])
        out = self.mv(attn)  # torch.Size([32, 784, 128])

        return out


# class Paex(nn.Module):  # serial combination
#     def __init__(self):
#         super(Paex, self).__init__()
#         self.pa = Partial_conv3(128, 2, 'split_cat')
#         self.ex = ExternalAttention(d_model=128, S=8)
#
#     def forward(self, x):
#         x1 = self.pa(x)
#         x2 = self.ex(x1)
#         return x2


# class Paex(nn.Module):  # parallel combination
#     def __init__(self):
#         super(Paex, self).__init__()
#         self.pa = Partial_conv3(128, 2, 'split_cat')
#         self.ex = ExternalAttention(d_model=128, S=8)
#
#     def forward(self, x):
#         x1 = self.pa(x)
#         x2 = self.ex(x)
#         x3 = x1 + x2
#         return x3
#
#
# Input: B N C, output: B N C
# if __name__ == '__main__':
#     block = Paex()
#     input = torch.rand(32, 784, 128)
#     output = block(input)
#     print(input.size())
#     print(output.size())


# Input: B N C, output: B N C
if __name__ == '__main__':
    block = ExternalAttention(d_model=128, S=8)
    input = torch.rand(32, 784, 128)
    output = block(input)
    print(input.size())
    print(output.size())
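A minimal numeric sketch (assumed sizes) of the partial-convolution idea used above: only the first 1/n_div of the channels are convolved, the rest pass through untouched:

import torch
from torch import nn

dim, n_div = 8, 4
conv = nn.Conv2d(dim // n_div, dim // n_div, 3, 1, 1, bias=False)

x = torch.randn(1, dim, 5, 5)
x1, x2 = torch.split(x, [dim // n_div, dim - dim // n_div], dim=1)
y = torch.cat((conv(x1), x2), dim=1)             # 2 of the 8 channels convolved
print(y.shape, torch.equal(y[:, 2:], x[:, 2:]))  # untouched channels are identical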
Binary file not shown.
@ -0,0 +1,157 @@
import torch
from torch import nn
import math

from einops.einops import rearrange


def to_3d(x):
    return rearrange(x, 'b c h w -> b (h w) c')


def to_4d(x, h, w):
    return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)


class AKConv(nn.Module):
    def __init__(self, inc, outc, num_param, stride=1, bias=None):
        super(AKConv, self).__init__()
        self.num_param = num_param
        self.stride = stride
        self.conv = nn.Sequential(nn.Conv2d(inc, outc, kernel_size=(num_param, 1), stride=(num_param, 1), bias=bias),
                                  nn.BatchNorm2d(outc),
                                  nn.SiLU())  # the conv adds BN and SiLU to match the original Conv in YOLOv5.
        self.p_conv = nn.Conv2d(inc, 2 * num_param, kernel_size=3, padding=1, stride=stride)
        nn.init.constant_(self.p_conv.weight, 0)
        self.p_conv.register_full_backward_hook(self._set_lr)

    @staticmethod
    def _set_lr(module, grad_input, grad_output):
        # Scale the gradients of the offset branch by 0.1; the tuple must be
        # returned for register_full_backward_hook to take effect (the original
        # built generators and discarded them, so the hook did nothing).
        return tuple(g * 0.1 if g is not None else None for g in grad_input)

    def forward(self, x):
        # N is num_param.
        offset = self.p_conv(x)
        dtype = offset.data.type()
        N = offset.size(1) // 2
        # (b, 2N, h, w)
        p = self._get_p(offset, dtype)

        # (b, h, w, 2N)
        p = p.contiguous().permute(0, 2, 3, 1)
        q_lt = p.detach().floor()
        q_rb = q_lt + 1

        q_lt = torch.cat([torch.clamp(q_lt[..., :N], 0, x.size(2) - 1), torch.clamp(q_lt[..., N:], 0, x.size(3) - 1)],
                         dim=-1).long()
        q_rb = torch.cat([torch.clamp(q_rb[..., :N], 0, x.size(2) - 1), torch.clamp(q_rb[..., N:], 0, x.size(3) - 1)],
                         dim=-1).long()
        q_lb = torch.cat([q_lt[..., :N], q_rb[..., N:]], dim=-1)
        q_rt = torch.cat([q_rb[..., :N], q_lt[..., N:]], dim=-1)

        # clip p
        p = torch.cat([torch.clamp(p[..., :N], 0, x.size(2) - 1), torch.clamp(p[..., N:], 0, x.size(3) - 1)], dim=-1)

        # bilinear kernel (b, h, w, N)
        g_lt = (1 + (q_lt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_lt[..., N:].type_as(p) - p[..., N:]))
        g_rb = (1 - (q_rb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_rb[..., N:].type_as(p) - p[..., N:]))
        g_lb = (1 + (q_lb[..., :N].type_as(p) - p[..., :N])) * (1 - (q_lb[..., N:].type_as(p) - p[..., N:]))
        g_rt = (1 - (q_rt[..., :N].type_as(p) - p[..., :N])) * (1 + (q_rt[..., N:].type_as(p) - p[..., N:]))

        # resampling the features based on the modified coordinates.
        x_q_lt = self._get_x_q(x, q_lt, N)
        x_q_rb = self._get_x_q(x, q_rb, N)
        x_q_lb = self._get_x_q(x, q_lb, N)
        x_q_rt = self._get_x_q(x, q_rt, N)

        # bilinear interpolation
        x_offset = g_lt.unsqueeze(dim=1) * x_q_lt + \
                   g_rb.unsqueeze(dim=1) * x_q_rb + \
                   g_lb.unsqueeze(dim=1) * x_q_lb + \
                   g_rt.unsqueeze(dim=1) * x_q_rt

        x_offset = self._reshape_x_offset(x_offset, self.num_param)
        out = self.conv(x_offset)

        return out

    # generating the initial sampled shapes for AKConv with different sizes.
    def _get_p_n(self, N, dtype):
        base_int = round(math.sqrt(self.num_param))
        row_number = self.num_param // base_int
        mod_number = self.num_param % base_int
        p_n_x, p_n_y = torch.meshgrid(
            torch.arange(0, row_number),
            torch.arange(0, base_int))
        p_n_x = torch.flatten(p_n_x)
        p_n_y = torch.flatten(p_n_y)
        if mod_number > 0:
            mod_p_n_x, mod_p_n_y = torch.meshgrid(
                torch.arange(row_number, row_number + 1),
                torch.arange(0, mod_number))

            mod_p_n_x = torch.flatten(mod_p_n_x)
            mod_p_n_y = torch.flatten(mod_p_n_y)
            p_n_x, p_n_y = torch.cat((p_n_x, mod_p_n_x)), torch.cat((p_n_y, mod_p_n_y))
        p_n = torch.cat([p_n_x, p_n_y], 0)
        p_n = p_n.view(1, 2 * N, 1, 1).type(dtype)
        return p_n

    # no zero-padding
    def _get_p_0(self, h, w, N, dtype):
        p_0_x, p_0_y = torch.meshgrid(
            torch.arange(0, h * self.stride, self.stride),
            torch.arange(0, w * self.stride, self.stride))

        p_0_x = torch.flatten(p_0_x).view(1, 1, h, w).repeat(1, N, 1, 1)
        p_0_y = torch.flatten(p_0_y).view(1, 1, h, w).repeat(1, N, 1, 1)
        p_0 = torch.cat([p_0_x, p_0_y], 1).type(dtype)

        return p_0

    def _get_p(self, offset, dtype):
        N, h, w = offset.size(1) // 2, offset.size(2), offset.size(3)

        # (1, 2N, 1, 1)
        p_n = self._get_p_n(N, dtype)
        # (1, 2N, h, w)
        p_0 = self._get_p_0(h, w, N, dtype)
        p = p_0 + p_n + offset
        return p

    def _get_x_q(self, x, q, N):
        b, h, w, _ = q.size()
        padded_w = x.size(3)
        c = x.size(1)
        # (b, c, h*w)
        x = x.contiguous().view(b, c, -1)

        # (b, h, w, N)
        index = q[..., :N] * padded_w + q[..., N:]  # offset_x*w + offset_y
        # (b, c, h*w*N)
        index = index.contiguous().unsqueeze(dim=1).expand(-1, c, -1, -1, -1).contiguous().view(b, c, -1)

        x_offset = x.gather(dim=-1, index=index).contiguous().view(b, c, h, w, N)

        return x_offset

    # Stacking resampled features in the row direction.
    @staticmethod
    def _reshape_x_offset(x_offset, num_param):
        b, c, h, w, n = x_offset.size()
        # using Conv3d:
        # x_offset = x_offset.permute(0,1,4,2,3), then Conv3d(c, c_out, kernel_size=(num_param,1,1), stride=(num_param,1,1), bias=False)
        # using a 1x1 Conv:
        # x_offset = x_offset.permute(0,1,4,2,3), then x_offset.view(b, c*num_param, h, w), finally Conv2d(c*num_param, c_out, kernel_size=1, stride=1, bias=False)
        # here we use the column conv: Conv2d(inc, outc, kernel_size=(num_param, 1), stride=(num_param, 1), bias=bias)

        x_offset = rearrange(x_offset, 'b c h w n -> b c (h n) w')
        return x_offset


if __name__ == '__main__':
    block = AKConv(inc=32, outc=32, num_param=3)
    input = torch.rand(64, 32, 15, 15)
    output = block(input)
    print(input.size())
    print(output.size())
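For intuition, a tiny sketch (num_param=5 assumed) of the initial sampling grid _get_p_n builds: a near-square base grid plus a partial extra row completing the parameter count:

import math
import torch

num_param = 5
base_int = round(math.sqrt(num_param))  # 2
row_number, mod_number = num_param // base_int, num_param % base_int
p_n_x, p_n_y = torch.meshgrid(torch.arange(row_number), torch.arange(base_int))
pts = torch.stack([p_n_x.flatten(), p_n_y.flatten()], 1).tolist()
print(pts)  # [[0, 0], [0, 1], [1, 0], [1, 1]]; one extra point at row 2 completes 5 samples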
Binary file not shown.
@ -0,0 +1,430 @@
import torch
import torch.nn as nn


class BatchNorm2D(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
        super(BatchNorm2D, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.rescale = rescale

        if self.rescale:
            # gamma and beta are learnable, one value per channel; gamma starts
            # at ones and beta at zeros (no scaling/shifting at the start)
            self.gamma = nn.Parameter(torch.ones(num_channels))
            self.beta = nn.Parameter(torch.zeros(num_channels))
        # running mean and variance are tracked but not learned; momentum weighs
        # the running statistics against the current batch via
        # momentum * running + (1 - momentum) * current
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        assert x.shape[1] == self.num_channels
        assert len(x.shape) == 4  # (batchsize, numchannels, height, width)

        if self.training:
            # mean and variance over all dimensions except the channel dimension;
            # the biased variance formula is used during training
            variance = torch.var(x, dim=[0, 2, 3], unbiased=False)
            mean = torch.mean(x, dim=[0, 2, 3])
            self.runningmean.mul_(self.momentum).add_((1 - self.momentum) * mean.detach())
            self.runningvar.mul_(self.momentum).add_((1 - self.momentum) * variance.detach())
            out = (x - mean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
                variance.view([1, self.num_channels, 1, 1]) + self.epsilon)
        else:
            # during testing just use the running mean and the (unbiased) variance
            m = x.shape[0] * x.shape[2] * x.shape[3]
            out = (x - self.runningmean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
                (m / (m - 1)) * self.runningvar.view([1, self.num_channels, 1, 1]) + self.epsilon)
        if self.rescale:
            out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
        return out


class BatchNormm2D(nn.Module):
    # Batch-norm variant used inside BatchChannelNorm: it normalizes but leaves
    # the gamma/beta rescaling to the caller.
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
        super(BatchNormm2D, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.rescale = rescale

        if self.rescale:
            self.gamma = nn.Parameter(torch.ones(num_channels))
            self.beta = nn.Parameter(torch.zeros(num_channels))
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        assert x.shape[1] == self.num_channels
        assert len(x.shape) == 4  # (batchsize, numchannels, height, width)

        if self.training:
            variance = torch.var(x, dim=[0, 2, 3], unbiased=False)
            mean = torch.mean(x, dim=[0, 2, 3])
            # detach so the running statistics do not participate in autograd
            self.runningmean = (1 - self.momentum) * mean.detach() + self.momentum * self.runningmean
            self.runningvar = (1 - self.momentum) * variance.detach() + self.momentum * self.runningvar
            out = (x - mean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
                variance.view([1, self.num_channels, 1, 1]) + self.epsilon)
        else:
            # during testing just use the running mean and the (unbiased) variance
            m = x.shape[0] * x.shape[2] * x.shape[3]
            out = (x - self.runningmean.view([1, self.num_channels, 1, 1])) / torch.sqrt(
                (m / (m - 1)) * self.runningvar.view([1, self.num_channels, 1, 1]) + self.epsilon)
        # normalization only (fixed: the original returned None when rescale was False)
        return out


class BatchNormm2DViiT(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
        super(BatchNormm2DViiT, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.rescale = rescale

        if self.rescale:
            self.gamma = nn.Parameter(torch.ones(num_channels))
            self.beta = nn.Parameter(torch.zeros(num_channels))
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        if self.training:
            mean = x.mean(-1, keepdim=True)  # mean: [bsz, max_len, 1]
            std = x.std(-1, keepdim=True)  # std: [bsz, max_len, 1]
            self.runningmean = (1 - self.momentum) * mean.detach() + self.momentum * self.runningmean
            self.runningvar = (1 - self.momentum) * std.detach() + self.momentum * self.runningvar
            out = (x - mean) / (std + self.epsilon)
        else:
            m = x.shape[0] * x.shape[2] * x.shape[3]
            out = (x - self.runningmean) / torch.sqrt(
                (m / (m - 1)) * self.runningvar + self.epsilon)
        if self.rescale:
            out = self.gamma * out + self.beta  # fixed: was self.a_2 / self.b_2, which do not exist here
        return out


class BatchNormm2DViTC(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
        super(BatchNormm2DViTC, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.rescale = rescale

        if self.rescale:
            self.gamma = nn.Parameter(torch.ones(num_channels))
            self.beta = nn.Parameter(torch.zeros(num_channels))
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        if self.training:
            mean = x.mean(-1, keepdim=True)  # mean: [bsz, max_len, 1]
            std = x.std(-1, keepdim=True)  # std: [bsz, max_len, 1]
            self.runningmean = (1 - self.momentum) * mean.detach() + self.momentum * self.runningmean
            self.runningvar = (1 - self.momentum) * std.detach() + self.momentum * self.runningvar
            out = (x - mean) / (std + self.epsilon)
        else:
            m = x.shape[0] * x.shape[2] * x.shape[3]
            out = (x - self.runningmean) / torch.sqrt(
                (m / (m - 1)) * self.runningvar + self.epsilon)
        # normalization only; rescaling is left to the caller (fixed: the
        # original returned None when rescale was False)
        return out


class InstanceNorm2D(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9, rescale=True):
        super(InstanceNorm2D, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.rescale = rescale

        if self.rescale:
            self.gamma = nn.Parameter(torch.ones(num_channels))
            self.beta = nn.Parameter(torch.zeros(num_channels))

        # running mean and variance keep the same per-channel shape as in batch
        # norm so single samples can be handled at test time
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        assert x.shape[1] == self.num_channels
        assert len(x.shape) == 4  # (batchsize, numchannels, height, width)

        # instance statistics over the spatial dimensions (biased variance),
        # both in training and in evaluation
        variance, mean = torch.var(x, dim=[2, 3], unbiased=False), torch.mean(x, dim=[2, 3])
        out = (x - mean.view([-1, self.num_channels, 1, 1])) / torch.sqrt(
            variance.view([-1, self.num_channels, 1, 1]) + self.epsilon)

        if self.rescale:
            out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
        return out


class LayerNormViT(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNormViT, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)  # mean: [bsz, max_len, 1]
        std = x.std(-1, keepdim=True)  # std: [bsz, max_len, 1]
        return self.a_2 * (x - mean) / (std + self.eps) + self.b_2


class LayerNormViTC(nn.Module):
    def __init__(self, features, eps=1e-6):
        super(LayerNormViTC, self).__init__()
        self.a_2 = nn.Parameter(torch.ones(features))
        self.b_2 = nn.Parameter(torch.zeros(features))
        self.eps = eps

    def forward(self, x):
        mean = x.mean(-1, keepdim=True)  # mean: [bsz, max_len, 1]
        std = x.std(-1, keepdim=True)  # std: [bsz, max_len, 1]
        return (x - mean) / (std + self.eps)


class LayerNorm2D(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5):
        super(LayerNorm2D, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon

        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        # assert list(x.shape)[1] == self.num_channels
        # assert len(x.shape) == 4  # (batchsize, numchannels, height, width)

        variance, mean = torch.var(x, dim=[1, 2, 3], unbiased=False), torch.mean(x, dim=[1, 2, 3])
        out = (x - mean.view([-1, 1, 1, 1])) / torch.sqrt(variance.view([-1, 1, 1, 1]) + self.epsilon)

        out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
        return out


class LayerNormm2D(nn.Module):
    # Layer-norm variant used inside BatchChannelNorm: normalization only.
    def __init__(self, num_channels, epsilon=1e-5):
        super(LayerNormm2D, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon

        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        assert list(x.shape)[1] == self.num_channels
        assert len(x.shape) == 4  # (batchsize, numchannels, height, width)
        variance, mean = torch.var(x, dim=[1, 2, 3], unbiased=False), torch.mean(x, dim=[1, 2, 3])

        out = (x - mean.view([-1, 1, 1, 1])) / torch.sqrt(variance.view([-1, 1, 1, 1]) + self.epsilon)
        return out


class GroupNorm2D(nn.Module):
    def __init__(self, num_channels, num_groups=4, epsilon=1e-5):
        super(GroupNorm2D, self).__init__()
        self.num_channels = num_channels
        # self.num_groups = num_groups
        self.num_groups = num_channels // 4  # note: overrides the num_groups argument
        self.epsilon = epsilon
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        assert x.shape[1] == self.num_channels
        assert len(x.shape) == 4  # (batchsize, numchannels, height, width)
        [N, C, H, W] = list(x.shape)

        out = torch.reshape(x, (N, self.num_groups, self.num_channels // self.num_groups, H, W))
        variance, mean = torch.var(out, dim=[2, 3, 4], unbiased=False, keepdim=True), torch.mean(out, dim=[2, 3, 4],
                                                                                                 keepdim=True)
        out = (out - mean) / torch.sqrt(variance + self.epsilon)
        out = out.view(N, self.num_channels, H, W)
        out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
        return out


class BatchNorm_ByoL(nn.Module):
    # Fixed across the four *_ByoL/*_Byol classes below: the original took an
    # unused `bn` positional argument, so callers passing the channel count
    # actually left num_channels at its default of 2048.
    def __init__(self, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
        super(BatchNorm_ByoL, self).__init__()
        self.num_channels = num_channels
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))
        self.eps = epsilon
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        std = self.runningvar.add(self.eps).sqrt()
        return x.sub(self.runningmean).div(std).mul(self.gamma).add(self.beta)


class LaychNorm_ByoL(nn.Module):
    def __init__(self, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
        super(LaychNorm_ByoL, self).__init__()
        self.num_channels = num_channels
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))
        self.eps = epsilon
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        std = self.runningvar.add(self.eps).sqrt()
        return x.sub(self.runningmean).div(std).mul(self.gamma).add(self.beta)


class BatchNorm_Byol(nn.Module):
    def __init__(self, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
        super(BatchNorm_Byol, self).__init__()
        self.num_channels = num_channels
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))
        self.eps = epsilon
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        std = self.runningvar.add(self.eps).sqrt()
        return x.sub(self.runningmean).div(std)


class LaychNorm_Byol(nn.Module):
    def __init__(self, num_channels=2048, epsilon=1e-5, momentum=0.9, rescale=True):
        super(LaychNorm_Byol, self).__init__()
        self.num_channels = num_channels
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))
        self.eps = epsilon
        self.register_buffer('runningmean', torch.zeros(num_channels))
        self.register_buffer('runningvar', torch.ones(num_channels))

    def forward(self, x):
        std = self.runningvar.add(self.eps).sqrt()
        return x.sub(self.runningmean).div(std)


class BatchChannelNorm_Byol(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9):
        super(BatchChannelNorm_Byol, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.Batchh = BatchNorm_Byol(self.num_channels, epsilon=self.epsilon)
        self.layeer = LaychNorm_Byol(self.num_channels, epsilon=self.epsilon)
        # The learnable BCN mixing variable
        self.BCN_var = nn.Parameter(torch.ones(self.num_channels))
        # Gamma and beta for rescaling
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        X = self.Batchh(x)
        Y = self.layeer(x)
        out = self.BCN_var * X + (1 - self.BCN_var) * Y  # fixed: parentheses were missing
        out = self.gamma * out + self.beta
        return out


class BatchChannelNorm(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9):
        super(BatchChannelNorm, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.Batchh = BatchNormm2D(self.num_channels, epsilon=self.epsilon)
        self.layeer = LayerNormm2D(self.num_channels, epsilon=self.epsilon)
        # The learnable BCN mixing variable
        self.BCN_var = nn.Parameter(torch.ones(self.num_channels))
        # Gamma and beta for rescaling
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        X = self.Batchh(x)
        Y = self.layeer(x)
        out = self.BCN_var.view([1, self.num_channels, 1, 1]) * X + (
                1 - self.BCN_var.view([1, self.num_channels, 1, 1])) * Y
        out = self.gamma.view([1, self.num_channels, 1, 1]) * out + self.beta.view([1, self.num_channels, 1, 1])
        return out


class BatchChannelNormvit(nn.Module):
    def __init__(self, num_channels, epsilon=1e-5, momentum=0.9):
        super(BatchChannelNormvit, self).__init__()
        self.num_channels = num_channels
        self.epsilon = epsilon
        self.momentum = momentum
        self.Batchh = BatchNormm2DViTC(self.num_channels, epsilon=self.epsilon)
        self.layeer = LayerNormViTC(self.num_channels)
        # The learnable BCN mixing variable
        self.BCN_var = nn.Parameter(torch.ones(self.num_channels))
        # Gamma and beta for rescaling
        self.gamma = nn.Parameter(torch.ones(num_channels))
        self.beta = nn.Parameter(torch.zeros(num_channels))

    def forward(self, x):
        X = self.Batchh(x)
        Y = self.layeer(x)
        out = self.BCN_var * X + (1 - self.BCN_var) * Y
        out = self.gamma * out + self.beta
        return out


if __name__ == '__main__':
    block = BatchChannelNorm(num_channels=64)
    input = torch.rand(64, 64, 9, 9)
    output = block(input)
    print(input.size())
    print(output.size())
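A quick sanity check (assumed shapes) that the custom BatchNorm2D above matches nn.BatchNorm2d in training mode, a useful test when modifying any of these classes:

import torch
from torch import nn

torch.manual_seed(0)
x = torch.randn(8, 16, 5, 5)

ours = BatchNorm2D(16)             # custom implementation from above
ref = nn.BatchNorm2d(16, eps=1e-5)

ours.train(); ref.train()
print(torch.allclose(ours(x), ref(x), atol=1e-5))  # True: same normalization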
Binary file not shown.
@ -0,0 +1,31 @@
import torch
import torch.nn as nn
from pytorch_wavelets import DWTForward


class Down_wt(nn.Module):
    def __init__(self, in_ch, out_ch):
        super(Down_wt, self).__init__()
        self.wt = DWTForward(J=1, mode='zero', wave='haar')
        self.conv_bn_relu = nn.Sequential(
            nn.Conv2d(in_ch * 4, out_ch, kernel_size=1, stride=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        yL, yH = self.wt(x)
        y_HL = yH[0][:, :, 0, :, :]
        y_LH = yH[0][:, :, 1, :, :]
        y_HH = yH[0][:, :, 2, :, :]
        x = torch.cat([yL, y_HL, y_LH, y_HH], dim=1)
        x = self.conv_bn_relu(x)
        return x


# Input: N C H W, output: N C H/2 W/2 (the Haar DWT halves the spatial size)
if __name__ == '__main__':
    block = Down_wt(64, 64)  # in channels, out channels
    input = torch.rand(3, 64, 64, 64)
    output = block(input)
    print(output.size())
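A shape sketch (toy sizes, assuming pytorch_wavelets is installed) of what DWTForward returns and why the concatenation above has four times the input channels:

import torch
from pytorch_wavelets import DWTForward

wt = DWTForward(J=1, mode='zero', wave='haar')
x = torch.randn(1, 8, 32, 32)
yL, yH = wt(x)
print(yL.shape)     # torch.Size([1, 8, 16, 16]): low-frequency band
print(yH[0].shape)  # torch.Size([1, 8, 3, 16, 16]): HL, LH, HH bands stacked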
Binary file not shown.
@ -0,0 +1,128 @@
import torch
from torch import nn
import math


"""
This file implements the MCALayer (Multi-modal Channel Attention Layer), which
strengthens the interaction and fusion of information across channels. Its key
components and characteristics:

MCAGate module: a multi-modal attention gate that summarizes the input with
several pooling operators (average, max and standard-deviation pooling); the
different statistics capture complementary channel properties.

Cross-dimension interaction: MCALayer applies the gate along three rotations of
the tensor, height-channel (h-cw), width-channel (w-hc) and channel-spatial
(c-hw), so the model can relate and integrate information across all
dimensions.

Spatial handling: with no_spatial=True only the two rotated branches are used;
otherwise the channel-spatial branch is added as well.

Weight fusion: when two pooling statistics are used, their outputs are combined
with learned weights, so the model decides how important each statistic is.

Multi-scale kernel size: the 1-D conv kernel of the channel branch is derived
from the number of input channels, making the module adaptive.

Overall, MCA captures richer relations between features and improves
performance on vision tasks such as classification, detection and semantic
segmentation; its modular design makes it easy to embed in existing networks.
"""


__all__ = ['MCALayer', 'MCAGate']


class StdPool(nn.Module):
    def __init__(self):
        super(StdPool, self).__init__()

    def forward(self, x):
        b, c, _, _ = x.size()

        std = x.view(b, c, -1).std(dim=2, keepdim=True)
        std = std.reshape(b, c, 1, 1)

        return std


class MCAGate(nn.Module):
    def __init__(self, k_size, pool_types=['avg', 'std']):
        """Constructs a MCAGate module.
        Args:
            k_size: kernel size
            pool_types: pooling type. 'avg': average pooling, 'max': max pooling, 'std': standard deviation pooling.
        """
        super(MCAGate, self).__init__()

        self.pools = nn.ModuleList([])
        for pool_type in pool_types:
            if pool_type == 'avg':
                self.pools.append(nn.AdaptiveAvgPool2d(1))
            elif pool_type == 'max':
                self.pools.append(nn.AdaptiveMaxPool2d(1))
            elif pool_type == 'std':
                self.pools.append(StdPool())
            else:
                raise NotImplementedError

        self.conv = nn.Conv2d(1, 1, kernel_size=(1, k_size), stride=1, padding=(0, (k_size - 1) // 2), bias=False)
        self.sigmoid = nn.Sigmoid()

        self.weight = nn.Parameter(torch.rand(2))

    def forward(self, x):
        feats = [pool(x) for pool in self.pools]

        if len(feats) == 1:
            out = feats[0]
        elif len(feats) == 2:
            weight = torch.sigmoid(self.weight)
            out = 1 / 2 * (feats[0] + feats[1]) + weight[0] * feats[0] + weight[1] * feats[1]
        else:
            assert False, "Feature Extraction Exception!"

        out = out.permute(0, 3, 2, 1).contiguous()
        out = self.conv(out)
        out = out.permute(0, 3, 2, 1).contiguous()

        out = self.sigmoid(out)
        out = out.expand_as(x)

        return x * out


class MCALayer(nn.Module):
    def __init__(self, inp, no_spatial=False):
        """Constructs a MCA module.
        Args:
            inp: Number of channels of the input feature maps
            no_spatial: whether to build channel dimension interactions
        """
        super(MCALayer, self).__init__()

        lambd = 1.5
        gamma = 1
        temp = round(abs((math.log2(inp) - gamma) / lambd))
        kernel = temp if temp % 2 else temp - 1

        self.h_cw = MCAGate(3)
        self.w_hc = MCAGate(3)
        self.no_spatial = no_spatial
        if not no_spatial:
            self.c_hw = MCAGate(kernel)

    def forward(self, x):
        x_h = x.permute(0, 2, 1, 3).contiguous()
        x_h = self.h_cw(x_h)
        x_h = x_h.permute(0, 2, 1, 3).contiguous()

        x_w = x.permute(0, 3, 2, 1).contiguous()
        x_w = self.w_hc(x_w)
        x_w = x_w.permute(0, 3, 2, 1).contiguous()

        if not self.no_spatial:
            x_c = self.c_hw(x)
            x_out = 1 / 3 * (x_c + x_h + x_w)
        else:
            x_out = 1 / 2 * (x_h + x_w)

        return x_out


if __name__ == '__main__':
    block = MCALayer(inp=64)
    input = torch.rand(64, 64, 9, 9)
    output = block(input)
    print(input.size())
    print(output.size())
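A worked example (values assumed) of the adaptive kernel-size rule in MCALayer for inp = 64 channels:

import math

inp, lambd, gamma = 64, 1.5, 1
temp = round(abs((math.log2(inp) - gamma) / lambd))  # round(|6 - 1| / 1.5) = 3
kernel = temp if temp % 2 else temp - 1              # kept odd so the conv stays centred
print(kernel)  # 3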
Binary file not shown.
@ -0,0 +1,81 @@
import torch
import torch.nn as nn
from mmengine.model import BaseModule


"""
This file implements MSCAAttention (Multi-Scale Convolutional Attention). The
module strengthens the network's perception along specific channels and
spatial positions, helping it extract richer and more useful features.

Its main characteristics:

Multi-scale feature extraction: a depth-wise convolution with a larger kernel
(self.conv0) is followed by pairs of strip convolutions (self.conv0_1,
self.conv0_2, self.conv1_1, self.conv1_2, self.conv2_1, self.conv2_2), each
pair using a different kernel size and padding to capture context at a
different scale.

Channel mixing: the multi-scale branches are summed and mixed by a final 1x1
convolution (self.conv3).

Convolutional attention: the mixed map is multiplied elementwise with the
input, so the module selectively emphasizes or suppresses features per channel
and position.

Overall, MSCAAttention learns which channels and spatial locations matter,
which helps capture the key information in an image or feature map and
improves performance on tasks such as classification, detection and semantic
segmentation.
"""


class MSCAAttention(BaseModule):

    def __init__(self,
                 channels,
                 kernel_sizes=[5, [1, 7], [1, 11], [1, 21]],
                 paddings=[2, [0, 3], [0, 5], [0, 10]]):
        super().__init__()
        self.conv0 = nn.Conv2d(
            channels,
            channels,
            kernel_size=kernel_sizes[0],
            padding=paddings[0],
            groups=channels)
        for i, (kernel_size,
                padding) in enumerate(zip(kernel_sizes[1:], paddings[1:])):
            kernel_size_ = [kernel_size, kernel_size[::-1]]
            padding_ = [padding, padding[::-1]]
            conv_name = [f'conv{i}_1', f'conv{i}_2']
            for i_kernel, i_pad, i_conv in zip(kernel_size_, padding_,
                                               conv_name):
                self.add_module(
                    i_conv,
                    nn.Conv2d(
                        channels,
                        channels,
                        tuple(i_kernel),
                        padding=i_pad,
                        groups=channels))
        self.conv3 = nn.Conv2d(channels, channels, 1)

    def forward(self, x):
        """Forward function."""

        u = x.clone()

        attn = self.conv0(x)

        # Multi-Scale Feature extraction
        attn_0 = self.conv0_1(attn)
        attn_0 = self.conv0_2(attn_0)

        attn_1 = self.conv1_1(attn)
        attn_1 = self.conv1_2(attn_1)

        attn_2 = self.conv2_1(attn)
        attn_2 = self.conv2_2(attn_2)

        attn = attn + attn_0 + attn_1 + attn_2
        # Channel Mixing
        attn = self.conv3(attn)

        # Convolutional Attention
        x = attn * u

        return x


if __name__ == '__main__':
    block = MSCAAttention(channels=64)
    input = torch.rand(64, 64, 9, 9)
    output = block(input)
    print(input.size())
    print(output.size())
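A brief note on the strip convolutions: a 1xk followed by a kx1 depth-wise convolution covers a kxk receptive field with far fewer weights. A small count (per channel, square kernel vs strip pair):

k = 21
print(k * k, 2 * k)  # 441 vs 42 weights per channel for the largest branch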
Binary file not shown.
|
@ -0,0 +1,68 @@
|
||||||
|
import torch
from torch import nn

"""
This code implements a self-attention module named "TalkingHeadAttn", used mainly to strengthen a network's feature representation and modeling of input sequences. Its key parts and characteristics:

Multi-head self-attention: the module builds multiple attention heads by projecting the input in different ways. The num_heads parameter sets the number of heads, and each head learns to capture different feature relations within the input sequence.

Query-key-value (QKV) projection: a linear layer (nn.Linear) projects the input x into query (Q), key (K), and value (V) spaces; this is done by self.qkv. Note that, for efficiency, all three projections are produced in a single pass.

Attention computation: taking the dot product of Q and K and applying a softmax yields the attention matrix, which expresses how strongly each position in the sequence is related to every other. This is done by attn = q @ k.transpose(-2, -1) and attn = attn.softmax(dim=-1).

Cross-head mixing: the distinctive "talking heads" step. The per-head attention maps are linearly mixed across the head dimension both before the softmax (self.proj_l) and after it (self.proj_w), letting the heads exchange information; the mixed attention weights are then multiplied with the value (V) matrix.

Dropout regularization: Dropout is applied after the attention computation and the output projection to reduce the risk of overfitting.

Output: the final output is produced by self.proj and self.proj_drop.

Overall, TalkingHeadAttn uses multi-head self-attention to consider relations between different positions of the input sequence as well as different feature relations at once. This improves feature extraction and modeling on sequence data, making it effective in natural language processing and other sequence tasks; the module is typically used as a sub-module of a larger network.
"""

class TalkingHeadAttn(nn.Module):

    def __init__(self, dim, num_heads=8, qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_l = nn.Linear(num_heads, num_heads)
        self.proj_w = nn.Linear(num_heads, num_heads)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]

        attn = q @ k.transpose(-2, -1)
        attn = self.proj_l(attn.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        attn = attn.softmax(dim=-1)
        attn = self.proj_w(attn.permute(0, 2, 3, 1)).permute(0, 3, 1, 2)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


if __name__ == '__main__':
    block = TalkingHeadAttn(dim=128)
    input = torch.rand(32, 784, 128)
    output = block(input)
    print(input.size())
    print(output.size())
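# Added sketch (hypothetical, not part of the original file): the "talking
# heads" step in isolation. A (B, H, N, N) stack of per-head attention maps is
# mixed across the head axis H by a linear layer, so every output head becomes
# a learned combination of all input heads.
if __name__ == '__main__':
    B, H, N = 2, 8, 16
    maps = torch.rand(B, H, N, N)
    mix = nn.Linear(H, H)                    # mixes the head dimension only
    mixed = mix(maps.permute(0, 2, 3, 1))    # (B, N, N, H): heads last
    mixed = mixed.permute(0, 3, 1, 2)        # back to (B, H, N, N)
    print(mixed.shape)                       # torch.Size([2, 8, 16, 16])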
Binary file not shown.
@ -0,0 +1,60 @@
# https://github.com/damo-cv/KVT

"""
Main components and operations of this module:

qkv: a linear layer that maps the input features x to three linear transforms corresponding to the query, key, and value. The transforms split the input channels into multiple heads.

attn_drop and proj_drop: Dropout layers applied to the attention matrix and to the output features.

topk: a hyperparameter giving the number k of most relevant keys to keep for each query; it controls the behavior of the k-nearest-neighbor attention mechanism.

In the forward pass, the module first maps the input x to queries, keys, and values, then computes the attention matrix by matrix multiplication, with one modification: torch.topk selects the k most relevant keys for each query, and the remaining attention logits are set to negative infinity, implementing k-nearest-neighbor attention. A softmax then normalizes the result, and the values are weighted-averaged with these attention weights to produce the output features.

The core idea is to attend only to the k keys most relevant to each query, which reduces computational complexity and improves efficiency; this is particularly useful for large-scale data or models with long sequences.
"""

import torch
import torch.nn as nn

class kNNAttention(nn.Module):
    def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0., topk=100):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights
        self.scale = qk_scale or head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        self.topk = topk

    def forward(self, x):
        B, N, C = x.shape
        qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4)
        q, k, v = qkv[0], qkv[1], qkv[2]  # make torchscript happy (cannot use tensor as tuple)
        attn = (q @ k.transpose(-2, -1)) * self.scale
        # the core code block
        mask = torch.zeros(B, self.num_heads, N, N, device=x.device, requires_grad=False)
        index = torch.topk(attn, k=self.topk, dim=-1, largest=True)[1]
        mask.scatter_(-1, index, 1.)
        attn = torch.where(mask > 0, attn, torch.full_like(attn, float('-inf')))
        # end of the core code block

        attn = attn.softmax(dim=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose(1, 2).reshape(B, N, C)
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


if __name__ == '__main__':
    block = kNNAttention(dim=128)
    input = torch.rand(32, 784, 128)
    output = block(input)
    print(input.size())
    print(output.size())
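# Added sketch (hypothetical, not part of the original file): the top-k masking
# trick in isolation on a tiny logit matrix. Entries outside each row's top-2
# are set to -inf, so the softmax assigns them exactly zero weight.
if __name__ == '__main__':
    logits = torch.tensor([[3.0, 1.0, 2.0, 0.0]])
    index = torch.topk(logits, k=2, dim=-1)[1]           # indices of the 2 largest
    mask = torch.zeros_like(logits).scatter_(-1, index, 1.)
    masked = torch.where(mask > 0, logits, torch.full_like(logits, float('-inf')))
    print(masked.softmax(dim=-1))  # tensor([[0.7311, 0.0000, 0.2689, 0.0000]])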
Binary file not shown.
@ -0,0 +1,59 @@
# https://github.com/ZjjConan/SimAM

"""
The purpose of this module is to strengthen the relations between image features and thereby improve model performance.

Main components and behavior:

Initialization: the module takes a single parameter e_lambda, a small positive number (default 1e-4) used to keep the denominator away from zero for numerical stability. It also creates a Sigmoid activation act.

Forward pass: the module performs the following steps:

Read the shape of the input tensor x: batch size b, channel count c, height h, and width w.
Compute the pixel count n = h * w - 1 (one is subtracted because one pixel's value is excluded when estimating the variance).
Compute the squared deviation of each pixel from the per-channel spatial mean: (x - x.mean(dim=[2, 3], keepdim=True)).pow(2).
Compute the denominator 4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda); the small constant e_lambda keeps it nonzero.
Compute y by dividing the squared deviations by this denominator and adding 0.5.
Finally, multiply the input x by the Sigmoid of y; the Sigmoid constrains the attention weights to the range (0, 1) and produces the final output.

The key idea of SimAM is to measure how each pixel's value relates to the channel mean and to modulate the features through a Sigmoid of that measure, strengthening the interaction between features. This helps capture relations between different positions in the image and improves model performance.
"""

import torch
import torch.nn as nn
from thop import profile

from einops import rearrange

def to_3d(x):
    return rearrange(x, 'b c h w -> b (h w) c')

def to_4d(x, h, w):
    return rearrange(x, 'b (h w) c -> b c h w', h=h, w=w)

class Simam_module(torch.nn.Module):
    def __init__(self, e_lambda=1e-4):
        super(Simam_module, self).__init__()
        self.act = nn.Sigmoid()
        self.e_lambda = e_lambda

    def forward(self, x):
        b, c, h, w = x.size()
        n = w * h - 1
        x_minus_mu_square = (x - x.mean(dim=[2, 3], keepdim=True)).pow(2)
        y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(dim=[2, 3], keepdim=True) / n + self.e_lambda)) + 0.5

        return x * self.act(y)


# Input: N C H W; output: N C H W
if __name__ == '__main__':
    model = Simam_module().cuda()
    # x = torch.randn(1, 3, 64, 64).cuda()
    x = torch.randn(32, 784, 128).cuda()
    x = to_4d(x, h=28, w=28)
    y = model(x)
    y = to_3d(y)
    print(y.shape)
    flops, params = profile(model, (x,))
    print(flops / 1e9)
    print(params)
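# Added note (derivation, assuming the closed-form energy from the SimAM paper):
# the minimal energy of a neuron t is
#     e_t* = 4 * (var + lambda) / ((t - mean)^2 + 2 * var + 2 * lambda),
# with mean and var taken over the other neurons of the same channel. The y
# computed above is exactly its reciprocal,
#     1 / e_t* = (t - mean)^2 / (4 * (var + lambda)) + 0.5,
# so pixels that deviate more from the channel mean receive a larger importance
# score, and sigmoid(y) squashes that score into (0, 1) before reweighting x.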
Binary file not shown.
@ -0,0 +1,9 @@
from attention.A2Atttention import DoubleAttention
import torch
from torch import nn
from torch.nn import functional as F

input = torch.randn(50, 512, 7, 7)
a2 = DoubleAttention(512, 128, 128, True)
output = a2(input)
print(output.shape)
@ -0,0 +1,8 @@
from model.attention.ACmix import ACmix
import torch

if __name__ == '__main__':
    input = torch.randn(50, 256, 7, 7)
    acmix = ACmix(in_planes=256, out_planes=256)
    output = acmix(input)
    print(output.shape)
@ -0,0 +1,9 @@
from attention.AFT import AFT_FULL
import torch
from torch import nn
from torch.nn import functional as F

input = torch.randn(50, 49, 512)
aft_full = AFT_FULL(d_model=512, n=49)
output = aft_full(input)
print(output.shape)
@ -0,0 +1,12 @@
from model.attention.Axial_attention import AxialImageTransformer
import torch

if __name__ == '__main__':
    input = torch.randn(3, 128, 7, 7)
    model = AxialImageTransformer(
        dim=128,
        depth=12,
        reversible=True
    )
    outputs = model(input)
    print(outputs.shape)
@ -0,0 +1,18 @@
"""
|
||||||
|
BAM: Bottleneck Attention Module---BMCV2018
|
||||||
|
|
||||||
|
论文地址:https://arxiv.org/pdf/1807.06514.pdf
|
||||||
|
|
||||||
|
这是CBAM同作者同时期的工作,工作与CBAM非常相似,也是双重Attention,不同的是CBAM是将两个attention的结果串联;而BAM是直接将两个attention矩阵进行相加。
|
||||||
|
|
||||||
|
Channel Attention方面,与SE的结构基本一样。Spatial Attention方面,还是在通道维度进行pool,然后用了两次3x3的空洞卷积,最后将用一次1x1的卷积得到Spatial Attention的矩阵。
|
||||||
|
|
||||||
|
最后Channel Attention和Spatial Attention矩阵进行相加(这里用到了广播机制),并进行归一化,这样一来,就得到了空间和通道结合的attention矩阵。
|
||||||
|
"""
|
||||||
|
from attention.BAM import BAMBlock
|
||||||
|
import torch
|
||||||
|
|
||||||
|
input = torch.randn(50, 512, 7, 7)
|
||||||
|
bam = BAMBlock(channel=512, reduction=16, dia_val=2)
|
||||||
|
output = bam(input)
|
||||||
|
print(output.shape)
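# Added sketch (hypothetical, not the BAMBlock implementation; BatchNorm/ReLU
# between layers are omitted): the shape of the spatial branch described above,
# with channel=512, reduction=16, and dilation dia_val=2 as in the usage here.
from torch import nn

spatial_branch = nn.Sequential(
    nn.Conv2d(512, 512 // 16, kernel_size=1),  # compress channels
    nn.Conv2d(512 // 16, 512 // 16, kernel_size=3, padding=2, dilation=2),
    nn.Conv2d(512 // 16, 512 // 16, kernel_size=3, padding=2, dilation=2),
    nn.Conv2d(512 // 16, 1, kernel_size=1))    # 1-channel spatial attention map
print(spatial_branch(torch.randn(50, 512, 7, 7)).shape)  # torch.Size([50, 1, 7, 7])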
@ -0,0 +1,20 @@
"""
|
||||||
|
CBAM: Convolutional Block Attention Module---ECCV2018
|
||||||
|
|
||||||
|
论文地址:https://openaccess.thecvf.com/content_ECCV_2018/papers/Sanghyun_Woo_Convolutional_Block_Attention_ECCV_2018_paper.pdf
|
||||||
|
这是ECCV2018的一篇论文,这篇文章同时使用了Channel Attention和Spatial Attention,将两者进行了串联(文章也做了并联和两种串联方式的消融实验)。
|
||||||
|
|
||||||
|
Channel Attention方面,大致结构还是和SE相似,不过作者提出AvgPool和MaxPool有不同的表示效果,所以作者对原来的特征在Spatial维度分别进行了AvgPool和MaxPool,然
|
||||||
|
后用SE的结构提取channel attention,注意这里是参数共享的,然后将两个特征相加后做归一化,就得到了注意力矩阵。
|
||||||
|
|
||||||
|
Spatial Attention和Channel Attention类似,先在channel维度进行两种pool后,将两个特征进行拼接,然后用7x7的卷积来提取Spatial Attention
|
||||||
|
(之所以用7x7是因为提取的是空间注意力,所以用的卷积核必须足够大)。然后做一次归一化,就得到了空间的注意力矩阵。
|
||||||
|
"""
|
||||||
|
from attention.CBAM import CBAMBlock
|
||||||
|
import torch
|
||||||
|
|
||||||
|
input = torch.randn(50, 512, 7, 7)
|
||||||
|
kernel_size = input.shape[2]
|
||||||
|
cbam = CBAMBlock(channel=512, reduction=16, kernel_size=kernel_size)
|
||||||
|
output = cbam(input)
|
||||||
|
print(output.shape)
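# Added sketch (hypothetical, not the CBAMBlock implementation): the channel
# attention branch described above. Avg- and max-pooled features pass through
# one shared MLP; the outputs are summed and squashed with a sigmoid.
from torch import nn

c, r = 512, 16
mlp = nn.Sequential(nn.Conv2d(c, c // r, 1), nn.ReLU(), nn.Conv2d(c // r, c, 1))
x = torch.randn(50, c, 7, 7)
avg = mlp(nn.functional.adaptive_avg_pool2d(x, 1))  # (50, 512, 1, 1)
mx = mlp(nn.functional.adaptive_max_pool2d(x, 1))   # same MLP: shared weights
channel_attn = torch.sigmoid(avg + mx)
print((x * channel_attn).shape)                     # torch.Size([50, 512, 7, 7])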
@ -0,0 +1,10 @@
from attention.CoAtNet import CoAtNet
import torch
from torch import nn
from torch.nn import functional as F

input = torch.randn(1, 3, 224, 224)
mbconv = CoAtNet(in_ch=3, image_size=224)
out = mbconv(input)
print(out.shape)
@ -0,0 +1,12 @@
from attention.CoTAttention import CoTAttention
import torch
from torch import nn
from torch.nn import functional as F

input = torch.randn(50, 512, 7, 7)
cot = CoTAttention(dim=512, kernel_size=3)
output = cot(input)
print(output.shape)
Some files were not shown because too many files have changed in this diff