Add the code for the TNT, HarDNet, RedNet and DLA models
parent f4f0984029
commit a950ec42b0
@@ -47,4 +47,8 @@ from ppcls.arch.backbone.model_zoo.distillation_models import ResNet50_vd_distil
from ppcls.arch.backbone.model_zoo.swin_transformer import SwinTransformer_tiny_patch4_window7_224, SwinTransformer_small_patch4_window7_224, SwinTransformer_base_patch4_window7_224, SwinTransformer_base_patch4_window12_384, SwinTransformer_large_patch4_window7_224, SwinTransformer_large_patch4_window12_384
from ppcls.arch.backbone.model_zoo.mixnet import MixNet_S, MixNet_M, MixNet_L
from ppcls.arch.backbone.model_zoo.rexnet import ReXNet_1_0, ReXNet_1_3, ReXNet_1_5, ReXNet_2_0, ReXNet_3_0
from ppcls.arch.backbone.model_zoo.dla import DLA34, DLA46_c, DLA46x_c, DLA60, DLA60x, DLA60x_c, DLA102, DLA102x, DLA102x2, DLA169
from ppcls.arch.backbone.model_zoo.rednet import RedNet26, RedNet38, RedNet50, RedNet101, RedNet152
from ppcls.arch.backbone.model_zoo.tnt import TNT_small
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet85, HarDNet39_ds, HarDNet68_ds
from ppcls.arch.backbone.variant_models.resnet_variant import ResNet50_last_stage_stride1
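The four new import lines register the DLA, RedNet, TNT and HarDNet entry points alongside the existing backbones (the hunk above appears to touch the backbone package's __init__.py). A minimal usage sketch, assuming a working PaddleClas checkout in which these names are re-exported from ppcls.arch.backbone:

import paddle
from ppcls.arch.backbone import DLA34, RedNet26, TNT_small, HarDNet68

# Build one of the newly registered backbones; pretrained=False skips weight loading.
model = DLA34(pretrained=False)
model.eval()

# All four families default to class_dim=1000 (ImageNet-style head).
x = paddle.randn([1, 3, 224, 224])
logits = model(x)
print(logits.shape)  # [1, 1000]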
ppcls/arch/backbone/model_zoo/dla.py
@@ -0,0 +1,451 @@
import math

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddle.nn.initializer import Normal, Constant

from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    "DLA34":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA34_pretrained.pdparams",
    "DLA46_c":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46_c_pretrained.pdparams",
    "DLA46x_c":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA46x_c_pretrained.pdparams",
    "DLA60":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60_pretrained.pdparams",
    "DLA60x":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_pretrained.pdparams",
    "DLA60x_c":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA60x_c_pretrained.pdparams",
    "DLA102":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102_pretrained.pdparams",
    "DLA102x":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x_pretrained.pdparams",
    "DLA102x2":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA102x2_pretrained.pdparams",
    "DLA169":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/DLA169_pretrained.pdparams"
}


__all__ = MODEL_URLS.keys()


zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)


class DlaBasic(nn.Layer):
    def __init__(self, inplanes, planes, stride=1, dilation=1, **cargs):
        super(DlaBasic, self).__init__()
        self.conv1 = nn.Conv2D(
            inplanes, planes, kernel_size=3, stride=stride,
            padding=dilation, bias_attr=False, dilation=dilation
        )
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2D(
            planes, planes, kernel_size=3, stride=1,
            padding=dilation, bias_attr=False, dilation=dilation
        )
        self.bn2 = nn.BatchNorm2D(planes)
        self.stride = stride

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += residual
        out = self.relu(out)

        return out


class DlaBottleneck(nn.Layer):
    expansion = 2

    def __init__(self, inplanes, outplanes, stride=1,
                 dilation=1, cardinality=1, base_width=64):
        super(DlaBottleneck, self).__init__()
        self.stride = stride
        mid_planes = int(math.floor(
            outplanes * (base_width / 64)) * cardinality)
        mid_planes = mid_planes // self.expansion

        self.conv1 = nn.Conv2D(inplanes, mid_planes, kernel_size=1, bias_attr=False)
        self.bn1 = nn.BatchNorm2D(mid_planes)
        self.conv2 = nn.Conv2D(
            mid_planes, mid_planes, kernel_size=3,
            stride=stride, padding=dilation, bias_attr=False,
            dilation=dilation, groups=cardinality
        )
        self.bn2 = nn.BatchNorm2D(mid_planes)
        self.conv3 = nn.Conv2D(mid_planes, outplanes, kernel_size=1, bias_attr=False)
        self.bn3 = nn.BatchNorm2D(outplanes)
        self.relu = nn.ReLU()

    def forward(self, x, residual=None):
        if residual is None:
            residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        out += residual
        out = self.relu(out)

        return out


class DlaRoot(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, residual):
        super(DlaRoot, self).__init__()
        self.conv = nn.Conv2D(
            in_channels, out_channels, 1, stride=1,
            bias_attr=False, padding=(kernel_size - 1) // 2
        )
        self.bn = nn.BatchNorm2D(out_channels)
        self.relu = nn.ReLU()
        self.residual = residual

    def forward(self, *x):
        children = x
        x = self.conv(paddle.concat(x, 1))
        x = self.bn(x)
        if self.residual:
            x += children[0]
        x = self.relu(x)

        return x


class DlaTree(nn.Layer):
    def __init__(self, levels, block, in_channels, out_channels,
                 stride=1, dilation=1, cardinality=1, base_width=64,
                 level_root=False, root_dim=0, root_kernel_size=1,
                 root_residual=False):
        super(DlaTree, self).__init__()
        if root_dim == 0:
            root_dim = 2 * out_channels
        if level_root:
            root_dim += in_channels

        self.downsample = nn.MaxPool2D(
            stride, stride=stride) if stride > 1 else Identity()
        self.project = Identity()
        cargs = dict(dilation=dilation, cardinality=cardinality, base_width=base_width)

        if levels == 1:
            self.tree1 = block(in_channels, out_channels, stride, **cargs)
            self.tree2 = block(out_channels, out_channels, 1, **cargs)
            if in_channels != out_channels:
                self.project = nn.Sequential(
                    nn.Conv2D(in_channels, out_channels, kernel_size=1, stride=1, bias_attr=False),
                    nn.BatchNorm2D(out_channels))
        else:
            cargs.update(dict(root_kernel_size=root_kernel_size, root_residual=root_residual))
            self.tree1 = DlaTree(
                levels - 1, block, in_channels,
                out_channels, stride, root_dim=0, **cargs
            )
            self.tree2 = DlaTree(
                levels - 1, block, out_channels,
                out_channels, root_dim=root_dim + out_channels, **cargs
            )

        if levels == 1:
            self.root = DlaRoot(root_dim, out_channels, root_kernel_size, root_residual)

        self.level_root = level_root
        self.root_dim = root_dim
        self.levels = levels

    def forward(self, x, residual=None, children=None):
        children = [] if children is None else children
        bottom = self.downsample(x)
        residual = self.project(bottom)

        if self.level_root:
            children.append(bottom)
        x1 = self.tree1(x, residual)

        if self.levels == 1:
            x2 = self.tree2(x1)
            x = self.root(x2, x1, *children)
        else:
            children.append(x1)
            x = self.tree2(x1, children=children)
        return x


class DLA(nn.Layer):
    def __init__(self, levels, channels, in_chans=3, cardinality=1,
                 base_width=64, block=DlaBottleneck, residual_root=False,
                 drop_rate=0.0, class_dim=1000, with_pool=True):
        super(DLA, self).__init__()
        self.channels = channels
        self.class_dim = class_dim
        self.with_pool = with_pool
        self.cardinality = cardinality
        self.base_width = base_width
        self.drop_rate = drop_rate

        self.base_layer = nn.Sequential(
            nn.Conv2D(
                in_chans, channels[0], kernel_size=7,
                stride=1, padding=3, bias_attr=False
            ),
            nn.BatchNorm2D(channels[0]),
            nn.ReLU())

        self.level0 = self._make_conv_level(channels[0], channels[0], levels[0])
        self.level1 = self._make_conv_level(channels[0], channels[1], levels[1], stride=2)

        cargs = dict(
            cardinality=cardinality,
            base_width=base_width,
            root_residual=residual_root
        )

        self.level2 = DlaTree(
            levels[2], block, channels[1],
            channels[2], 2, level_root=False, **cargs
        )
        self.level3 = DlaTree(
            levels[3], block, channels[2],
            channels[3], 2, level_root=True, **cargs
        )
        self.level4 = DlaTree(
            levels[4], block, channels[3],
            channels[4], 2, level_root=True, **cargs
        )
        self.level5 = DlaTree(
            levels[5], block, channels[4],
            channels[5], 2, level_root=True, **cargs
        )

        self.feature_info = [
            # rare to have a meaningful stride 1 level
            dict(num_chs=channels[0], reduction=1, module='level0'),
            dict(num_chs=channels[1], reduction=2, module='level1'),
            dict(num_chs=channels[2], reduction=4, module='level2'),
            dict(num_chs=channels[3], reduction=8, module='level3'),
            dict(num_chs=channels[4], reduction=16, module='level4'),
            dict(num_chs=channels[5], reduction=32, module='level5'),
        ]

        self.num_features = channels[-1]

        if with_pool:
            self.global_pool = nn.AdaptiveAvgPool2D(1)

        if class_dim > 0:
            self.fc = nn.Conv2D(self.num_features, class_dim, 1)

        for m in self.sublayers():
            if isinstance(m, nn.Conv2D):
                n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
                normal_ = Normal(mean=0.0, std=math.sqrt(2. / n))
                normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2D):
                ones_(m.weight)
                zeros_(m.bias)

    def _make_conv_level(self, inplanes, planes, convs, stride=1, dilation=1):
        modules = []
        for i in range(convs):
            modules.extend([
                nn.Conv2D(
                    inplanes, planes, kernel_size=3,
                    stride=stride if i == 0 else 1,
                    padding=dilation, bias_attr=False, dilation=dilation
                ),
                nn.BatchNorm2D(planes),
                nn.ReLU()])
            inplanes = planes
        return nn.Sequential(*modules)

    def forward_features(self, x):
        x = self.base_layer(x)

        x = self.level0(x)
        x = self.level1(x)
        x = self.level2(x)
        x = self.level3(x)
        x = self.level4(x)
        x = self.level5(x)

        return x

    def forward(self, x):
        x = self.forward_features(x)

        if self.with_pool:
            x = self.global_pool(x)

        if self.drop_rate > 0.:
            x = F.dropout(x, p=self.drop_rate, training=self.training)

        if self.class_dim > 0:
            x = self.fc(x)
            x = x.flatten(1)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def DLA34(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 2, 2, 1),
        channels=(16, 32, 64, 128, 256, 512),
        block=DlaBasic,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA34"])
    return model


def DLA46_c(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 2, 2, 1),
        channels=(16, 32, 64, 64, 128, 256),
        block=DlaBottleneck,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46_c"])
    return model


def DLA46x_c(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 2, 2, 1),
        channels=(16, 32, 64, 64, 128, 256),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA46x_c"])
    return model


def DLA60(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 2, 3, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60"])
    return model


def DLA60x(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 2, 3, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x"])
    return model


def DLA60x_c(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 2, 3, 1),
        channels=(16, 32, 64, 64, 128, 256),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA60x_c"])
    return model


def DLA102(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 3, 4, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102"])
    return model


def DLA102x(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 3, 4, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        cardinality=32,
        base_width=4,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x"])
    return model


def DLA102x2(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 1, 3, 4, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        cardinality=64,
        base_width=4,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA102x2"])
    return model


def DLA169(pretrained=False, **kwargs):
    model = DLA(
        levels=(1, 1, 2, 3, 5, 1),
        channels=(16, 32, 128, 256, 512, 1024),
        block=DlaBottleneck,
        residual_root=True,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["DLA169"])
    return model
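A quick sanity check for the DLA code above; a sketch assuming the new file is importable as ppcls.arch.backbone.model_zoo.dla:

import paddle
from ppcls.arch.backbone.model_zoo.dla import DLA, DLA34, DlaBasic

# The named constructors only choose levels/channels/block; with pretrained=False,
# DLA34 builds the same network as this direct call.
model = DLA(
    levels=(1, 1, 1, 2, 2, 1),
    channels=(16, 32, 64, 128, 256, 512),
    block=DlaBasic)
model.eval()

x = paddle.randn([2, 3, 224, 224])
y = model(x)
print(y.shape)  # [2, 1000]: stride-32 features, global pool, 1x1 conv head, flatten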
ppcls/arch/backbone/model_zoo/hardnet.py
@@ -0,0 +1,248 @@
import paddle
import paddle.nn as nn

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    'HarDNet39_ds':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet39_ds_pretrained.pdparams',
    'HarDNet68_ds':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_ds_pretrained.pdparams',
    'HarDNet68':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet68_pretrained.pdparams',
    'HarDNet85':
    'https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/HarDNet85_pretrained.pdparams'
}


def ConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
    layer = nn.Sequential(
        ('conv', nn.Conv2D(
            in_channels, out_channels, kernel_size=kernel_size,
            stride=stride, padding=kernel_size // 2, groups=1, bias_attr=bias_attr
        )),
        ('norm', nn.BatchNorm2D(out_channels)),
        ('relu', nn.ReLU6())
    )
    return layer


def DWConvLayer(in_channels, out_channels, kernel_size=3, stride=1, bias_attr=False):
    layer = nn.Sequential(
        ('dwconv', nn.Conv2D(
            in_channels, out_channels, kernel_size=kernel_size,
            stride=stride, padding=1, groups=out_channels, bias_attr=bias_attr
        )),
        ('norm', nn.BatchNorm2D(out_channels))
    )
    return layer


def CombConvLayer(in_channels, out_channels, kernel_size=1, stride=1):
    layer = nn.Sequential(
        ('layer1', ConvLayer(in_channels, out_channels, kernel_size=kernel_size)),
        ('layer2', DWConvLayer(out_channels, out_channels, stride=stride))
    )
    return layer


class HarDBlock(nn.Layer):
    def __init__(self, in_channels, growth_rate, grmul, n_layers,
                 keepBase=False, residual_out=False, dwconv=False):
        super().__init__()
        self.keepBase = keepBase
        self.links = []
        layers_ = []
        self.out_channels = 0  # if upsample else in_channels
        for i in range(n_layers):
            outch, inch, link = self.get_link(i + 1, in_channels, growth_rate, grmul)
            self.links.append(link)
            if dwconv:
                layers_.append(CombConvLayer(inch, outch))
            else:
                layers_.append(ConvLayer(inch, outch))

            if (i % 2 == 0) or (i == n_layers - 1):
                self.out_channels += outch
        # print("Blk out =", self.out_channels)
        self.layers = nn.LayerList(layers_)

    def get_link(self, layer, base_ch, growth_rate, grmul):
        if layer == 0:
            return base_ch, 0, []
        out_channels = growth_rate

        link = []
        for i in range(10):
            dv = 2 ** i
            if layer % dv == 0:
                k = layer - dv
                link.append(k)
                if i > 0:
                    out_channels *= grmul

        out_channels = int(int(out_channels + 1) / 2) * 2
        in_channels = 0

        for i in link:
            ch, _, _ = self.get_link(i, base_ch, growth_rate, grmul)
            in_channels += ch

        return out_channels, in_channels, link

    def forward(self, x):
        layers_ = [x]

        for layer in range(len(self.layers)):
            link = self.links[layer]
            tin = []
            for i in link:
                tin.append(layers_[i])
            if len(tin) > 1:
                x = paddle.concat(tin, 1)
            else:
                x = tin[0]
            out = self.layers[layer](x)
            layers_.append(out)

        t = len(layers_)
        out_ = []
        for i in range(t):
            if (i == 0 and self.keepBase) or (i == t - 1) or (i % 2 == 1):
                out_.append(layers_[i])
        out = paddle.concat(out_, 1)

        return out


class HarDNet(nn.Layer):
    def __init__(self, depth_wise=False, arch=85,
                 class_dim=1000, with_pool=True):
        super().__init__()
        first_ch = [32, 64]
        second_kernel = 3
        max_pool = True
        grmul = 1.7
        drop_rate = 0.1

        # HarDNet68
        ch_list = [128, 256, 320, 640, 1024]
        gr = [14, 16, 20, 40, 160]
        n_layers = [8, 16, 16, 16, 4]
        downSamp = [1, 0, 1, 1, 0]

        if arch == 85:
            # HarDNet85
            first_ch = [48, 96]
            ch_list = [192, 256, 320, 480, 720, 1280]
            gr = [24, 24, 28, 36, 48, 256]
            n_layers = [8, 16, 16, 16, 16, 4]
            downSamp = [1, 0, 1, 0, 1, 0]
            drop_rate = 0.2

        elif arch == 39:
            # HarDNet39
            first_ch = [24, 48]
            ch_list = [96, 320, 640, 1024]
            grmul = 1.6
            gr = [16, 20, 64, 160]
            n_layers = [4, 16, 8, 4]
            downSamp = [1, 1, 1, 0]

        if depth_wise:
            second_kernel = 1
            max_pool = False
            drop_rate = 0.05

        blks = len(n_layers)
        self.base = nn.LayerList([])

        # First Layer: Standard Conv3x3, Stride=2
        self.base.append(
            ConvLayer(in_channels=3, out_channels=first_ch[0], kernel_size=3,
                      stride=2, bias_attr=False))

        # Second Layer
        self.base.append(
            ConvLayer(first_ch[0], first_ch[1], kernel_size=second_kernel))

        # Maxpooling or DWConv3x3 downsampling
        if max_pool:
            self.base.append(nn.MaxPool2D(kernel_size=3, stride=2, padding=1))
        else:
            self.base.append(DWConvLayer(first_ch[1], first_ch[1], stride=2))

        # Build all HarDNet blocks
        ch = first_ch[1]
        for i in range(blks):
            blk = HarDBlock(ch, gr[i], grmul, n_layers[i], dwconv=depth_wise)
            ch = blk.out_channels
            self.base.append(blk)

            if i == blks - 1 and arch == 85:
                self.base.append(nn.Dropout(0.1))

            self.base.append(ConvLayer(ch, ch_list[i], kernel_size=1))
            ch = ch_list[i]
            if downSamp[i] == 1:
                if max_pool:
                    self.base.append(nn.MaxPool2D(kernel_size=2, stride=2))
                else:
                    self.base.append(DWConvLayer(ch, ch, stride=2))

        ch = ch_list[blks - 1]

        layers = []

        if with_pool:
            layers.append(nn.AdaptiveAvgPool2D((1, 1)))

        if class_dim > 0:
            layers.append(nn.Flatten())
            layers.append(nn.Dropout(drop_rate))
            layers.append(nn.Linear(ch, class_dim))

        self.base.append(nn.Sequential(*layers))

    def forward(self, x):
        for layer in self.base:
            x = layer(x)
        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def HarDNet39_ds(pretrained=False, **kwargs):
    model = HarDNet(arch=39, depth_wise=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet39_ds"])
    return model


def HarDNet68_ds(pretrained=False, **kwargs):
    model = HarDNet(arch=68, depth_wise=True, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68_ds"])
    return model


def HarDNet68(pretrained=False, **kwargs):
    model = HarDNet(arch=68, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet68"])
    return model


def HarDNet85(pretrained=False, **kwargs):
    model = HarDNet(arch=85, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["HarDNet85"])
    return model
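A short sketch of how the HarDNet entry points above differ, assuming the file is importable as ppcls.arch.backbone.model_zoo.hardnet:

import paddle
from ppcls.arch.backbone.model_zoo.hardnet import HarDNet68, HarDNet68_ds

# HarDNet68 downsamples with MaxPool2D and uses plain ConvLayer blocks;
# HarDNet68_ds passes depth_wise=True, which switches to DWConvLayer downsampling
# and CombConvLayer (1x1 conv followed by a depthwise 3x3) inside each HarDBlock.
x = paddle.randn([1, 3, 224, 224])
for build in (HarDNet68, HarDNet68_ds):
    model = build(pretrained=False)
    model.eval()
    print(build.__name__, model(x).shape)  # both end in a 1000-way Linear head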
ppcls/arch/backbone/model_zoo/rednet.py
@@ -0,0 +1,189 @@
import paddle
import paddle.nn as nn

from paddle.vision.models import resnet

from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    "RedNet26":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet26_pretrained.pdparams",
    "RedNet38":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet38_pretrained.pdparams",
    "RedNet50":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet50_pretrained.pdparams",
    "RedNet101":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet101_pretrained.pdparams",
    "RedNet152":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/RedNet152_pretrained.pdparams"
}


class Involution(nn.Layer):
    def __init__(self, channels, kernel_size, stride):
        super(Involution, self).__init__()
        self.kernel_size = kernel_size
        self.stride = stride
        self.channels = channels
        reduction_ratio = 4
        self.group_channels = 16
        self.groups = self.channels // self.group_channels
        self.conv1 = nn.Sequential(
            ('conv', nn.Conv2D(
                in_channels=channels,
                out_channels=channels // reduction_ratio,
                kernel_size=1,
                bias_attr=False
            )),
            ('bn', nn.BatchNorm2D(channels // reduction_ratio)),
            ('activate', nn.ReLU())
        )
        self.conv2 = nn.Sequential(
            ('conv', nn.Conv2D(
                in_channels=channels // reduction_ratio,
                out_channels=kernel_size**2 * self.groups,
                kernel_size=1,
                stride=1
            ))
        )
        if stride > 1:
            self.avgpool = nn.AvgPool2D(stride, stride)

    def forward(self, x):
        weight = self.conv2(self.conv1(x if self.stride == 1 else self.avgpool(x)))
        b, c, h, w = weight.shape
        weight = weight.reshape((b, self.groups, self.kernel_size**2, h, w)).unsqueeze(2)

        out = nn.functional.unfold(x, self.kernel_size, self.stride, (self.kernel_size - 1) // 2, 1)
        out = out.reshape((b, self.groups, self.group_channels, self.kernel_size**2, h, w))
        out = (weight * out).sum(axis=3).reshape((b, self.channels, h, w))
        return out


class BottleneckBlock(resnet.BottleneckBlock):
    def __init__(self, inplanes, planes, stride=1, downsample=None,
                 groups=1, base_width=64, dilation=1, norm_layer=None):
        super(BottleneckBlock, self).__init__(
            inplanes, planes, stride, downsample,
            groups, base_width, dilation, norm_layer
        )
        width = int(planes * (base_width / 64.)) * groups
        self.conv2 = Involution(width, 7, stride)


class RedNet(resnet.ResNet):
    def __init__(self, block, depth, class_dim=1000, with_pool=True):
        super(RedNet, self).__init__(
            block=block, depth=50,
            num_classes=class_dim, with_pool=with_pool
        )
        layer_cfg = {
            26: [1, 2, 4, 1],
            38: [2, 3, 5, 2],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3]
        }
        layers = layer_cfg[depth]

        self.conv1 = None
        self.bn1 = None
        self.relu = None
        self.inplanes = 64
        self.class_dim = class_dim
        self.stem = nn.Sequential(
            nn.Sequential(
                ('conv', nn.Conv2D(
                    in_channels=3,
                    out_channels=self.inplanes // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    bias_attr=False
                )),
                ('bn', nn.BatchNorm2D(self.inplanes // 2)),
                ('activate', nn.ReLU())
            ),
            Involution(self.inplanes // 2, 3, 1),
            nn.BatchNorm2D(self.inplanes // 2),
            nn.ReLU(),
            nn.Sequential(
                ('conv', nn.Conv2D(
                    in_channels=self.inplanes // 2,
                    out_channels=self.inplanes,
                    kernel_size=3,
                    stride=1,
                    padding=1,
                    bias_attr=False
                )),
                ('bn', nn.BatchNorm2D(self.inplanes)),
                ('activate', nn.ReLU())
            )
        )

        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    def forward(self, x):
        x = self.stem(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        if self.with_pool:
            x = self.avgpool(x)

        if self.class_dim > 0:
            x = paddle.flatten(x, 1)
            x = self.fc(x)

        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def RedNet26(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 26, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet26"])
    return model


def RedNet38(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 38, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet38"])
    return model


def RedNet50(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 50, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet50"])
    return model


def RedNet101(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 101, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet101"])
    return model


def RedNet152(pretrained=False, **kwargs):
    model = RedNet(BottleneckBlock, 152, **kwargs)
    _load_pretrained(pretrained, model, MODEL_URLS["RedNet152"])
    return model
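A sketch of running the involution-based RedNet defined above, assuming the file is importable as ppcls.arch.backbone.model_zoo.rednet and that paddle.vision.models.resnet provides the ResNet and BottleneckBlock base classes it subclasses:

import paddle
from ppcls.arch.backbone.model_zoo.rednet import RedNet26

# RedNet keeps the paddle.vision ResNet skeleton but replaces the 7x7 conv stem
# with a conv + Involution stem and swaps each bottleneck's 3x3 conv for a 7x7 Involution.
model = RedNet26(pretrained=False)
model.eval()

x = paddle.randn([1, 3, 224, 224])
print(model(x).shape)  # [1, 1000]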
ppcls/arch/backbone/model_zoo/tnt.py
@@ -0,0 +1,301 @@
import math
import numpy as np

import paddle
import paddle.nn as nn

from paddle.nn.initializer import TruncatedNormal, Constant

from ppcls.arch.backbone.base.theseus_layer import Identity
from ppcls.utils.save_load import load_dygraph_pretrain, load_dygraph_pretrain_from_url


MODEL_URLS = {
    "TNT_small":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/model_zoo/TNT_small_pretrained.pdparams"
}


trunc_normal_ = TruncatedNormal(std=.02)
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)


def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = paddle.to_tensor(1 - drop_prob)
    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
    random_tensor = paddle.floor(random_tensor)  # binarize
    output = x.divide(keep_prob) * random_tensor
    return output


class DropPath(nn.Layer):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class Mlp(nn.Layer):
    def __init__(self, in_features, hidden_features=None,
                 out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class Attention(nn.Layer):
    def __init__(self, dim, hidden_dim, num_heads=8,
                 qkv_bias=False, attn_drop=0., proj_drop=0.):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        head_dim = hidden_dim // num_heads
        self.head_dim = head_dim
        self.scale = head_dim ** -0.5

        self.qk = nn.Linear(dim, hidden_dim * 2, bias_attr=qkv_bias)
        self.v = nn.Linear(dim, dim, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)

    def forward(self, x):
        B, N, C = x.shape
        qk = self.qk(x).reshape((B, N, 2, self.num_heads, self.head_dim)).transpose((2, 0, 3, 1, 4))

        q, k = qk[0], qk[1]
        v = self.v(x).reshape((B, N, self.num_heads, -1)).transpose((0, 2, 1, 3))

        attn = (q @ k.transpose((0, 1, 3, 2))) * self.scale
        attn = nn.functional.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

        x = (attn @ v).transpose((0, 2, 1, 3)).reshape((B, N, -1))
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class Block(nn.Layer):
    def __init__(self, dim, in_dim, num_pixel, num_heads=12, in_num_head=4, mlp_ratio=4.,
                 qkv_bias=False, drop=0., attn_drop=0., drop_path=0., act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        # Inner transformer
        self.norm_in = norm_layer(in_dim)
        self.attn_in = Attention(
            in_dim, in_dim, num_heads=in_num_head,
            qkv_bias=qkv_bias, attn_drop=attn_drop,
            proj_drop=drop
        )

        self.norm_mlp_in = norm_layer(in_dim)
        self.mlp_in = Mlp(
            in_features=in_dim, hidden_features=int(in_dim * 4),
            out_features=in_dim, act_layer=act_layer, drop=drop
        )

        self.norm1_proj = norm_layer(in_dim)
        self.proj = nn.Linear(in_dim * num_pixel, dim)
        # Outer transformer
        self.norm_out = norm_layer(dim)
        self.attn_out = Attention(
            dim, dim, num_heads=num_heads, qkv_bias=qkv_bias,
            attn_drop=attn_drop, proj_drop=drop
        )

        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()

        self.norm_mlp = norm_layer(dim)
        self.mlp = Mlp(
            in_features=dim, hidden_features=int(dim * mlp_ratio),
            out_features=dim, act_layer=act_layer, drop=drop
        )

    def forward(self, pixel_embed, patch_embed):
        # inner
        pixel_embed = pixel_embed + self.drop_path(self.attn_in(self.norm_in(pixel_embed)))
        pixel_embed = pixel_embed + self.drop_path(self.mlp_in(self.norm_mlp_in(pixel_embed)))
        # outer
        B, N, C = patch_embed.shape
        patch_embed[:, 1:] = patch_embed[:, 1:] + self.proj(self.norm1_proj(pixel_embed).reshape((B, N - 1, -1)))
        patch_embed = patch_embed + self.drop_path(self.attn_out(self.norm_out(patch_embed)))
        patch_embed = patch_embed + self.drop_path(self.mlp(self.norm_mlp(patch_embed)))
        return pixel_embed, patch_embed


class PixelEmbed(nn.Layer):
    def __init__(self, img_size=224, patch_size=16, in_chans=3, in_dim=48, stride=4):
        super().__init__()
        num_patches = (img_size // patch_size) ** 2
        self.img_size = img_size
        self.num_patches = num_patches
        self.in_dim = in_dim
        new_patch_size = math.ceil(patch_size / stride)
        self.new_patch_size = new_patch_size

        self.proj = nn.Conv2D(
            in_chans, self.in_dim,
            kernel_size=7, padding=3,
            stride=stride
        )

    def forward(self, x, pixel_pos):
        B, C, H, W = x.shape
        assert H == self.img_size and W == self.img_size, f"Input image size ({H}*{W}) doesn't match model ({self.img_size}*{self.img_size})."

        x = self.proj(x)
        x = nn.functional.unfold(x, self.new_patch_size, self.new_patch_size)
        x = x.transpose((0, 2, 1)).reshape((B * self.num_patches, self.in_dim, self.new_patch_size, self.new_patch_size))
        x = x + pixel_pos
        x = x.reshape((B * self.num_patches, self.in_dim, -1)).transpose((0, 2, 1))
        return x


class TNT(nn.Layer):
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, in_dim=48, depth=12,
                 num_heads=12, in_num_head=4, mlp_ratio=4., qkv_bias=False, drop_rate=0., attn_drop_rate=0.,
                 drop_path_rate=0., norm_layer=nn.LayerNorm, first_stride=4, class_dim=1000):
        super().__init__()
        self.class_dim = class_dim
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        self.pixel_embed = PixelEmbed(
            img_size=img_size, patch_size=patch_size,
            in_chans=in_chans, in_dim=in_dim, stride=first_stride
        )
        num_patches = self.pixel_embed.num_patches
        self.num_patches = num_patches
        new_patch_size = self.pixel_embed.new_patch_size
        num_pixel = new_patch_size ** 2

        self.norm1_proj = norm_layer(num_pixel * in_dim)
        self.proj = nn.Linear(num_pixel * in_dim, embed_dim)
        self.norm2_proj = norm_layer(embed_dim)

        self.cls_token = self.create_parameter(
            shape=(1, 1, embed_dim),
            default_initializer=zeros_
        )
        self.add_parameter("cls_token", self.cls_token)

        self.patch_pos = self.create_parameter(
            shape=(1, num_patches + 1, embed_dim),
            default_initializer=zeros_
        )
        self.add_parameter("patch_pos", self.patch_pos)

        self.pixel_pos = self.create_parameter(
            shape=(1, in_dim, new_patch_size, new_patch_size),
            default_initializer=zeros_
        )
        self.add_parameter("pixel_pos", self.pixel_pos)

        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth decay rule
        dpr = np.linspace(0, drop_path_rate, depth)

        blocks = []
        for i in range(depth):
            blocks.append(Block(
                dim=embed_dim, in_dim=in_dim, num_pixel=num_pixel, num_heads=num_heads,
                in_num_head=in_num_head, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias,
                drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i],
                norm_layer=norm_layer
            ))
        self.blocks = nn.LayerList(blocks)
        self.norm = norm_layer(embed_dim)

        if class_dim > 0:
            self.head = nn.Linear(embed_dim, class_dim)

        trunc_normal_(self.cls_token)
        trunc_normal_(self.patch_pos)
        trunc_normal_(self.pixel_pos)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        B = x.shape[0]
        pixel_embed = self.pixel_embed(x, self.pixel_pos)

        patch_embed = self.norm2_proj(self.proj(self.norm1_proj(pixel_embed.reshape((B, self.num_patches, -1)))))
        patch_embed = paddle.concat((self.cls_token.expand((B, -1, -1)), patch_embed), axis=1)
        patch_embed = patch_embed + self.patch_pos
        patch_embed = self.pos_drop(patch_embed)

        for blk in self.blocks:
            pixel_embed, patch_embed = blk(pixel_embed, patch_embed)

        patch_embed = self.norm(patch_embed)
        return patch_embed[:, 0]

    def forward(self, x):
        x = self.forward_features(x)

        if self.class_dim > 0:
            x = self.head(x)
        return x


def _load_pretrained(pretrained, model, model_url, use_ssld=False):
    if pretrained is False:
        pass
    elif pretrained is True:
        load_dygraph_pretrain_from_url(model, model_url, use_ssld=use_ssld)
    elif isinstance(pretrained, str):
        load_dygraph_pretrain(model, pretrained)
    else:
        raise RuntimeError(
            "pretrained type is not available. Please use `string` or `boolean` type."
        )


def TNT_small(pretrained=False, **kwargs):
    model = TNT(
        patch_size=16,
        embed_dim=384,
        in_dim=24,
        depth=12,
        num_heads=6,
        in_num_head=4,
        qkv_bias=False,
        **kwargs
    )
    _load_pretrained(pretrained, model, MODEL_URLS["TNT_small"])
    return model
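A sketch of building the TNT_small model defined above, assuming the file is importable as ppcls.arch.backbone.model_zoo.tnt:

import paddle
from ppcls.arch.backbone.model_zoo.tnt import TNT_small

model = TNT_small(pretrained=False)
model.eval()

# PixelEmbed asserts a fixed img_size (224 by default): 14x14 patches of 16x16 pixels,
# each patch further embedded as 4x4 "pixel" tokens of width in_dim=24.
x = paddle.randn([1, 3, 224, 224])
print(model(x).shape)  # [1, 1000] from the Linear head on the class token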