mirror of
https://github.com/PyRetri/PyRetri.git
synced 2025-06-03 14:49:50 +08:00
222 lines
7.2 KiB
Python
222 lines
7.2 KiB
Python
|
import torch
|
||
|
import torch.nn as nn
|
||
|
from torch.nn import init
|
||
|
from torchvision import models
|
||
|
from torch.autograd import Variable
|
||
|
|
||
|
from ..backbone_base import BackboneBase
|
||
|
from ...registry import BACKBONES
|
||
|
|
||
|
|
||
|
######################################################################
|
||
|
def weights_init_kaiming(m):
|
||
|
classname = m.__class__.__name__
|
||
|
# print(classname)
|
||
|
if classname.find('Conv') != -1:
|
||
|
init.kaiming_normal_(m.weight.data, a=0, mode='fan_in') # For old pytorch, you may use kaiming_normal.
|
||
|
elif classname.find('Linear') != -1:
|
||
|
init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
|
||
|
init.constant_(m.bias.data, 0.0)
|
||
|
elif classname.find('BatchNorm1d') != -1:
|
||
|
init.normal_(m.weight.data, 1.0, 0.02)
|
||
|
init.constant_(m.bias.data, 0.0)
|
||
|
|
||
|
|
||
|
def weights_init_classifier(m):
|
||
|
classname = m.__class__.__name__
|
||
|
if classname.find('Linear') != -1:
|
||
|
init.normal_(m.weight.data, std=0.001)
|
||
|
init.constant_(m.bias.data, 0.0)
|
||
|
|
||
|
|
||
|
# Defines the new fc layer and classification layer
|
||
|
# |--Linear--|--bn--|--relu--|--Linear--|
|
||
|
class ClassBlock(nn.Module):
|
||
|
def __init__(self, input_dim, class_num, droprate, relu=False, bnorm=True, num_bottleneck=512, linear=True,
|
||
|
return_f=False):
|
||
|
super(ClassBlock, self).__init__()
|
||
|
self.return_f = return_f
|
||
|
add_block = []
|
||
|
if linear:
|
||
|
add_block += [nn.Linear(input_dim, num_bottleneck)]
|
||
|
else:
|
||
|
num_bottleneck = input_dim
|
||
|
if bnorm:
|
||
|
add_block += [nn.BatchNorm1d(num_bottleneck)]
|
||
|
if relu:
|
||
|
add_block += [nn.LeakyReLU(0.1)]
|
||
|
if droprate > 0:
|
||
|
add_block += [nn.Dropout(p=droprate)]
|
||
|
add_block = nn.Sequential(*add_block)
|
||
|
add_block.apply(weights_init_kaiming)
|
||
|
|
||
|
classifier = []
|
||
|
classifier += [nn.Linear(num_bottleneck, class_num)]
|
||
|
classifier = nn.Sequential(*classifier)
|
||
|
classifier.apply(weights_init_classifier)
|
||
|
|
||
|
self.add_block = add_block
|
||
|
self.classifier = classifier
|
||
|
|
||
|
def forward(self, x):
|
||
|
x = self.add_block(x)
|
||
|
if self.return_f:
|
||
|
f = x
|
||
|
x = self.classifier(x)
|
||
|
return x, f
|
||
|
else:
|
||
|
x = self.classifier(x)
|
||
|
return x
|
||
|
|
||
|
|
||
|
# Define the ResNet50-based Model
|
||
|
@BACKBONES.register
|
||
|
class ft_net(BackboneBase):
|
||
|
def __init__(self, class_num=751, droprate=0.5, stride=2):
|
||
|
super(ft_net, self).__init__()
|
||
|
model_ft = models.resnet50(pretrained=True)
|
||
|
# avg pooling to global pooling
|
||
|
if stride == 1:
|
||
|
self.model.layer4[0].downsample[0].stride = (1, 1)
|
||
|
self.model.layer4[0].conv2.stride = (1, 1)
|
||
|
model_ft.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||
|
self.model = model_ft
|
||
|
self.classifier = ClassBlock(2048, class_num, droprate)
|
||
|
|
||
|
def forward(self, x):
|
||
|
x = self.model.conv1(x)
|
||
|
x = self.model.bn1(x)
|
||
|
x = self.model.relu(x)
|
||
|
x = self.model.maxpool(x)
|
||
|
x = self.model.layer1(x)
|
||
|
x = self.model.layer2(x)
|
||
|
x = self.model.layer3(x)
|
||
|
x = self.model.layer4(x)
|
||
|
x = self.model.avgpool(x)
|
||
|
x = x.view(x.size(0), x.size(1))
|
||
|
x = self.classifier(x)
|
||
|
return x
|
||
|
|
||
|
|
||
|
# Define the DenseNet121-based Model
|
||
|
class ft_net_dense(nn.Module):
|
||
|
|
||
|
def __init__(self, class_num, droprate=0.5):
|
||
|
super().__init__()
|
||
|
model_ft = models.densenet121(pretrained=True)
|
||
|
model_ft.features.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||
|
model_ft.fc = nn.Sequential()
|
||
|
self.model = model_ft
|
||
|
# For DenseNet, the feature dim is 1024
|
||
|
self.classifier = ClassBlock(1024, class_num, droprate)
|
||
|
|
||
|
def forward(self, x):
|
||
|
x = self.model.features(x)
|
||
|
x = x.view(x.size(0), x.size(1))
|
||
|
x = self.classifier(x)
|
||
|
return x
|
||
|
|
||
|
|
||
|
# Define the ResNet50-based Model (Middle-Concat)
|
||
|
# In the spirit of "The Devil is in the Middle: Exploiting Mid-level Representations for Cross-Domain Instance Matching." Yu, Qian, et al. arXiv:1711.08106 (2017).
|
||
|
class ft_net_middle(nn.Module):
|
||
|
|
||
|
def __init__(self, class_num, droprate=0.5):
|
||
|
super(ft_net_middle, self).__init__()
|
||
|
model_ft = models.resnet50(pretrained=True)
|
||
|
# avg pooling to global pooling
|
||
|
model_ft.avgpool = nn.AdaptiveAvgPool2d((1, 1))
|
||
|
self.model = model_ft
|
||
|
self.classifier = ClassBlock(2048 + 1024, class_num, droprate)
|
||
|
|
||
|
def forward(self, x):
|
||
|
x = self.model.conv1(x)
|
||
|
x = self.model.bn1(x)
|
||
|
x = self.model.relu(x)
|
||
|
x = self.model.maxpool(x)
|
||
|
x = self.model.layer1(x)
|
||
|
x = self.model.layer2(x)
|
||
|
x = self.model.layer3(x)
|
||
|
# x0 n*1024*1*1
|
||
|
x0 = self.model.avgpool(x)
|
||
|
x = self.model.layer4(x)
|
||
|
# x1 n*2048*1*1
|
||
|
x1 = self.model.avgpool(x)
|
||
|
x = torch.cat((x0, x1), 1)
|
||
|
x = x.view(x.size(0), x.size(1))
|
||
|
x = self.classifier(x)
|
||
|
return x
|
||
|
|
||
|
# Part Model proposed in Yifan Sun etal. (2018)
|
||
|
class PCB(nn.Module):
|
||
|
def __init__(self, class_num):
|
||
|
super(PCB, self).__init__()
|
||
|
|
||
|
self.part = 6 # We cut the pool5 to 6 parts
|
||
|
model_ft = models.resnet50(pretrained=True)
|
||
|
self.model = model_ft
|
||
|
self.avgpool = nn.AdaptiveAvgPool2d((self.part, 1))
|
||
|
self.dropout = nn.Dropout(p=0.5)
|
||
|
# remove the final downsample
|
||
|
self.model.layer4[0].downsample[0].stride = (1, 1)
|
||
|
self.model.layer4[0].conv2.stride = (1, 1)
|
||
|
# define 6 classifiers
|
||
|
for i in range(self.part):
|
||
|
name = 'classifier' + str(i)
|
||
|
setattr(self, name, ClassBlock(2048, class_num, droprate=0.5, relu=False, bnorm=True, num_bottleneck=256))
|
||
|
|
||
|
def forward(self, x):
|
||
|
x = self.model.conv1(x)
|
||
|
x = self.model.bn1(x)
|
||
|
x = self.model.relu(x)
|
||
|
x = self.model.maxpool(x)
|
||
|
|
||
|
x = self.model.layer1(x)
|
||
|
x = self.model.layer2(x)
|
||
|
x = self.model.layer3(x)
|
||
|
x = self.model.layer4(x)
|
||
|
x = self.avgpool(x)
|
||
|
x = self.dropout(x)
|
||
|
part = {}
|
||
|
predict = {}
|
||
|
# get six part feature batchsize*2048*6
|
||
|
for i in range(self.part):
|
||
|
part[i] = torch.squeeze(x[:, :, i])
|
||
|
name = 'classifier' + str(i)
|
||
|
c = getattr(self, name)
|
||
|
predict[i] = c(part[i])
|
||
|
|
||
|
# sum prediction
|
||
|
# y = predict[0]
|
||
|
# for i in range(self.part-1):
|
||
|
# y += predict[i+1]
|
||
|
y = []
|
||
|
for i in range(self.part):
|
||
|
y.append(predict[i])
|
||
|
return y
|
||
|
|
||
|
|
||
|
class PCB_test(nn.Module):
|
||
|
def __init__(self, model):
|
||
|
super(PCB_test, self).__init__()
|
||
|
self.part = 6
|
||
|
self.model = model.model
|
||
|
self.avgpool = nn.AdaptiveAvgPool2d((self.part, 1))
|
||
|
# remove the final downsample
|
||
|
self.model.layer4[0].downsample[0].stride = (1, 1)
|
||
|
self.model.layer4[0].conv2.stride = (1, 1)
|
||
|
|
||
|
def forward(self, x):
|
||
|
x = self.model.conv1(x)
|
||
|
x = self.model.bn1(x)
|
||
|
x = self.model.relu(x)
|
||
|
x = self.model.maxpool(x)
|
||
|
|
||
|
x = self.model.layer1(x)
|
||
|
x = self.model.layer2(x)
|
||
|
x = self.model.layer3(x)
|
||
|
x = self.model.layer4(x)
|
||
|
x = self.avgpool(x)
|
||
|
y = x.view(x.size(0), x.size(1), x.size(2))
|
||
|
return y
|