2020-07-08 22:30:48 +08:00
|
|
|
import torch
|
|
|
|
import torch.nn as nn
|
|
|
|
|
|
|
|
from openselfsup.utils import print_log
|
|
|
|
|
|
|
|
from . import builder
|
|
|
|
from .registry import MODELS
|
|
|
|
|
|
|
|
|
|
|
|
@MODELS.register_module
class RelativeLoc(nn.Module):
    """Relative patch location.

    Implementation of "Unsupervised Visual Representation Learning
    by Context Prediction (https://arxiv.org/abs/1505.05192)".

    Each input sample stacks two patches along the channel dimension. Both
    patches are passed through the same backbone, their first-stage features
    are concatenated along the channel dimension, and the result is fed
    through the (optional) neck and the head.

    Args:
        backbone: Backbone specification handed to
            ``builder.build_backbone``.
        neck (optional): Neck specification handed to
            ``builder.build_neck``. When None, no neck is built and the
            concatenated backbone features go straight to the head.
            Default: None.
        head (optional): Head specification handed to
            ``builder.build_head``. A head is required for the 'train' and
            'test' modes; only 'extract' works without one. Default: None.
        pretrained (str, optional): Path to pre-trained weights.
            Default: None.
    """

    def __init__(self, backbone, neck=None, head=None, pretrained=None):
        """Build the sub-modules and initialize their weights."""
        super(RelativeLoc, self).__init__()
        self.backbone = builder.build_backbone(backbone)
        # ``neck`` and ``head`` are only set as attributes when configured;
        # every other method must therefore tolerate their absence.
        if neck is not None:
            self.neck = builder.build_neck(neck)
        if head is not None:
            self.head = builder.build_head(head)
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialize the weights of model.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Default: None.
        """
        if pretrained is not None:
            print_log('load model from: {}'.format(pretrained), logger='root')
        self.backbone.init_weights(pretrained=pretrained)
        # Skip the optional sub-modules that were not built in __init__
        # instead of failing with AttributeError.
        neck = getattr(self, 'neck', None)
        if neck is not None:
            neck.init_weights(init_linear='normal')
        head = getattr(self, 'head', None)
        if head is not None:
            head.init_weights(init_linear='normal', std=0.005)

    def forward_backbone(self, img):
        """Forward backbone.

        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.

        Returns:
            tuple[Tensor]: backbone outputs.
        """
        return self.backbone(img)

    def _forward_head(self, img):
        """Run the patch-pair pipeline shared by training and testing.

        Args:
            img (Tensor): Batch whose channel dimension (dim 1) holds the two
                patches of every pair stacked together.

        Returns:
            The outputs of ``self.head`` for the concatenated pair features.
        """
        # Split the stacked channels back into the two patches of each pair.
        img1, img2 = torch.chunk(img, 2, dim=1)
        x1 = self.forward_backbone(img1)  # tuple
        x2 = self.forward_backbone(img2)  # tuple
        # Only the first backbone output of each patch is used; the head
        # sees both patches side by side along the channel dimension.
        x = (torch.cat((x1[0], x2[0]), dim=1),)
        neck = getattr(self, 'neck', None)
        if neck is not None:
            x = neck(x)
        return self.head(x)

    def forward_train(self, img, patch_label, **kwargs):
        """Forward computation during training.

        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.
            patch_label (Tensor): Labels for the relative patch locations.
            kwargs: Any keyword arguments to be used to forward.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        outs = self._forward_head(img)
        return self.head.loss(outs, patch_label)

    def forward_test(self, img, **kwargs):
        """Forward computation during testing.

        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.
            kwargs: Any keyword arguments to be used to forward.

        Returns:
            dict[str, Tensor]: Head outputs moved to CPU, keyed as
                'head0', 'head1', ... in the order the head returns them.
        """
        outs = self._forward_head(img)
        return {
            'head{}'.format(i): out.cpu() for i, out in enumerate(outs)
        }

    def forward(self, img, patch_label=None, mode='train', **kwargs):
        """Dispatch to the forward routine selected by ``mode``.

        Args:
            img (Tensor): Either a 4-D batch (N, 2C, H, W) or a 5-D batch
                (N, 8, 2C, H, W); outside 'extract' mode a 5-D batch is
                folded into (8N, 2C, H, W) first.
            patch_label (Tensor, optional): Labels of shape (N, 8) that
                accompany a 5-D batch; flattened to (8N,) alongside the
                images. May be omitted when no labels are needed (e.g. in
                'test' mode). Default: None.
            mode (str): One of 'train', 'test' or 'extract'.
                Default: 'train'.
            kwargs: Any keyword arguments to be used to forward.

        Returns:
            The result of ``forward_train``, ``forward_test`` or
            ``forward_backbone`` depending on ``mode``.

        Raises:
            ValueError: If ``mode`` is not a supported mode.
        """
        if mode != 'extract' and img.dim() == 5:  # Nx8x(2C)xHxW
            if patch_label is not None:
                assert patch_label.dim() == 2, \
                    'patch_label must be 2-D (Nx8) for a 5-D img batch'
                patch_label = torch.flatten(patch_label)  # (8N)
            # reshape (rather than view) also accepts non-contiguous batches.
            img = img.reshape(
                img.size(0) * img.size(1), *img.shape[2:])  # (8N)x(2C)xHxW
        if mode == 'train':
            return self.forward_train(img, patch_label, **kwargs)
        elif mode == 'test':
            return self.forward_test(img, **kwargs)
        elif mode == 'extract':
            return self.forward_backbone(img)
        else:
            # ValueError is a subclass of Exception, so existing handlers
            # that catch Exception keep working.
            raise ValueError("No such mode: {}".format(mode))
|