# mmselfsup/openselfsup/models/relative_loc.py

import torch
import torch.nn as nn

from openselfsup.utils import print_log

from . import builder
from .registry import MODELS


@MODELS.register_module
class RelativeLoc(nn.Module):
    """Relative patch location.

    Implementation of "Unsupervised Visual Representation Learning
    by Context Prediction (https://arxiv.org/abs/1505.05192)".

    Args:
        backbone: Backbone specification forwarded to
            ``builder.build_backbone``.
        neck (optional): Neck specification forwarded to
            ``builder.build_neck``. When None, no neck is built.
            Default: None.
        head (optional): Head specification forwarded to
            ``builder.build_head``. When None, no head is built.
            Default: None.
        pretrained (str, optional): Path to pre-trained weights. Default: None.

    Note:
        ``forward_train`` and ``forward_test`` use both the neck and the
        head; a model built without them only supports ``mode='extract'``.
    """

    def __init__(self, backbone, neck=None, head=None, pretrained=None):
        super(RelativeLoc, self).__init__()
        self.backbone = builder.build_backbone(backbone)
        if neck is not None:
            self.neck = builder.build_neck(neck)
        if head is not None:
            self.head = builder.build_head(head)
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialize the weights of model.

        The neck and head are only initialized when they were actually built,
        so a backbone-only model can be constructed without error.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Default: None.
        """
        if pretrained is not None:
            print_log('load model from: {}'.format(pretrained), logger='root')
        self.backbone.init_weights(pretrained=pretrained)
        # ``neck``/``head`` attributes exist only if a spec was passed to
        # ``__init__``; ``getattr`` with a default avoids the AttributeError
        # that ``nn.Module.__getattr__`` raises for missing names.
        neck = getattr(self, 'neck', None)
        if neck is not None:
            neck.init_weights(init_linear='normal')
        head = getattr(self, 'head', None)
        if head is not None:
            head.init_weights(init_linear='normal', std=0.005)

    def forward_backbone(self, img):
        """Forward backbone.

        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.

        Returns:
            tuple[Tensor]: backbone outputs.
        """
        x = self.backbone(img)
        return x

    def _forward_pair(self, img):
        """Run both patches through backbone, neck and head.

        Args:
            img (Tensor): Patch pairs stacked along dim 1; the tensor is
                split into two equal halves along that dimension.

        Returns:
            The outputs of ``self.head`` for the fused pair features.
        """
        img1, img2 = torch.chunk(img, 2, dim=1)
        x1 = self.forward_backbone(img1)  # tuple
        x2 = self.forward_backbone(img2)  # tuple
        # Only the first backbone output of each patch is used; the two are
        # concatenated along dim 1 and re-wrapped in a tuple for the neck.
        x = (torch.cat((x1[0], x2[0]), dim=1),)
        x = self.neck(x)
        return self.head(x)

    def forward_train(self, img, patch_label, **kwargs):
        """Forward computation during training.

        Args:
            img (Tensor): Input images of shape (N, C, H, W).
                Typically these should be mean centered and std scaled.
            patch_label (Tensor): Labels for the relative patch locations.
            kwargs: Any keyword arguments to be used to forward.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        outs = self._forward_pair(img)
        losses = self.head.loss(outs, patch_label)
        return losses

    def forward_test(self, img, **kwargs):
        """Forward computation during testing.

        Args:
            img (Tensor): Input images of shape (N, C, H, W), holding the two
                patches of each pair stacked along dim 1.
            kwargs: Any keyword arguments to be used to forward.

        Returns:
            dict[str, Tensor]: Head outputs moved to CPU, keyed ``'head0'``,
                ``'head1'``, ... in the order the head returns them.
        """
        outs = self._forward_pair(img)
        return {
            'head{}'.format(i): out.cpu()
            for i, out in enumerate(outs)
        }

    def forward(self, img, patch_label=None, mode='train', **kwargs):
        """Dispatch to the train, test or extract path.

        Args:
            img (Tensor): Input images. Outside ``'extract'`` mode a 5-D
                tensor (Nx8x(2C)xHxW) is flattened over its first two
                dimensions to (8N)x(2C)xHxW.
            patch_label (Tensor, optional): Labels for the relative patch
                locations. When ``img`` is 5-D and labels are given they must
                be 2-D (Nx8) and are flattened to (8N). Default: None.
            mode (str): One of ``'train'``, ``'test'`` or ``'extract'``.
                Default: 'train'.
            kwargs: Any keyword arguments to be used to forward.

        Returns:
            The result of ``forward_train``, ``forward_test`` or
            ``forward_backbone``, depending on ``mode``.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        if mode != "extract" and img.dim() == 5:  # Nx8x(2C)xHxW
            img = img.view(
                img.size(0) * img.size(1), img.size(2), img.size(3),
                img.size(4))  # (8N)x(2C)xHxW
            # Labels are optional in test mode, so only reshape them when
            # they were actually supplied.
            if patch_label is not None:
                assert patch_label.dim() == 2  # Nx8
                patch_label = torch.flatten(patch_label)  # (8N)
        if mode == 'train':
            return self.forward_train(img, patch_label, **kwargs)
        elif mode == 'test':
            return self.forward_test(img, **kwargs)
        elif mode == 'extract':
            return self.forward_backbone(img)
        else:
            raise ValueError("No such mode: {}".format(mode))
relative patch location 2020-07-08 22:30:48 +08:00			`import torch`
			`import torch.nn as nn`

			`from openselfsup.utils import print_log`

			`from . import builder`
			`from .registry import MODELS`


			`@MODELS.register_module`
			`class RelativeLoc(nn.Module):`
add docstring 2020-09-02 18:49:39 +08:00			`"""Relative patch location.`

			`Implementation of "Unsupervised Visual Representation Learning`
			`by Context Prediction (https://arxiv.org/abs/1505.05192)".`

			`Args:`
			`backbone (nn.Module): Module of backbone ConvNet.`
			`neck (nn.Module): Module of deep features to compact feature vectors.`
			`head (nn.Module): Module of loss functions.`
			`pretrained (str, optional): Path to pre-trained weights. Default: None.`
			`"""`
relative patch location 2020-07-08 22:30:48 +08:00
relative patch location 2020-07-09 15:34:22 +08:00			`def __init__(self, backbone, neck=None, head=None, pretrained=None):`
relative patch location 2020-07-08 22:30:48 +08:00			`super(RelativeLoc, self).__init__()`
			`self.backbone = builder.build_backbone(backbone)`
relative patch location 2020-07-09 15:34:22 +08:00			`if neck is not None:`
			`self.neck = builder.build_neck(neck)`
relative patch location 2020-07-08 22:30:48 +08:00			`if head is not None:`
			`self.head = builder.build_head(head)`
			`self.init_weights(pretrained=pretrained)`

			`def init_weights(self, pretrained=None):`
add docstring 2020-09-02 18:49:39 +08:00			`"""Initialize the weights of model.`

			`Args:`
			`pretrained (str, optional): Path to pre-trained weights.`
			`Default: None.`
			`"""`
relative patch location 2020-07-08 22:30:48 +08:00			`if pretrained is not None:`
			`print_log('load model from: {}'.format(pretrained), logger='root')`
			`self.backbone.init_weights(pretrained=pretrained)`
relative patch location 2020-07-09 15:34:22 +08:00			`self.neck.init_weights(init_linear='normal')`
			`self.head.init_weights(init_linear='normal', std=0.005)`
relative patch location 2020-07-08 22:30:48 +08:00
			`def forward_backbone(self, img):`
add docstring 2020-09-02 18:49:39 +08:00			`"""Forward backbone.`

			`Args:`
			`img (Tensor): Input images of shape (N, C, H, W).`
			`Typically these should be mean centered and std scaled.`
relative patch location 2020-07-08 22:30:48 +08:00
			`Returns:`
add docstring 2020-09-02 18:49:39 +08:00			`tuple[Tensor]: backbone outputs.`
relative patch location 2020-07-08 22:30:48 +08:00			`"""`
			`x = self.backbone(img)`
			`return x`

			`def forward_train(self, img, patch_label, **kwargs):`
add docstring 2020-09-02 18:49:39 +08:00			`"""Forward computation during training.`

			`Args:`
			`img (Tensor): Input images of shape (N, C, H, W).`
			`Typically these should be mean centered and std scaled.`
			`patch_label (Tensor): Labels for the relative patch locations.`
			`kwargs: Any keyword arguments to be used to forward.`

			`Returns:`
			`dict[str, Tensor]: A dictionary of loss components.`
			`"""`
relative patch location 2020-07-08 22:30:48 +08:00			`img1, img2 = torch.chunk(img, 2, dim=1)`
			`x1 = self.forward_backbone(img1) # tuple`
			`x2 = self.forward_backbone(img2) # tuple`
relative patch location 2020-07-09 15:34:22 +08:00			`x = (torch.cat((x1[0], x2[0]), dim=1),)`
			`x = self.neck(x)`
relative patch location 2020-07-08 22:30:48 +08:00			`outs = self.head(x)`
			`loss_inputs = (outs, patch_label)`
			`losses = self.head.loss(*loss_inputs)`
			`return losses`

			`def forward_test(self, img, **kwargs):`
			`img1, img2 = torch.chunk(img, 2, dim=1)`
			`x1 = self.forward_backbone(img1) # tuple`
			`x2 = self.forward_backbone(img2) # tuple`
relative patch location 2020-07-09 15:34:22 +08:00			`x = (torch.cat((x1[0], x2[0]), dim=1),)`
			`x = self.neck(x)`
relative patch location 2020-07-08 22:30:48 +08:00			`outs = self.head(x)`
			`keys = ['head{}'.format(i) for i in range(len(outs))]`
relative patch location 2020-07-09 15:34:22 +08:00			`out_tensors = [out.cpu() for out in outs]`
relative patch location 2020-07-08 22:30:48 +08:00			`return dict(zip(keys, out_tensors))`

			`def forward(self, img, patch_label=None, mode='train', **kwargs):`
add docstring 2020-09-02 18:49:39 +08:00			`if mode != "extract" and img.dim() == 5: # Nx8x(2C)xHxW`
			`assert patch_label.dim() == 2 # Nx8`
relative patch location 2020-07-08 22:30:48 +08:00			`img = img.view(`
			`img.size(0) * img.size(1), img.size(2), img.size(3),`
add docstring 2020-09-02 18:49:39 +08:00			`img.size(4)) # (8N)x(2C)xHxW`
			`patch_label = torch.flatten(patch_label) # (8N)`
relative patch location 2020-07-08 22:30:48 +08:00			`if mode == 'train':`
			`return self.forward_train(img, patch_label, **kwargs)`
			`elif mode == 'test':`
			`return self.forward_test(img, **kwargs)`
			`elif mode == 'extract':`
			`return self.forward_backbone(img)`
			`else:`
			`raise Exception("No such mode: {}".format(mode))`