mmclassification/mmcls/models/heads/conformer_head.py

# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Sequence, Tuple

import torch
import torch.nn as nn
from mmcls.evaluation.metrics import Accuracy
from mmcls.registry import MODELS
from mmcls.structures import ClsDataSample
from .cls_head import ClsHead


@MODELS.register_module()
class ConformerHead(ClsHead):
"""Linear classifier head.
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (Sequence[int]): Number of channels in the input
feature map.
init_cfg (dict | optional): The extra init config of layers.
Defaults to use ``dict(type='Normal', layer='Linear', std=0.01)``.
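
    The snippet below is an illustrative sketch; the channel sizes are
    placeholders rather than the values of a specific Conformer variant.

    Examples:
        >>> import torch
        >>> from mmcls.models import ConformerHead
        >>> head = ConformerHead(num_classes=10, in_channels=[256, 384])
        >>> # ``feats`` carries, for the last stage, the conv-branch and the
        >>> # transformer-branch features of a batch of 4 images.
        >>> feats = ([torch.rand(4, 256), torch.rand(4, 384)], )
        >>> conv_score, trans_score = head(feats)
        >>> conv_score.shape, trans_score.shape
        (torch.Size([4, 10]), torch.Size([4, 10]))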
"""
def __init__(
self,
num_classes: int,
in_channels: Sequence[int], # [conv_dim, trans_dim]
init_cfg: dict = dict(type='TruncNormal', layer='Linear', std=.02),
**kwargs):
super(ConformerHead, self).__init__(init_cfg=init_cfg, **kwargs)
self.in_channels = in_channels
self.num_classes = num_classes
self.init_cfg = init_cfg
if self.num_classes <= 0:
raise ValueError(
f'num_classes={num_classes} must be a positive integer')
self.conv_cls_head = nn.Linear(self.in_channels[0], num_classes)
self.trans_cls_head = nn.Linear(self.in_channels[1], num_classes)

    def pre_logits(self,
                   feats: Tuple[List[torch.Tensor]]) -> List[torch.Tensor]:
        """The process before the final classification head.

        The input ``feats`` is a tuple of stage features, and each item is a
        list of two tensors: the conv-branch and the transformer-branch
        feature of that stage. ``ConformerHead`` only uses the last stage.
        """
        # The ConformerHead doesn't have any other modules,
        # so just return the last stage after unpacking.
return feats[-1]

    def forward(self, feats: Tuple[List[torch.Tensor]]) -> Tuple[torch.Tensor]:
"""The forward process."""
x = self.pre_logits(feats)
# There are two outputs in the Conformer model
assert len(x) == 2
conv_cls_score = self.conv_cls_head(x[0])
tran_cls_score = self.trans_cls_head(x[1])
return conv_cls_score, tran_cls_score

    def predict(
            self,
            feats: Tuple[List[torch.Tensor]],
            data_samples: List[ClsDataSample] = None) -> List[ClsDataSample]:
        """Inference without augmentation.

        Args:
            feats (tuple): The features extracted from the backbone.
                Multiple stage inputs are acceptable but only the last stage
                will be used to classify. Every item should be a list of two
                tensors with shapes ``(num_samples, in_channels[0])`` and
                ``(num_samples, in_channels[1])``.
            data_samples (List[ClsDataSample], optional): The annotation
                data of every sample. If not None, set ``pred_label`` of
                the input data samples. Defaults to None.

        Returns:
            List[ClsDataSample]: A list of data samples which contains the
            predicted results.
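
        The snippet below is an illustrative sketch with placeholder shapes:

        Examples:
            >>> import torch
            >>> from mmcls.models import ConformerHead
            >>> head = ConformerHead(num_classes=5, in_channels=[64, 96])
            >>> feats = ([torch.rand(2, 64), torch.rand(2, 96)], )
            >>> samples = head.predict(feats)
            >>> len(samples)
            2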
"""
        # This part can be traced by torch.fx
conv_cls_score, tran_cls_score = self(feats)
cls_score = conv_cls_score + tran_cls_score
        # This part can not be traced by torch.fx
predictions = self._get_predictions(cls_score, data_samples)
return predictions

    def _get_loss(self, cls_score: Tuple[torch.Tensor],
                  data_samples: List[ClsDataSample], **kwargs) -> dict:
"""Unpack data samples and compute loss."""
# Unpack data samples and pack targets
if 'score' in data_samples[0].gt_label:
# Batch augmentation may convert labels to one-hot format scores.
target = torch.stack([i.gt_label.score for i in data_samples])
else:
target = torch.cat([i.gt_label.label for i in data_samples])
# compute loss
losses = dict()
loss = sum([
self.loss_module(
score, target, avg_factor=score.size(0), **kwargs)
for score in cls_score
])
losses['loss'] = loss
# compute accuracy
if self.cal_acc:
assert target.ndim == 1, 'If you enable batch augmentation ' \
'like mixup during training, `cal_acc` is pointless.'
acc = Accuracy.calculate(
cls_score[0] + cls_score[1], target, topk=self.topk)
losses.update(
{f'accuracy_top-{k}': a
for k, a in zip(self.topk, acc)})
return losses
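

# A minimal, illustrative sketch of how the head above can be exercised for a
# training step. It assumes a full mmcls 1.x install (so the default
# ``CrossEntropyLoss`` can be built from the registry); the channel sizes and
# labels below are placeholders, not values from a specific Conformer config.
def _demo_conformer_loss():  # pragma: no cover - illustration only
    head = ConformerHead(num_classes=10, in_channels=[256, 384])

    # One "stage" of features: the conv-branch and transformer-branch tensors
    # for a batch of 4 images.
    feats = ([torch.rand(4, 256), torch.rand(4, 384)], )

    # One ClsDataSample per image, each carrying an integer ground-truth
    # label; ``_get_loss`` reads ``gt_label`` from these samples.
    data_samples = []
    for label in range(4):
        sample = ClsDataSample()
        sample.set_gt_label(label)
        data_samples.append(sample)

    # ``ClsHead.loss`` runs ``forward`` and then ``_get_loss``, so the
    # returned dict holds the summed conv-branch + transformer-branch loss.
    return head.loss(feats, data_samples)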