mmocr/projects/ABCNet/abcnet/model/bezier_roi_extractor.py
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Tuple

import torch
from mmengine.structures import InstanceData
from torch import Tensor

from mmocr.registry import MODELS
from mmocr.utils import ConfigType, OptMultiConfig

from .base_roi_extractor import BaseRoIExtractor


@MODELS.register_module()
class BezierRoIExtractor(BaseRoIExtractor):
"""Extract RoI features from a single level feature map.
If there are multiple input feature levels, each RoI is mapped to a level
according to its scale. The mapping rule is proposed in
`FPN <https://arxiv.org/abs/1612.03144>`_.
Args:
roi_layer (:obj:`ConfigDict` or dict): Specify RoI layer type and
arguments.
out_channels (int): Output channels of RoI layers.
featmap_strides (List[int]): Strides of input feature maps.
finest_scale (int): Scale threshold of mapping to level 0.
Defaults to 56.
init_cfg (:obj:`ConfigDict` or dict or list[:obj:`ConfigDict` or \
dict], optional): Initialization config dict. Defaults to None.
"""
def __init__(self,
roi_layer: ConfigType,
out_channels: int,
featmap_strides: List[int],
finest_scale: int = 96,
init_cfg: OptMultiConfig = None) -> None:
super().__init__(
roi_layer=roi_layer,
out_channels=out_channels,
featmap_strides=featmap_strides,
init_cfg=init_cfg)
self.finest_scale = finest_scale

    def to_roi(self, beziers: List[Tensor]) -> Tensor:
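        """Convert per-image bezier control points to batched RoIs.

        Args:
            beziers (List[Tensor]): Bezier control points of each image in
                the batch.

        Returns:
            Tensor: Batched RoIs of shape (k, 17), where column 0 holds the
            image index and the remaining columns hold the control points.
        """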
rois_list = []
for img_id, bezier in enumerate(beziers):
img_inds = bezier.new_full((bezier.size(0), 1), img_id)
rois = torch.cat([img_inds, bezier], dim=-1)
rois_list.append(rois)
rois = torch.cat(rois_list, 0)
return rois

    def map_roi_levels(self, beziers: Tensor, num_levels: int) -> Tensor:
"""Map rois to corresponding feature levels by scales.
- scale < finest_scale * 2: level 0
- finest_scale * 2 <= scale < finest_scale * 4: level 1
- finest_scale * 4 <= scale < finest_scale * 8: level 2
- scale >= finest_scale * 8: level 3
Args:
beziers (Tensor): Input bezier control points, shape (k, 17).
num_levels (int): Total level number.
Returns:
Tensor: Level index (0-based) of each RoI, shape (k, )
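
        Example (illustrative, with the default ``finest_scale=96``): if the
        first and last control points are 100 pixels apart, ``scale = 200``
        and ``floor(log2(200 / 96)) = 1``, so the RoI is assigned to level 1.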
"""
p1 = beziers[:, 1:3]
p2 = beziers[:, 15:]
scale = ((p1 - p2)**2).sum(dim=1).sqrt() * 2
target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))
target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()
return target_lvls

    def forward(self, feats: Tuple[Tensor],
proposal_instances: List[InstanceData]) -> Tensor:
"""Extractor ROI feats.
Args:
feats (Tuple[Tensor]): Multi-scale features.
proposal_instances(List[InstanceData]): Proposal instances.
Returns:
Tensor: RoI feature.
"""
beziers = [p_i.beziers for p_i in proposal_instances]
rois = self.to_roi(beziers)
# convert fp32 to fp16 when amp is on
rois = rois.type_as(feats[0])
out_size = self.roi_layers[0].output_size
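        # Only the first three feature levels are used for RoI extraction.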
feats = feats[:3]
num_levels = len(feats)
roi_feats = feats[0].new_zeros(
rois.size(0), self.out_channels, *out_size)
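        # Fast path: with a single feature level, apply its RoI layer directly.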
if num_levels == 1:
if len(rois) == 0:
return roi_feats
return self.roi_layers[0](feats[0], rois)
target_lvls = self.map_roi_levels(rois, num_levels)
for i in range(num_levels):
mask = target_lvls == i
inds = mask.nonzero(as_tuple=False).squeeze(1)
if inds.numel() > 0:
rois_ = rois[inds]
roi_feats_t = self.roi_layers[i](feats[i], rois_)
roi_feats[inds] = roi_feats_t
else:
# Sometimes some pyramid levels will not be used for RoI
# feature extraction and this will cause an incomplete
# computation graph in one GPU, which is different from those
# in other GPUs and will cause a hanging error.
# Therefore, we add it to ensure each feature pyramid is
# included in the computation graph to avoid runtime bugs.
roi_feats += sum(
x.view(-1)[0]
for x in self.parameters()) * 0. + feats[i].sum() * 0.
return roi_feats