[Fix] support ncnn faster-rcnn (#304)

* wtf * Support fcos ncnn end2end * support ncnn two stage detector * fix test
2025-01-14 08:09:43 +08:00 · 2021-12-20 15:43:38 +08:00 · 2021-12-20 15:43:38 +08:00 · abdf64a576
commit abdf64a576
parent fabdb473bb
3 changed files with 122 additions and 4 deletions
--- a/mmdeploy/codebase/mmdet/core/bbox/delta_xywh_bbox_coder.py
+++ b/mmdeploy/codebase/mmdet/core/bbox/delta_xywh_bbox_coder.py
@ -141,3 +141,121 @@ def delta2bbox(ctx,
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size())
    return bboxes
@FUNCTION_REWRITER.register_rewriter(
    func_name='mmdet.core.bbox.coder.delta_xywh_bbox_coder.delta2bbox',  # noqa
    backend='ncnn')
 def delta2bbox__ncnn(ctx,
                     rois,
                     deltas,
                     means=(0., 0., 0., 0.),
                     stds=(1., 1., 1., 1.),
                     max_shape=None,
                     wh_ratio_clip=16 / 1000,
                     clip_border=True,
                     add_ctr_clamp=False,
                     ctr_clamp=32):
    """Rewrite `delta2bbox` for ncnn backend.
    Batch dimension is not supported by ncnn, but supported by pytorch.
    NCNN regards the lowest two dimensions as continuous address with byte
    alignment, so the lowest two dimensions are not absolutely independent.
    Reshape operator with -1 arguments should operates ncnn::Mat with
    dimension >= 3.
    Args:
        ctx (ContextCaller): The context with additional information.
        rois (Tensor): Boxes to be transformed. Has shape (N, 4) or (B, N, 4)
        deltas (Tensor): Encoded offsets with respect to each roi.
            Has shape (B, N, num_classes * 4) or (B, N, 4) or
            (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H
            when rois is a grid of anchors.Offset encoding follows [1]_.
        means (Sequence[float]): Denormalizing means for delta coordinates
        stds (Sequence[float]): Denormalizing standard deviation for delta
            coordinates
        max_shape (Sequence[int] or torch.Tensor or Sequence[
            Sequence[int]],optional): Maximum bounds for boxes, specifies
            (H, W, C) or (H, W). If rois shape is (B, N, 4), then
            the max_shape should be a Sequence[Sequence[int]]
            and the length of max_shape should also be B.
        wh_ratio_clip (float): Maximum aspect ratio for boxes.
        clip_border (bool, optional): Whether clip the objects outside the
            border of the image. Defaults to True.
        add_ctr_clamp (bool): Whether to add center clamp, when added, the
            predicted box is clamped is its center is too far away from
            the original anchor's center. Only used by YOLOF. Default False.
        ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF.
            Default 32.
    Return:
        bboxes (Tensor): Boxes with shape (B, N, num_classes * 4) or (B, N, 4)
            or (N, num_classes * 4) or (N, 4), where 4 represent tl_x, tl_y,
            br_x, br_y.
    """
    means = deltas.new_tensor(means).view(1, 1,
                                          -1).repeat(1, deltas.size(-2),
                                                     deltas.size(-1) // 4).data
    stds = deltas.new_tensor(stds).view(1, 1,
                                        -1).repeat(1, deltas.size(-2),
                                                   deltas.size(-1) // 4).data
    denorm_deltas = deltas * stds + means
    if denorm_deltas.shape[-1] == 4:
        dx = denorm_deltas[..., 0:1]
        dy = denorm_deltas[..., 1:2]
        dw = denorm_deltas[..., 2:3]
        dh = denorm_deltas[..., 3:4]
    else:
        dx = denorm_deltas[..., 0::4]
        dy = denorm_deltas[..., 1::4]
        dw = denorm_deltas[..., 2::4]
        dh = denorm_deltas[..., 3::4]
    x1, y1 = rois[..., 0:1], rois[..., 1:2]
    x2, y2 = rois[..., 2:3], rois[..., 3:4]
    # Compute center of each roi
    px = (x1 + x2) * 0.5
    py = (y1 + y2) * 0.5
    # Compute width/height of each roi
    pw = x2 - x1
    ph = y2 - y1
    # do not use expand unless necessary
    # since expand is a custom ops
    if px.shape[-1] != 4:
        px = px.expand_as(dx)
    if py.shape[-1] != 4:
        py = py.expand_as(dy)
    if pw.shape[-1] != 4:
        pw = pw.expand_as(dw)
    if px.shape[-1] != 4:
        ph = ph.expand_as(dh)
    dx_width = pw * dx
    dy_height = ph * dy
    max_ratio = np.abs(np.log(wh_ratio_clip))
    if add_ctr_clamp:
        dx_width = torch.clamp(dx_width, max=ctr_clamp, min=-ctr_clamp)
        dy_height = torch.clamp(dy_height, max=ctr_clamp, min=-ctr_clamp)
        dw = torch.clamp(dw, max=max_ratio)
        dh = torch.clamp(dh, max=max_ratio)
    else:
        dw = dw.clamp(min=-max_ratio, max=max_ratio)
        dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Use network energy to shift the center of each roi
    gx = px + dx_width
    gy = py + dy_height
    # Convert center-xy/width/height to top-left, bottom-right
    x1 = gx - gw * 0.5
    y1 = gy - gh * 0.5
    x2 = gx + gw * 0.5
    y2 = gy + gh * 0.5
    if clip_border and max_shape is not None:
        from mmdeploy.codebase.mmdet.deploy import clip_bboxes
        x1, y1, x2, y2 = clip_bboxes(x1, y1, x2, y2, max_shape)
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size())
    return bboxes
--- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
+++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
@ -483,8 +483,8 @@ class PartitionTwoStageModel(End2EndModel):
            rois,
            cls_score,
            bbox_pred,
-            img_metas[0]['img_shape'],
+            img_metas[0][0]['img_shape'],
-            img_metas[0]['scale_factor'],
+            img_metas[0][0]['scale_factor'],
            cfg=rcnn_test_cfg)
    def forward_test(self, imgs: torch.Tensor, img_metas: Sequence[dict],
--- a/tests/test_codebase/test_mmdet/test_object_detection_model.py
+++ b/tests/test_codebase/test_mmdet/test_object_detection_model.py
@ -338,11 +338,11 @@ class TestPartitionTwoStageModel:
        rois = torch.rand(1, 10, 5)
        cls_score = torch.rand(10, 81)
        bbox_pred = torch.rand(10, 320)
-        img_metas = [{
+        img_metas = [[{
            'ori_shape': [32, 32, 3],
            'img_shape': [32, 32, 3],
            'scale_factor': [1, 1, 1, 1],
-        }]
+        }]]
        results = self.model.partition1_postprocess(
            rois=rois,
            cls_score=cls_score,