EasyCV/easycv/models/detection/utils/misc.py

# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional

import numpy as np
import torch
import torchvision
from packaging import version
from torch import Tensor

if version.parse(torchvision.__version__) < version.parse('0.7'):
    from torchvision.ops import _new_empty_tensor
    from torchvision.ops.misc import _output_size


@torch.no_grad()
def accuracy(output, target, topk=(1, )):
    """Computes the precision@k for the specified values of k"""
    if target.numel() == 0:
        return [torch.zeros([], device=output.device)]
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res


def interpolate(input,
                size=None,
                scale_factor=None,
                mode='nearest',
                align_corners=None):
    # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
    """
    Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
    This will eventually be supported natively by PyTorch, and this
    class can go away.
    """
    if version.parse(torchvision.__version__) < version.parse('0.7'):
        if input.numel() > 0:
            return torch.nn.functional.interpolate(input, size, scale_factor,
                                                   mode, align_corners)

        output_shape = _output_size(2, input, size, scale_factor)
        output_shape = list(input.shape[:-2]) + list(output_shape)
        return _new_empty_tensor(input, output_shape)
    else:
        return torchvision.ops.misc.interpolate(input, size, scale_factor,
                                                mode, align_corners)


def select_single_mlvl(mlvl_tensors, batch_id, detach=True):
    """Extract a multi-scale single image tensor from a multi-scale batch
    tensor based on batch index.

    Note: The default value of detach is True, because the proposal gradient
    needs to be detached during the training of the two-stage model. E.g
    Cascade Mask R-CNN.

    Args:
        mlvl_tensors (list[Tensor]): Batch tensor for all scale levels,
           each is a 4D-tensor.
        batch_id (int): Batch index.
        detach (bool): Whether detach gradient. Default True.

    Returns:
        list[Tensor]: Multi-scale single image tensor.
    """
    assert isinstance(mlvl_tensors, (list, tuple))
    num_levels = len(mlvl_tensors)

    if detach:
        mlvl_tensor_list = [
            mlvl_tensors[i][batch_id].detach() for i in range(num_levels)
        ]
    else:
        mlvl_tensor_list = [
            mlvl_tensors[i][batch_id] for i in range(num_levels)
        ]
    return mlvl_tensor_list


def filter_scores_and_topk(scores, score_thr, topk, results=None):
    """Filter results using score threshold and topk candidates.

    Args:
        scores (Tensor): The scores, shape (num_bboxes, K).
        score_thr (float): The score filter threshold.
        topk (int): The number of topk candidates.
        results (dict or list or Tensor, Optional): The results to
           which the filtering rule is to be applied. The shape
           of each item is (num_bboxes, N).

    Returns:
        tuple: Filtered results

            - scores (Tensor): The scores after being filtered, \
                shape (num_bboxes_filtered, ).
            - labels (Tensor): The class labels, shape \
                (num_bboxes_filtered, ).
            - anchor_idxs (Tensor): The anchor indexes, shape \
                (num_bboxes_filtered, ).
            - filtered_results (dict or list or Tensor, Optional): \
                The filtered results. The shape of each item is \
                (num_bboxes_filtered, N).
    """
    valid_mask = scores > score_thr
    scores = scores[valid_mask]
    valid_idxs = torch.nonzero(valid_mask)

    num_topk = min(topk, valid_idxs.size(0))
    # torch.sort is actually faster than .topk (at least on GPUs)
    scores, idxs = scores.sort(descending=True)
    scores = scores[:num_topk]
    topk_idxs = valid_idxs[idxs[:num_topk]]
    keep_idxs, labels = topk_idxs.unbind(dim=1)

    filtered_results = None
    if results is not None:
        if isinstance(results, dict):
            filtered_results = {k: v[keep_idxs] for k, v in results.items()}
        elif isinstance(results, list):
            filtered_results = [result[keep_idxs] for result in results]
        elif isinstance(results, torch.Tensor):
            filtered_results = results[keep_idxs]
        else:
            raise NotImplementedError(f'Only supports dict or list or Tensor, '
                                      f'but get {type(results)}.')
    return scores, labels, keep_idxs, filtered_results


def output_postprocess(outputs, img_metas=None):
    detection_boxes = []
    detection_scores = []
    detection_classes = []
    img_metas_list = []

    for i in range(len(outputs)):
        if img_metas:
            img_metas_list.append(img_metas[i])
        if outputs[i] is not None:
            bboxes = outputs[i][:, 0:4] if outputs[i] is not None else None
            if img_metas:
                bboxes /= img_metas[i]['scale_factor'][0]
            detection_boxes.append(bboxes.cpu().numpy())
            detection_scores.append(
                (outputs[i][:, 4] * outputs[i][:, 5]).cpu().numpy())
            detection_classes.append(outputs[i][:, 6].cpu().numpy().astype(
                np.int32))
        else:
            detection_boxes.append(None)
            detection_scores.append(None)
            detection_classes.append(None)

    test_outputs = {
        'detection_boxes': detection_boxes,
        'detection_scores': detection_scores,
        'detection_classes': detection_classes,
        'img_metas': img_metas_list
    }

    return test_outputs


def fp16_clamp(x, min=None, max=None):
    if not x.is_cuda and x.dtype == torch.float16:
        # clamp for cpu float16, tensor fp16 has no clamp implementation
        return x.float().clamp(min, max).half()

    return x.clamp(min, max)


def inverse_sigmoid(x, eps=1e-3):
    x = x.clamp(min=0, max=1)
    x1 = x.clamp(min=eps)
    x2 = (1 - x).clamp(min=eps)
    return torch.log(x1 / x2)
refactor detection, support detr and dab-detr (#104) refactor detection, support detr and dab-detr 2022-07-11 14:52:49 +08:00			`# Copyright (c) OpenMMLab. All rights reserved.`
			`from typing import List, Optional`

			`import numpy as np`
			`import torch`
			`import torchvision`
			`from packaging import version`
			`from torch import Tensor`

			`if version.parse(torchvision.__version__) < version.parse('0.7'):`
			`from torchvision.ops import _new_empty_tensor`
			`from torchvision.ops.misc import _output_size`


			`@torch.no_grad()`
			`def accuracy(output, target, topk=(1, )):`
			`"""Computes the precision@k for the specified values of k"""`
			`if target.numel() == 0:`
			`return [torch.zeros([], device=output.device)]`
			`maxk = max(topk)`
			`batch_size = target.size(0)`

			`_, pred = output.topk(maxk, 1, True, True)`
			`pred = pred.t()`
			`correct = pred.eq(target.view(1, -1).expand_as(pred))`

			`res = []`
			`for k in topk:`
			`correct_k = correct[:k].view(-1).float().sum(0)`
			`res.append(correct_k.mul_(100.0 / batch_size))`
			`return res`


			`def interpolate(input,`
			`size=None,`
			`scale_factor=None,`
			`mode='nearest',`
			`align_corners=None):`
			`# type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor`
			`"""`
			`Equivalent to nn.functional.interpolate, but with support for empty batch sizes.`
			`This will eventually be supported natively by PyTorch, and this`
			`class can go away.`
			`"""`
			`if version.parse(torchvision.__version__) < version.parse('0.7'):`
			`if input.numel() > 0:`
			`return torch.nn.functional.interpolate(input, size, scale_factor,`
			`mode, align_corners)`

			`output_shape = _output_size(2, input, size, scale_factor)`
			`output_shape = list(input.shape[:-2]) + list(output_shape)`
			`return _new_empty_tensor(input, output_shape)`
			`else:`
			`return torchvision.ops.misc.interpolate(input, size, scale_factor,`
			`mode, align_corners)`


			`def select_single_mlvl(mlvl_tensors, batch_id, detach=True):`
			`"""Extract a multi-scale single image tensor from a multi-scale batch`
			`tensor based on batch index.`

			`Note: The default value of detach is True, because the proposal gradient`
			`needs to be detached during the training of the two-stage model. E.g`
			`Cascade Mask R-CNN.`

			`Args:`
			`mlvl_tensors (list[Tensor]): Batch tensor for all scale levels,`
			`each is a 4D-tensor.`
			`batch_id (int): Batch index.`
			`detach (bool): Whether detach gradient. Default True.`

			`Returns:`
			`list[Tensor]: Multi-scale single image tensor.`
			`"""`
			`assert isinstance(mlvl_tensors, (list, tuple))`
			`num_levels = len(mlvl_tensors)`

			`if detach:`
			`mlvl_tensor_list = [`
			`mlvl_tensors[i][batch_id].detach() for i in range(num_levels)`
			`]`
			`else:`
			`mlvl_tensor_list = [`
			`mlvl_tensors[i][batch_id] for i in range(num_levels)`
			`]`
			`return mlvl_tensor_list`


			`def filter_scores_and_topk(scores, score_thr, topk, results=None):`
			`"""Filter results using score threshold and topk candidates.`

			`Args:`
			`scores (Tensor): The scores, shape (num_bboxes, K).`
			`score_thr (float): The score filter threshold.`
			`topk (int): The number of topk candidates.`
			`results (dict or list or Tensor, Optional): The results to`
			`which the filtering rule is to be applied. The shape`
			`of each item is (num_bboxes, N).`

			`Returns:`
			`tuple: Filtered results`

			`- scores (Tensor): The scores after being filtered, \`
			`shape (num_bboxes_filtered, ).`
			`- labels (Tensor): The class labels, shape \`
			`(num_bboxes_filtered, ).`
			`- anchor_idxs (Tensor): The anchor indexes, shape \`
			`(num_bboxes_filtered, ).`
			`- filtered_results (dict or list or Tensor, Optional): \`
			`The filtered results. The shape of each item is \`
			`(num_bboxes_filtered, N).`
			`"""`
			`valid_mask = scores > score_thr`
			`scores = scores[valid_mask]`
			`valid_idxs = torch.nonzero(valid_mask)`

			`num_topk = min(topk, valid_idxs.size(0))`
			`# torch.sort is actually faster than .topk (at least on GPUs)`
			`scores, idxs = scores.sort(descending=True)`
			`scores = scores[:num_topk]`
			`topk_idxs = valid_idxs[idxs[:num_topk]]`
			`keep_idxs, labels = topk_idxs.unbind(dim=1)`

			`filtered_results = None`
			`if results is not None:`
			`if isinstance(results, dict):`
			`filtered_results = {k: v[keep_idxs] for k, v in results.items()}`
			`elif isinstance(results, list):`
			`filtered_results = [result[keep_idxs] for result in results]`
			`elif isinstance(results, torch.Tensor):`
			`filtered_results = results[keep_idxs]`
			`else:`
			`raise NotImplementedError(f'Only supports dict or list or Tensor, '`
			`f'but get {type(results)}.')`
			`return scores, labels, keep_idxs, filtered_results`


			`def output_postprocess(outputs, img_metas=None):`
			`detection_boxes = []`
			`detection_scores = []`
			`detection_classes = []`
			`img_metas_list = []`

			`for i in range(len(outputs)):`
			`if img_metas:`
			`img_metas_list.append(img_metas[i])`
			`if outputs[i] is not None:`
			`bboxes = outputs[i][:, 0:4] if outputs[i] is not None else None`
			`if img_metas:`
			`bboxes /= img_metas[i]['scale_factor'][0]`
			`detection_boxes.append(bboxes.cpu().numpy())`
			`detection_scores.append(`
			`(outputs[i][:, 4] * outputs[i][:, 5]).cpu().numpy())`
			`detection_classes.append(outputs[i][:, 6].cpu().numpy().astype(`
			`np.int32))`
			`else:`
			`detection_boxes.append(None)`
			`detection_scores.append(None)`
			`detection_classes.append(None)`

			`test_outputs = {`
			`'detection_boxes': detection_boxes,`
			`'detection_scores': detection_scores,`
			`'detection_classes': detection_classes,`
			`'img_metas': img_metas_list`
			`}`

			`return test_outputs`


			`def fp16_clamp(x, min=None, max=None):`
			`if not x.is_cuda and x.dtype == torch.float16:`
			`# clamp for cpu float16, tensor fp16 has no clamp implementation`
			`return x.float().clamp(min, max).half()`

			`return x.clamp(min, max)`


			`def inverse_sigmoid(x, eps=1e-3):`
			`x = x.clamp(min=0, max=1)`
			`x1 = x.clamp(min=eps)`
			`x2 = (1 - x).clamp(min=eps)`
			`return torch.log(x1 / x2)`