MMCV deprecates the support for parrots (#2748)

* Fix * Revove parrots jit * Fix doc * Fix lint * fix lint * Fix * fix * decouple with mmengine parrots_wrapper * Remove parrots macro * Fix cpp lint * Update mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh commit suggestion test Co-authored-by: Zaida Zhou <58739961+zhouzaida@users.noreply.github.com> * Fix lint * Fix yapf * Fix clang format --------- Co-authored-by: Zaida Zhou <58739961+zhouzaida@users.noreply.github.com>
2023-04-20 10:38:55 +08:00 · 2023-04-20 10:38:55 +08:00 · 86d50f113f
parent 733e6ff84e
commit 86d50f113f
239 changed files with 334 additions and 10122 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -1,6 +1,5 @@
 include requirements/runtime.txt
 include mmcv/ops/csrc/common/cuda/*.cuh mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp
 include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu mmcv/ops/csrc/pytorch/cuda/*.cpp mmcv/ops/csrc/pytorch/cpu/*.cpp
-include mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp
 include mmcv/ops/csrc/pytorch/mps/*.mm mmcv/ops/csrc/common/mps/*.h mmcv/ops/csrc/common/mps/*.mm
 recursive-include mmcv/ops/csrc/ *.h *.hpp *.cpp *.cuh *.cu *.mm
--- a/docs/en/api/utils.rst
+++ b/docs/en/api/utils.rst
@ -19,5 +19,3 @@ mmcv.utils
   IS_MLU_AVAILABLE
   IS_MPS_AVAILABLE
   collect_env
-   jit
-   skip_no_elena
--- a/docs/zh_cn/api/utils.rst
+++ b/docs/zh_cn/api/utils.rst
@ -19,5 +19,3 @@ mmcv.utils
   IS_MLU_AVAILABLE
   IS_MPS_AVAILABLE
   collect_env
-   jit
-   skip_no_elena
--- a/mmcv/cnn/bricks/activation.py
+++ b/mmcv/cnn/bricks/activation.py
@ -92,8 +92,7 @@ class GELU(nn.Module):
        return F.gelu(input)


-if (TORCH_VERSION == 'parrots'
-        or digit_version(TORCH_VERSION) < digit_version('1.4')):
+if digit_version(TORCH_VERSION) < digit_version('1.4'):
    MODELS.register_module(module=GELU)
 else:
    MODELS.register_module(module=nn.GELU)
--- a/mmcv/cnn/bricks/conv_module.py
+++ b/mmcv/cnn/bricks/conv_module.py
@ -6,7 +6,8 @@ import torch
 import torch.nn as nn
 from mmengine.model import constant_init, kaiming_init
 from mmengine.registry import MODELS
-from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm, _InstanceNorm
+from torch.nn.modules.batchnorm import _BatchNorm
+from torch.nn.modules.instancenorm import _InstanceNorm

 from .activation import build_activation_layer
 from .conv import build_conv_layer
--- a/mmcv/cnn/bricks/hswish.py
+++ b/mmcv/cnn/bricks/hswish.py
@ -30,8 +30,7 @@ class HSwish(nn.Module):
        return x * self.act(x + 3) / 6


-if (TORCH_VERSION == 'parrots'
-        or digit_version(TORCH_VERSION) < digit_version('1.7')):
+if digit_version(TORCH_VERSION) < digit_version('1.7'):
    # Hardswish is not supported when PyTorch version < 1.6.
    # And Hardswish in PyTorch 1.6 does not support inplace.
    MODELS.register_module(module=HSwish)
--- a/mmcv/cnn/bricks/norm.py
+++ b/mmcv/cnn/bricks/norm.py
@ -5,14 +5,14 @@ from typing import Dict, Tuple, Union
 import torch.nn as nn
 from mmengine.registry import MODELS
 from mmengine.utils import is_tuple_of
-from mmengine.utils.dl_utils.parrots_wrapper import (SyncBatchNorm, _BatchNorm,
-                                                     _InstanceNorm)
+from torch.nn.modules.batchnorm import _BatchNorm
+from torch.nn.modules.instancenorm import _InstanceNorm

 MODELS.register_module('BN', module=nn.BatchNorm2d)
 MODELS.register_module('BN1d', module=nn.BatchNorm1d)
 MODELS.register_module('BN2d', module=nn.BatchNorm2d)
 MODELS.register_module('BN3d', module=nn.BatchNorm3d)
-MODELS.register_module('SyncBN', module=SyncBatchNorm)
+MODELS.register_module('SyncBN', module=nn.SyncBatchNorm)
 MODELS.register_module('GN', module=nn.GroupNorm)
 MODELS.register_module('LN', module=nn.LayerNorm)
 MODELS.register_module('IN', module=nn.InstanceNorm2d)
--- a/mmcv/cnn/bricks/wrappers.py
+++ b/mmcv/cnn/bricks/wrappers.py
@ -12,16 +12,13 @@ import torch.nn as nn
 from mmengine.registry import MODELS
 from torch.nn.modules.utils import _pair, _triple

-if torch.__version__ == 'parrots':
-    TORCH_VERSION = torch.__version__
-else:
-    # torch.__version__ could be 1.3.1+cu92, we only need the first two
-    # for comparison
-    TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2])
+# torch.__version__ could be 1.3.1+cu92, we only need the first two
+# for comparison
+TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2])


 def obsolete_torch_version(torch_version, version_threshold) -> bool:
-    return torch_version == 'parrots' or torch_version <= version_threshold
+    return torch_version <= version_threshold


 class NewEmptyTensorOp(torch.autograd.Function):
--- a/mmcv/ops/ball_query.py
+++ b/mmcv/ops/ball_query.py
@ -75,8 +75,7 @@ class BallQuery(Function):
                min_radius=min_radius,
                max_radius=max_radius,
                nsample=sample_num)
-        if torch.__version__ != 'parrots':
-            ctx.mark_non_differentiable(idx)
+        ctx.mark_non_differentiable(idx)
        return idx

    @staticmethod
--- a/mmcv/ops/bbox.py
+++ b/mmcv/ops/bbox.py
@ -116,10 +116,6 @@ def bbox_overlaps(bboxes1: torch.Tensor,
    if rows * cols == 0:
        return ious

-    if bboxes1.device.type == 'cpu' and torch.__version__ == 'parrots':
-        return _bbox_overlaps_cpu(
-            bboxes1, bboxes2, mode=mode, aligned=aligned, offset=offset)
-
    ext_module.bbox_overlaps(
        bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset)

--- a/mmcv/ops/carafe.py
+++ b/mmcv/ops/carafe.py
@ -56,8 +56,7 @@ class CARAFENaiveFunction(Function):
            group_size=group_size,
            scale_factor=scale_factor)

-        if features.requires_grad or masks.requires_grad or \
-                torch.__version__ == 'parrots':
+        if features.requires_grad or masks.requires_grad:
            ctx.save_for_backward(features, masks)
        return output

@ -150,8 +149,7 @@ class CARAFEFunction(Function):
            group_size=group_size,
            scale_factor=scale_factor)

-        if features.requires_grad or masks.requires_grad or \
-                torch.__version__ == 'parrots':
+        if features.requires_grad or masks.requires_grad:
            ctx.save_for_backward(features, masks, rfeatures)
        return output

--- a/mmcv/ops/contour_expand.py
+++ b/mmcv/ops/contour_expand.py
@ -36,17 +36,6 @@ def contour_expand(kernel_mask: Union[np.array, torch.Tensor],
    if isinstance(internal_kernel_label, np.ndarray):
        internal_kernel_label = torch.from_numpy(internal_kernel_label)

-    if torch.__version__ == 'parrots':
-        if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0:
-            label = []
-        else:
-            label = ext_module.contour_expand(
-                kernel_mask,
-                internal_kernel_label,
-                min_kernel_area=min_kernel_area,
-                kernel_num=kernel_num)
-            label = label.tolist()  # type: ignore
-    else:
-        label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
-                                          min_kernel_area, kernel_num)
+    label = ext_module.contour_expand(kernel_mask, internal_kernel_label,
+                                      min_kernel_area, kernel_num)
    return label
--- a/mmcv/ops/corner_pool.py
+++ b/mmcv/ops/corner_pool.py
@ -70,7 +70,7 @@ class CornerPool(nn.Module):
        self.mode = mode

    def forward(self, x: Tensor) -> Tensor:
-        if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0':
+        if torch.__version__ >= '1.5.0':
            dim, flip = self.cummax_dim_flip[self.mode]
            if flip:
                x = x.flip(dim)
--- a/mmcv/ops/csrc/README.md
+++ b/mmcv/ops/csrc/README.md
@ -8,14 +8,11 @@ This folder contains all non-python code for MMCV custom ops. Please follow the
 .
 ├── common
 │   ├── box_iou_rotated_utils.hpp
-│   ├── parrots_cpp_helper.hpp
-│   ├── parrots_cuda_helper.hpp
 │   ├── pytorch_cpp_helper.hpp
 │   ├── pytorch_cuda_helper.hpp
 │   ├── pytorch_device_registry.hpp
 │   ├── cuda
 │   │   ├── common_cuda_helper.hpp
-│   │   ├── parrots_cudawarpfunction.cuh
 │   │   ├── ...
 │   │   └── ops_cuda_kernel.cuh
 |   ├── mps
@ -26,11 +23,6 @@ This folder contains all non-python code for MMCV custom ops. Please follow the
 │   │   └── ...
 |   └── utils
 │   │   └── ...
-├── parrots
-│   ├── ...
-│   ├── ops.cpp
-│   ├── ops_parrots.cpp
-│   └── ops_pytorch.h
 └── pytorch
     ├── info.cpp
     ├── pybind.cpp
@ -57,7 +49,6 @@ This folder contains all non-python code for MMCV custom ops. Please follow the
  - `mps`: The tools used to support MPS ops. **NOTE** that MPS support is **experimental**.
  - `mlu`: The MLU kernels used to support [Cambricon](https://www.cambricon.com/) device.
  - `utils`: The kernels and utils of spconv.
- `parrots`: **Parrots** is a deep learning frame for model training and inference. Parrots custom ops are placed in this directory.
 - `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding codes are placed in this directory.
  - `cuda`: This directory contains cuda kernel launchers, which feed memory pointers of tensor to the cuda kernel in `common/cuda`. The launchers provide c++ interface of cuda implementation of corresponding custom ops.
  - `cpu`: This directory contain cpu implementations of corresponding custom ops.
--- a/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH
 #define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename scalar_t>
 __global__ void active_rotated_filter_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH
 #define ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 // input: points(B,N0,M,O), centers(B,N0,M,O), scores(B,N1,K,M), knn_idx(B,N1,K)
 // output: fout(B,O,N)
--- a/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef BALL_QUERY_CUDA_KERNEL_CUH
 #define BALL_QUERY_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void ball_query_forward_cuda_kernel(int b, int n, int m,
--- a/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef BBOX_OVERLAPS_CUDA_KERNEL_CUH
 #define BBOX_OVERLAPS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __device__ __forceinline__ void load_bbox(const T* bbox, const int base, T& x1,
--- a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh
@ -8,11 +8,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 template <typename T>
--- a/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh
@ -12,11 +12,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 enum BorderMode { Top = 0, Left = 1, Bottom = 2, Right = 3 };
--- a/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh
+++ b/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh
@ -2,12 +2,8 @@
 #ifndef BOX_IOU_QUADRI_CUDA_CUH
 #define BOX_IOU_QUADRI_CUDA_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
-#include "pytorch_cuda_helper.hpp"
-#endif
 #include "box_iou_rotated_utils.hpp"
+#include "pytorch_cuda_helper.hpp"

 // 2D block with 32 * 16 = 512 threads per block
 const int BLOCK_DIM_X = 32;
--- a/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh
+++ b/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh
@ -4,12 +4,8 @@
 #ifndef BOX_IOU_ROTATED_CUDA_CUH
 #define BOX_IOU_ROTATED_CUDA_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
-#include "pytorch_cuda_helper.hpp"
-#endif
 #include "box_iou_rotated_utils.hpp"
+#include "pytorch_cuda_helper.hpp"

 // 2D block with 32 * 16 = 512 threads per block
 const int BLOCK_DIM_X = 32;
--- a/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef CARAFE_CUDA_KERNEL_CUH
 #define CARAFE_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 #ifdef MMCV_WITH_HIP
 #define WARP_SIZE 64
--- a/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef CARAFE_NAIVE_CUDA_KERNEL_CUH
 #define CARAFE_NAIVE_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 __device__ inline int Loc2Index(const int n, const int c, const int h,
                                const int w, const int channel_num,
--- a/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef CHAMFER_DISTANCE_CUDA_KERNEL_CUH
 #define CHAMFER_DISTANCE_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 #define MAX_SHARED_SCALAR_T 6144  // 49152 / 8 = 6144

--- a/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef CONVEX_IOU_CUDA_KERNEL_CUH
 #define CONVEX_IOU_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 #define MAXN 100
 #define NMAX 512
--- a/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh
+++ b/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh
@ -6,14 +6,10 @@
 #ifndef CORRELATION_CUDA
 #define CORRELATION_CUDA

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
-#include "pytorch_cuda_helper.hpp"
-#endif
-
 #include <cuda.h>
 #include <cuda_runtime.h>
+
+#include "pytorch_cuda_helper.hpp"
 // Using <torch/extension.h> is recommended in the official documentation in
 // https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op.
 // However, we use <torch/types.h> for compatibility with CUDA 9.0
--- a/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh
@ -70,11 +70,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 template <typename T>
--- a/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef DEFORM_ROI_POOL_CUDA_KERNEL_CUH
 #define DEFORM_ROI_POOL_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void deform_roi_pool_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh
@ -1,11 +1,7 @@
 // Copyright (c) OpenMMLab. All rights reserved
 // Adapted from
 // https://github.com/lilanxiao/Rotated_IoU/cuda_op/sort_vert_kernel.cu  # noqa
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 #define MAX_NUM_VERT_IDX 9
 #define INTERSECTION_OFFSET 8
--- a/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH
 #define FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i,
                         int idx1, int idx2) {
--- a/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef GATHER_POINTS_CUDA_KERNEL_CUH
 #define GATHER_POINTS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 #define TOTAL_THREADS 1024

--- a/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef GROUP_POINTS_CUDA_KERNEL_CUH
 #define GROUP_POINTS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void group_points_forward_cuda_kernel(int b, int c, int n,
--- a/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef IOU3D_CUDA_KERNEL_CUH
 #define IOU3D_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 const int THREADS_PER_BLOCK_IOU3D = 16;
 const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8;
--- a/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef KNN_CUDA_KERNEL_CUH
 #define KNN_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 inline __device__ void swap_float(float *x, float *y) {
  float tmp = *x;
--- a/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef MASKED_CONV2D_CUDA_KERNEL_CUH
 #define MASKED_CONV2D_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename scalar_t>
 __global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
--- a/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh
+++ b/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh
@ -2,11 +2,7 @@
 #ifndef MIN_AREA_POLYGONS_CUDA_KERNEL_CUH
 #define MIN_AREA_POLYGONS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 #define MAXN 20
 __device__ const float PI = 3.1415926;
--- a/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh
@ -70,11 +70,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 template <typename T>
--- a/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh
@ -6,11 +6,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 int const threadsPerBlock = sizeof(unsigned long long int) * 8;
--- a/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh
+++ b/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh
@ -2,12 +2,8 @@
 #ifndef NMS_QUADRI_CUDA_CUH
 #define NMS_QUADRI_CUDA_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
-#include "pytorch_cuda_helper.hpp"
-#endif
 #include "box_iou_rotated_utils.hpp"
+#include "pytorch_cuda_helper.hpp"

 __host__ __device__ inline int divideUP(const int x, const int y) {
  return (((x) + (y)-1) / (y));
--- a/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh
+++ b/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh
@ -4,12 +4,8 @@
 #ifndef NMS_ROTATED_CUDA_CUH
 #define NMS_ROTATED_CUDA_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
-#include "pytorch_cuda_helper.hpp"
-#endif
 #include "box_iou_rotated_utils.hpp"
+#include "pytorch_cuda_helper.hpp"

 __host__ __device__ inline int divideUP(const int x, const int y) {
  return (((x) + (y)-1) / (y));
--- a/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
+++ b/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
@ -1,109 +0,0 @@
-/*
- * Copyright (c) 2019, SenseTime.
- */
-
-#ifndef INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
-#define INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
-
-#ifndef __CUDACC__
-#error cudawarpfunction.cuh should only be included by .cu files
-#endif
-#include <cuda.h>
-
-#include <parrots/foundation/common.hpp>
-
-#ifdef PARROTS_USE_HALF
-#include <cuda_fp16.h>
-#endif
-#ifdef __CUDA_ARCH__
-#define CUDA_INTRINSIC_FUNC(Expr) Expr
-#else
-#define CUDA_INTRINSIC_FUNC(Expr)
-#endif
-
-#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
-
-#ifdef PARROTS_USE_HALF
-
-#if CUDA_VERSION < 9000
-
-__device__ inline float16 __shfl(float16 var, int srcLane, int width) {
-  CUDA_INTRINSIC_FUNC(return __shfl(var.y, srcLane, width););
-}
-
-__device__ inline float16 __shfl_up(float16 var, unsigned delta, int width) {
-  CUDA_INTRINSIC_FUNC(return __shfl_up(var.y, delta, width););
-}
-
-__device__ inline float16 __shfl_down(float16 var, unsigned delta, int width) {
-  CUDA_INTRINSIC_FUNC(return __shfl_down(var.y, delta, width););
-}
-
-__device__ inline float16 __shfl_xor(float16 var, int laneMask, int width) {
-  CUDA_INTRINSIC_FUNC(return __shfl_xor(var.y, laneMask, width););
-}
-
-#else  // CUDA_VERSION >= 9000
-
-__device__ inline float16 __shfl_sync(unsigned mask, float16 var, int srcLane,
-                                      int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_sync(mask, var.y, srcLane, width);
-                      return r;);
-}
-
-__device__ inline float16 __shfl_up_sync(unsigned mask, float16 var,
-                                         unsigned delta, int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(
-      float16 r; r.y = __shfl_up_sync(mask, var.y, delta, width); return r;);
-}
-
-__device__ inline float16 __shfl_down_sync(unsigned mask, float16 var,
-                                           unsigned delta,
-                                           int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(
-      float16 r; r.y = __shfl_down_sync(mask, var.y, delta, width); return r;);
-}
-
-__device__ inline float16 __shfl_xor_sync(unsigned mask, float16 var,
-                                          int laneMask, int width) {
-  CUDA_INTRINSIC_FUNC(float16 r;
-                      r.y = __shfl_xor_sync(mask, var.y, laneMask, width);
-                      return r;);
-}
-
-#endif  // CUDA_VERSION < 9000
-
-#endif  // PARROTS_USE_HALF
-
-// warp shuffle interface with a dummy mask
-#if CUDA_VERSION < 9000
-
-template <typename T>
-__device__ inline T __shfl_sync(unsigned mask, T var, int srcLane,
-                                int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(return __shfl(var, srcLane, width););
-}
-
-template <typename T>
-__device__ inline T __shfl_up_sync(unsigned mask, T var, unsigned delta,
-                                   int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(return __shfl_up(var, delta, width););
-}
-
-template <typename T>
-__device__ inline T __shfl_down_sync(unsigned mask, T var, unsigned delta,
-                                     int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(return __shfl_down(var, delta, width););
-}
-
-template <typename T>
-__device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask,
-                                    int width = warpSize) {
-  CUDA_INTRINSIC_FUNC(return __shfl_xor(var, laneMask, width););
-}
-
-#endif  // CUDA_VERSION < 9000
-
-#endif  // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300
-
-#endif  // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_
--- a/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef POINT_IN_BOXES_CUDA_KERNEL_CUH
 #define POINT_IN_BOXES_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
--- a/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef POINTS_IN_POLYGONS_CUDA_KERNEL_CUH
 #define POINTS_IN_POLYGONS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 struct point {
  float x, y;
--- a/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh
@ -5,11 +5,7 @@
 #ifndef PRROI_POOL_CUDA_KERNEL_CUH
 #define PRROI_POOL_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __device__ static __forceinline__ T PrRoIPoolingGetData(const T *data,
--- a/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef PSAMASK_CUDA_KERNEL_CUH
 #define PSAMASK_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 // CUDA: grid stride looping
 #ifndef CUDA_KERNEL_LOOP
--- a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
@ -4,11 +4,8 @@
 #define RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH

 #include <float.h>
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
+
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS

 /*** Forward ***/
 template <typename scalar_t>
--- a/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
@ -6,11 +6,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 /*** Forward ***/
--- a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
@ -8,11 +8,7 @@
 #ifdef MMCV_WITH_TRT
 #include "common_cuda_helper.hpp"
 #else  // MMCV_WITH_TRT
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else  // MMCV_USE_PARROTS
 #include "pytorch_cuda_helper.hpp"
-#endif  // MMCV_USE_PARROTS
 #endif  // MMCV_WITH_TRT

 /*** Forward ***/
--- a/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef ROI_POOL_CUDA_KERNEL_CUH
 #define ROI_POOL_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void roi_pool_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef ROIAWARE_POOL3D_CUDA_KERNEL_CUH
 #define ROIAWARE_POOL3D_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
--- a/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef ROIPOINT_POOL3D_CUDA_KERNEL_CUH
 #define ROIPOINT_POOL3D_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz,
--- a/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH
 #define ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename scalar_t>
 __global__ void rotated_feature_align_forward_kernel(
--- a/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef SCATTER_POINTS_CUDA_KERNEL_CUH
 #define SCATTER_POINTS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;
 int const maxGridDim = 50000;
--- a/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH
 #define SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void sigmoid_focal_loss_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH
 #define SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void softmax_focal_loss_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh
@ -4,11 +4,7 @@
 #ifndef STACK_BALL_QUERY_CUDA_KERNEL_CUH
 #define STACK_BALL_QUERY_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void stack_ball_query_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh
@ -3,12 +3,9 @@
 // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu
 #ifndef STACK_GROUP_POINTS_CUDA_KERNEL_CUH
 #define STACK_GROUP_POINTS_CUDA_KERNEL_CUH
-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
-#include "pytorch_cuda_helper.hpp"
-#endif
 #include <stdio.h>
+
+#include "pytorch_cuda_helper.hpp"
 template <typename T>
 __global__ void stack_group_points_forward_cuda_kernel(
    int b, int c, int m, int nsample, const T *features,
--- a/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef SYNCBN_CUDA_KERNEL_CUH
 #define SYNCBN_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void sync_bn_forward_mean_cuda_kernel(const T *input, float *mean,
--- a/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef THREE_INTERPOLATE_CUDA_KERNEL_CUH
 #define THREE_INTERPOLATE_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void three_interpolate_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef THREE_NN_CUDA_KERNEL_CUH
 #define THREE_NN_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void three_nn_forward_cuda_kernel(int b, int n, int m,
--- a/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef TIN_SHIFT_CUDA_KERNEL_CUH
 #define TIN_SHIFT_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 template <typename T>
 __global__ void tin_shift_forward_cuda_kernel(
--- a/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh
+++ b/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh
@ -2,11 +2,7 @@
 #ifndef VOXELIZATION_CUDA_KERNEL_CUH
 #define VOXELIZATION_CUDA_KERNEL_CUH

-#ifdef MMCV_USE_PARROTS
-#include "parrots_cuda_helper.hpp"
-#else
 #include "pytorch_cuda_helper.hpp"
-#endif

 typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t;

--- a/mmcv/ops/csrc/common/parrots_cpp_helper.hpp
+++ b/mmcv/ops/csrc/common/parrots_cpp_helper.hpp
@ -1,40 +0,0 @@
-#ifndef PARROTS_CPP_HELPER
-#define PARROTS_CPP_HELPER
-#include <parrots/darray/darraymath.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/darraylite.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-#include <vector>
-
-using namespace parrots;
-
-#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \
-  case prim_type: {                                     \
-    using scalar_t = type;                              \
-    return __VA_ARGS__();                               \
-  }
-
-#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...)                  \
-  [&] {                                                             \
-    const auto& the_type = TYPE;                                    \
-    switch (the_type) {                                             \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)  \
-      default:                                                      \
-        PARROTS_NOTSUPPORTED;                                       \
-    }                                                               \
-  }()
-
-#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...)          \
-  [&] {                                                              \
-    const auto& the_type = TYPE;                                     \
-    switch (the_type) {                                              \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__)  \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)   \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \
-      default:                                                       \
-        PARROTS_NOTSUPPORTED;                                        \
-    }                                                                \
-  }()
-
-#endif  // PARROTS_CPP_HELPER
--- a/mmcv/ops/csrc/common/parrots_cuda_helper.hpp
+++ b/mmcv/ops/csrc/common/parrots_cuda_helper.hpp
@ -1,111 +0,0 @@
-#ifndef PARROTS_CUDA_HELPER
-#define PARROTS_CUDA_HELPER
-
-#include <cuda.h>
-#include <float.h>
-
-#include <parrots/darray/darraymath.hpp>
-#include <parrots/darray/mathfunctions.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/darrayutil.hpp>
-#include <parrots/foundation/exceptions.hpp>
-#include <parrots/foundation/float16.hpp>
-#include <parrots/foundation/mathfunction.hpp>
-
-#include "common_cuda_helper.hpp"
-#include "parrots_cudawarpfunction.cuh"
-
-using namespace parrots;
-using phalf = float16;
-
-#define __PHALF(x) (x.y)
-
-#define PARROTS_CUDA_CHECK(exp)                         \
-  do {                                                  \
-    cudaError_t err = exp;                              \
-    if (err != cudaSuccess) {                           \
-      fprintf(stderr, "cudaCheckError() failed : %s\n", \
-              cudaGetErrorString(err));                 \
-      exit(-1);                                         \
-    }                                                   \
-  } while (0)
-
-#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \
-  case prim_type: {                                     \
-    using scalar_t = type;                              \
-    return __VA_ARGS__();                               \
-  }
-
-#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...)                  \
-  [&] {                                                             \
-    const auto& the_type = TYPE;                                    \
-    switch (the_type) {                                             \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)  \
-      default:                                                      \
-        PARROTS_NOTSUPPORTED;                                       \
-    }                                                               \
-  }()
-
-#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...)          \
-  [&] {                                                              \
-    const auto& the_type = TYPE;                                     \
-    switch (the_type) {                                              \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__)  \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__)   \
-      PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \
-      default:                                                       \
-        PARROTS_NOTSUPPORTED;                                        \
-    }                                                                \
-  }()
-
-/** atomicAdd **/
-#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600
-
-static __inline__ __device__ double atomicAdd(double* address, double val) {
-  unsigned long long int* address_as_ull = (unsigned long long int*)address;
-  unsigned long long int old = *address_as_ull, assumed;
-  if (val == 0.0) return __longlong_as_double(old);
-  do {
-    assumed = old;
-    old = atomicCAS(address_as_ull, assumed,
-                    __double_as_longlong(val + __longlong_as_double(assumed)));
-  } while (assumed != old);
-  return __longlong_as_double(old);
-}
-
-#endif
-
-static __inline__ __device__ float16 atomicAdd(float16* address, float16 val) {
-  unsigned int* aligned =
-      (unsigned int*)((size_t)address - ((size_t)address & 2));
-  unsigned int old = *aligned;
-  unsigned int assumed;
-  unsigned short old_as_us;
-  do {
-    assumed = old;
-    old_as_us =
-        (unsigned short)((size_t)address & 2 ? old >> 16 : old & 0xffff);
-
-#if __CUDACC_VER_MAJOR__ >= 9
-    float16 tmp;
-    tmp.x = old_as_us;
-    float16 sum = tmp + val;
-    unsigned short sum_as_us = sum.x;
-//         half sum = __float2half_rn(__half2float(__ushort_as_half(old_as_us))
-//         + (float)(val)); unsigned short sum_as_us = __half_as_ushort(sum);
-#else
-    unsigned short sum_as_us =
-        __float2half_rn(__half2float(old_as_us) + (float)(val));
-#endif
-
-    unsigned int sum_as_ui = (size_t)address & 2
-                                 ? (sum_as_us << 16) | (old & 0xffff)
-                                 : (old & 0xffff0000) | sum_as_us;
-    old = atomicCAS(aligned, assumed, sum_as_ui);
-  } while (assumed != old);
-  //__half_raw raw = {old_as_us};
-  // return float16(raw);
-  return *reinterpret_cast<float16*>(&old_as_us);
-}
-#endif  // PARROTS_CUDA_HELPER
--- a/mmcv/ops/csrc/parrots/active_rotated_filter.cpp
+++ b/mmcv/ops/csrc/parrots/active_rotated_filter.cpp
@ -1,28 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved.
-// Modified from
-// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void active_rotated_filter_forward_impl(const Tensor input,
-                                        const Tensor indices, Tensor output) {
-  DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices,
-                       output);
-}
-
-void active_rotated_filter_backward_impl(const Tensor grad_out,
-                                         const Tensor indices, Tensor grad_in) {
-  DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices,
-                       grad_in);
-}
-
-void active_rotated_filter_forward(const Tensor input, const Tensor indices,
-                                   Tensor output) {
-  active_rotated_filter_forward_impl(input, indices, output);
-}
-
-void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices,
-                                    Tensor grad_in) {
-  active_rotated_filter_backward_impl(grad_out, indices, grad_in);
-}
--- a/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp
@ -1,63 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "active_rotated_filter_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void active_rotated_filter_forward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  auto input = buildATensor(ctx, ins[0]);
-  auto indices = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  active_rotated_filter_forward(input, indices, output);
-}
-
-void active_rotated_filter_backward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  auto grad_out = buildATensor(ctx, ins[0]);
-  auto indices = buildATensor(ctx, ins[1]);
-  auto grad_in = buildATensor(ctx, outs[0]);
-  active_rotated_filter_backward(grad_out, indices, grad_in);
-}
-#endif
-
-void active_rotated_filter_forward_cpu_parrots(
-    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  auto input = buildATensor(ctx, ins[0]);
-  auto indices = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  active_rotated_filter_forward(input, indices, output);
-}
-
-void active_rotated_filter_backward_cpu_parrots(
-    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  auto grad_out = buildATensor(ctx, ins[0]);
-  auto indices = buildATensor(ctx, ins[1]);
-  auto grad_in = buildATensor(ctx, outs[0]);
-  active_rotated_filter_backward(grad_out, indices, grad_in);
-}
-
-PARROTS_EXTENSION_REGISTER(active_rotated_filter_forward)
-    .input(2)
-    .output(1)
-    .apply(active_rotated_filter_forward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(active_rotated_filter_forward_cuda_parrots)
-#endif
-    .done();
-
-PARROTS_EXTENSION_REGISTER(active_rotated_filter_backward)
-    .input(2)
-    .output(1)
-    .apply(active_rotated_filter_backward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(active_rotated_filter_backward_cuda_parrots)
-#endif
-    .done();
--- a/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h
+++ b/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h
@ -1,13 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ACTIVE_ROTATED_FILTER_PYTORCH_H
-#define ACTIVE_ROTATED_FILTER_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void active_rotated_filter_forward(const Tensor input, const Tensor indices,
-                                   Tensor output);
-
-void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices,
-                                    Tensor grad_in);
-
-#endif  // ACTIVE_ROTATED_FILTER_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/assign_score_withk.cpp
+++ b/mmcv/ops/csrc/parrots/assign_score_withk.cpp
@ -1,42 +0,0 @@
-// Modified from
-// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O,
-                                     int aggregate, const Tensor& points,
-                                     const Tensor& centers,
-                                     const Tensor& scores,
-                                     const Tensor& knn_idx, Tensor& output) {
-  DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O,
-                       aggregate, points, centers, scores, knn_idx, output);
-}
-
-void assign_score_withk_backward_impl(
-    int B, int N0, int N1, int M, int K, int O, int aggregate,
-    const Tensor& grad_out, const Tensor& points, const Tensor& centers,
-    const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points,
-    Tensor& grad_centers, Tensor& grad_scores) {
-  DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O,
-                       aggregate, grad_out, points, centers, scores, knn_idx,
-                       grad_points, grad_centers, grad_scores);
-}
-
-void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
-                                const Tensor& scores, const Tensor& knn_idx,
-                                Tensor& output, int B, int N0, int N1, int M,
-                                int K, int O, int aggregate) {
-  assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points,
-                                  centers, scores, knn_idx, output);
-}
-
-void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
-                                 const Tensor& centers, const Tensor& scores,
-                                 const Tensor& knn_idx, Tensor& grad_points,
-                                 Tensor& grad_centers, Tensor& grad_scores,
-                                 int B, int N0, int N1, int M, int K, int O,
-                                 int aggregate) {
-  assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out,
-                                   points, centers, scores, knn_idx,
-                                   grad_points, grad_centers, grad_scores);
-}
--- a/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp
@ -1,89 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "assign_score_withk_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void assign_score_withk_forward_cuda_parrots(CudaContext& ctx,
-                                             const SSElement& attr,
-                                             const OperatorBase::in_list_t& ins,
-                                             OperatorBase::out_list_t& outs) {
-  int B, N0, N1, M, K, O, aggregate;
-  SSAttrs(attr)
-      .get<int>("B", B)
-      .get<int>("N0", N0)
-      .get<int>("N1", N1)
-      .get<int>("M", M)
-      .get<int>("K", K)
-      .get<int>("O", O)
-      .get<int>("aggregate", aggregate)
-      .done();
-
-  const auto& points = buildATensor(ctx, ins[0]);
-  const auto& centers = buildATensor(ctx, ins[1]);
-  const auto& scores = buildATensor(ctx, ins[2]);
-  const auto& knn_idx = buildATensor(ctx, ins[3]);
-
-  auto output = buildATensor(ctx, outs[0]);
-  assign_score_withk_forward(points, centers, scores, knn_idx, output, B, N0,
-                             N1, M, K, O, aggregate);
-}
-
-void assign_score_withk_backward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  int B, N0, N1, M, K, O, aggregate;
-  SSAttrs(attr)
-      .get<int>("B", B)
-      .get<int>("N0", N0)
-      .get<int>("N1", N1)
-      .get<int>("M", M)
-      .get<int>("K", K)
-      .get<int>("O", O)
-      .get<int>("aggregate", aggregate)
-      .done();
-
-  const auto& grad_out = buildATensor(ctx, ins[0]);
-  const auto& points = buildATensor(ctx, ins[1]);
-  const auto& centers = buildATensor(ctx, ins[2]);
-  const auto& scores = buildATensor(ctx, ins[3]);
-  const auto& knn_idx = buildATensor(ctx, ins[4]);
-
-  auto grad_points = buildATensor(ctx, outs[0]);
-  auto grad_centers = buildATensor(ctx, outs[1]);
-  auto grad_scores = buildATensor(ctx, outs[2]);
-  assign_score_withk_backward(grad_out, points, centers, scores, knn_idx,
-                              grad_points, grad_centers, grad_scores, B, N0, N1,
-                              M, K, O, aggregate);
-}
-
-PARROTS_EXTENSION_REGISTER(assign_score_withk_forward)
-    .attr("B")
-    .attr("N0")
-    .attr("N1")
-    .attr("M")
-    .attr("K")
-    .attr("O")
-    .attr("aggregate")
-    .input(4)
-    .output(1)
-    .apply(assign_score_withk_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(assign_score_withk_backward)
-    .attr("B")
-    .attr("N0")
-    .attr("N1")
-    .attr("M")
-    .attr("K")
-    .attr("O")
-    .attr("aggregate")
-    .input(5)
-    .output(3)
-    .apply(assign_score_withk_backward_cuda_parrots)
-    .done();
-#endif
--- a/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h
+++ b/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h
@ -1,19 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ASSIGN_SCORE_WITHK_PYTORCH_H
-#define ASSIGN_SCORE_WITHK_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void assign_score_withk_forward(const Tensor& points, const Tensor& centers,
-                                const Tensor& scores, const Tensor& knn_idx,
-                                Tensor& output, int B, int N0, int N1, int M,
-                                int K, int O, int aggregate);
-
-void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points,
-                                 const Tensor& centers, const Tensor& scores,
-                                 const Tensor& knn_idx, Tensor& grad_points,
-                                 Tensor& grad_centers, Tensor& grad_scores,
-                                 int B, int N0, int N1, int M, int K, int O,
-                                 int aggregate);
-
-#endif  // ASSIGN_SCORE_WITHK_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/ball_query._parrots.cpp
+++ b/mmcv/ops/csrc/parrots/ball_query._parrots.cpp
@ -1,43 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "ball_query_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void ball_query_parrots(CudaContext& ctx, const SSElement& attr,
-                        const OperatorBase::in_list_t& ins,
-                        OperatorBase::out_list_t& outs) {
-  int b, n, m, nsample;
-  float min_radius, max_radius;
-  SSAttrs(attr)
-      .get<int>("b", b)
-      .get<int>("n", n)
-      .get<int>("m", m)
-      .get<int>("nsample", nsample)
-      .get<float>("min_radius", min_radius)
-      .get<float>("max_radius", max_radius)
-      .done();
-
-  const auto& center_xyz = buildATensor(ctx, ins[0]);
-  const auto& xyz = buildATensor(ctx, ins[1]);
-  auto idx = buildATensor(ctx, outs[0]);
-  ball_query_forward(center_xyz, xyz, idx, b, n, m, min_radius, max_radius,
-                     nsample);
-}
-
-PARROTS_EXTENSION_REGISTER(ball_query_forward)
-    .attr("b")
-    .attr("n")
-    .attr("m")
-    .attr("nsample")
-    .attr("min_radius")
-    .attr("max_radius")
-    .input(2)
-    .output(1)
-    .apply(ball_query_parrots)
-    .done();
-#endif
--- a/mmcv/ops/csrc/parrots/ball_query.cpp
+++ b/mmcv/ops/csrc/parrots/ball_query.cpp
@ -1,20 +0,0 @@
-// Modified from
-// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void ball_query_forward_impl(int b, int n, int m, float min_radius,
-                             float max_radius, int nsample,
-                             const Tensor new_xyz, const Tensor xyz,
-                             Tensor idx) {
-  DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius,
-                       nsample, new_xyz, xyz, idx);
-}
-
-void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
-                        Tensor idx_tensor, int b, int n, int m,
-                        float min_radius, float max_radius, int nsample) {
-  ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample,
-                          new_xyz_tensor, xyz_tensor, idx_tensor);
-}
--- a/mmcv/ops/csrc/parrots/ball_query_pytorch.h
+++ b/mmcv/ops/csrc/parrots/ball_query_pytorch.h
@ -1,11 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef BALL_QUERY_PYTORCH_H
-#define BALL_QUERY_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void ball_query_forward(const Tensor new_xyz, const Tensor xyz, Tensor idx,
-                        int b, int n, int m, float min_radius, float max_radius,
-                        int nsample);
-
-#endif  // BALL_QUERY_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/bbox_overlaps.cpp
+++ b/mmcv/ops/csrc/parrots/bbox_overlaps.cpp
@ -1,14 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
-                        const int mode, const bool aligned, const int offset) {
-  DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode,
-                       aligned, offset);
-}
-
-void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
-                   const int mode, const bool aligned, const int offset) {
-  bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset);
-}
--- a/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp
@ -1,40 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "bbox_overlaps_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-/*
- * void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor
- * ious, const int mode, const bool aligned, const int offset);
- */
-void bbox_overlaps_parrots(CudaContext& ctx, const SSElement& attr,
-                           const OperatorBase::in_list_t& ins,
-                           OperatorBase::out_list_t& outs) {
-  int mode, offset;
-  bool aligned;
-  SSAttrs(attr)
-      .get<int>("mode", mode)
-      .get<bool>("aligned", aligned)
-      .get<int>("offset", offset)
-      .done();
-
-  const auto& bboxes1 = buildATensor(ctx, ins[0]);
-  const auto& bboxes2 = buildATensor(ctx, ins[1]);
-  auto ious = buildATensor(ctx, outs[0]);
-  bbox_overlaps_cuda(bboxes1, bboxes2, ious, mode, aligned, offset);
-}
-
-PARROTS_EXTENSION_REGISTER(bbox_overlaps)
-    .attr("mode")
-    .attr("aligned")
-    .attr("offset")
-    .input(2)
-    .output(1)
-    .apply(bbox_overlaps_parrots)
-    .done();
-#endif
--- a/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h
+++ b/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h
@ -1,10 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef BBOX_OVERLAPS_PYTORCH_H
-#define BBOX_OVERLAPS_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
-                        const int mode, const bool aligned, const int offset);
-
-#endif  // BBOX_OVERLAPS_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/border_align.cpp
+++ b/mmcv/ops/csrc/parrots/border_align.cpp
@ -1,30 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
-                               Tensor output, Tensor argmax_idx,
-                               const int pool_size) {
-  DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output,
-                       argmax_idx, pool_size);
-}
-
-void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
-                                const Tensor &argmax_idx, Tensor grad_input,
-                                const int pool_size) {
-  DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes,
-                       argmax_idx, grad_input, pool_size);
-}
-
-void border_align_forward(const Tensor &input, const Tensor &boxes,
-                          Tensor output, Tensor argmax_idx,
-                          const int pool_size) {
-  border_align_forward_impl(input, boxes, output, argmax_idx, pool_size);
-}
-
-void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
-                           const Tensor &argmax_idx, Tensor grad_input,
-                           const int pool_size) {
-  border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input,
-                             pool_size);
-}
--- a/mmcv/ops/csrc/parrots/border_align_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/border_align_parrots.cpp
@ -1,53 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "border_align_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void border_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                       const OperatorBase::in_list_t& ins,
-                                       OperatorBase::out_list_t& outs) {
-  int pool_size;
-  SSAttrs(attr).get<int>("pool_size", pool_size).done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& boxes = buildATensor(ctx, ins[1]);
-
-  auto output = buildATensor(ctx, outs[0]);
-  auto argmax_idx = buildATensor(ctx, outs[1]);
-  border_align_forward_cuda(input, boxes, output, argmax_idx, pool_size);
-}
-
-void border_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                        const OperatorBase::in_list_t& ins,
-                                        OperatorBase::out_list_t& outs) {
-  int pool_size;
-  SSAttrs(attr).get<int>("pool_size", pool_size).done();
-
-  const auto& top_grad = buildATensor(ctx, ins[0]);
-  const auto& boxes = buildATensor(ctx, ins[1]);
-  const auto& argmax_idx = buildATensor(ctx, ins[2]);
-
-  auto bottom_grad = buildATensor(ctx, outs[0]);
-  border_align_backward_cuda(top_grad, boxes, argmax_idx, bottom_grad,
-                             pool_size);
-}
-
-PARROTS_EXTENSION_REGISTER(border_align_forward)
-    .attr("pool_size")
-    .input(2)
-    .output(2)
-    .apply(border_align_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(border_align_backward)
-    .attr("pool_size")
-    .input(3)
-    .output(1)
-    .apply(border_align_backward_cuda_parrots)
-    .done();
-#endif
--- a/mmcv/ops/csrc/parrots/border_align_pytorch.h
+++ b/mmcv/ops/csrc/parrots/border_align_pytorch.h
@ -1,17 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef BORDER_ALIGN_PYTORCH_H
-#define BORDER_ALIGN_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-#ifdef MMCV_WITH_CUDA
-void border_align_forward_cuda(const Tensor &input, const Tensor &boxes,
-                               Tensor output, Tensor argmax_idx,
-                               const int pool_size);
-
-void border_align_backward_cuda(const Tensor &grad_output, const Tensor &boxes,
-                                const Tensor &argmax_idx, Tensor grad_input,
-                                const int pool_size);
-#endif
-
-#endif  // BORDER_ALIGN_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/box_iou_rotated.cpp
+++ b/mmcv/ops/csrc/parrots/box_iou_rotated.cpp
@ -1,19 +0,0 @@
-// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
-// modified from
-// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                          const int mode_flag, const bool aligned) {
-  DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag,
-                       aligned);
-}
-
-// Interface for Python
-// inline is needed to prevent multiple function definitions when this header is
-// included by different cpps
-void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                     const int mode_flag, const bool aligned) {
-  box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned);
-}
--- a/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp
@ -1,61 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "box_iou_rotated_pytorch.h"
-
-using namespace parrots;
-
-/*
- * void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor
- * ious, const int mode_flag, const bool aligned);
- */
-void box_iou_rotated_cpu_parrots(HostContext& ctx, const SSElement& attr,
-                                 const OperatorBase::in_list_t& ins,
-                                 OperatorBase::out_list_t& outs) {
-  bool aligned;
-  int mode_flag;
-  SSAttrs(attr)
-      .get<bool>("aligned", aligned)
-      .get<int>("mode_flag", mode_flag)
-      .done();
-
-  const auto& boxes1 = buildATensor(ctx, ins[0]);
-  const auto& boxes2 = buildATensor(ctx, ins[1]);
-  auto ious = buildATensor(ctx, outs[0]);
-  box_iou_rotated_cpu(boxes1, boxes2, ious, mode_flag, aligned);
-}
-
-#ifdef MMCV_WITH_CUDA
-/*
- * void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor
- * ious, const int mode_flag, const bool aligned);
- */
-void box_iou_rotated_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                  const OperatorBase::in_list_t& ins,
-                                  OperatorBase::out_list_t& outs) {
-  bool aligned;
-  int mode_flag;
-  SSAttrs(attr)
-      .get<bool>("aligned", aligned)
-      .get<int>("mode_flag", mode_flag)
-      .done();
-
-  const auto& boxes1 = buildATensor(ctx, ins[0]);
-  const auto& boxes2 = buildATensor(ctx, ins[1]);
-  auto ious = buildATensor(ctx, outs[0]);
-  box_iou_rotated_cuda(boxes1, boxes2, ious, mode_flag, aligned);
-}
-#endif
-
-PARROTS_EXTENSION_REGISTER(box_iou_rotated)
-    .attr("aligned")
-    .attr("mode_flag")
-    .input(2)
-    .output(1)
-    .apply(box_iou_rotated_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(box_iou_rotated_cuda_parrots)
-#endif
-    .done();
--- a/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h
+++ b/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h
@ -1,15 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef BOX_IOU_ROTATED_PYTORCH_H
-#define BOX_IOU_ROTATED_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                         const int mode_flag, const bool aligned);
-
-#ifdef MMCV_WITH_CUDA
-void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                          const int mode_flag, const bool aligned);
-#endif
-
-#endif  // BOX_IOU_ROTATED_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/carafe.cpp
+++ b/mmcv/ops/csrc/parrots/carafe.cpp
@ -1,38 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
-                         Tensor routput, Tensor rmasks, Tensor output,
-                         int kernel_size, int group_size, int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput,
-                       rmasks, output, kernel_size, group_size, scale_factor);
-}
-
-void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
-                          Tensor rtop_grad, Tensor rbottom_grad_hs,
-                          Tensor rbottom_grad, Tensor rmask_grad,
-                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
-                          int group_size, int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks,
-                       rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad,
-                       bottom_grad, mask_grad, kernel_size, group_size,
-                       scale_factor);
-}
-
-void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures,
-                    Tensor routput, Tensor rmasks, Tensor output,
-                    int kernel_size, int group_size, int scale_factor) {
-  carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output,
-                      kernel_size, group_size, scale_factor);
-}
-
-void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
-                     Tensor rtop_grad, Tensor rbottom_grad_hs,
-                     Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad,
-                     Tensor mask_grad, int kernel_size, int group_size,
-                     int scale_factor) {
-  carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
-                       rbottom_grad, rmask_grad, bottom_grad, mask_grad,
-                       kernel_size, group_size, scale_factor);
-}
--- a/mmcv/ops/csrc/parrots/carafe_naive.cpp
+++ b/mmcv/ops/csrc/parrots/carafe_naive.cpp
@ -1,32 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
-                               int kernel_size, int group_size,
-                               int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output,
-                       kernel_size, group_size, scale_factor);
-}
-
-void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
-                                Tensor bottom_grad, Tensor mask_grad,
-                                int kernel_size, int group_size,
-                                int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks,
-                       bottom_grad, mask_grad, kernel_size, group_size,
-                       scale_factor);
-}
-
-void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
-                          int kernel_size, int group_size, int scale_factor) {
-  carafe_naive_forward_impl(features, masks, output, kernel_size, group_size,
-                            scale_factor);
-}
-
-void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks,
-                           Tensor bottom_grad, Tensor mask_grad,
-                           int kernel_size, int group_size, int scale_factor) {
-  carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad,
-                             kernel_size, group_size, scale_factor);
-}
--- a/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp
@ -1,74 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "carafe_naive_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-/*void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
- *                                int kernel_size, int group_size,
- *                                int scale_factor)
- */
-void carafe_naive_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                       const OperatorBase::in_list_t& ins,
-                                       OperatorBase::out_list_t& outs) {
-  int kernel_size, group_size, scale_factor;
-  SSAttrs(attr)
-      .get<int>("kernel_size", kernel_size)
-      .get<int>("group_size", group_size)
-      .get<int>("scale_factor", scale_factor)
-      .done();
-
-  const auto& features = buildATensor(ctx, ins[0]);
-  const auto& masks = buildATensor(ctx, ins[1]);
-
-  auto output = buildATensor(ctx, outs[0]);
-  carafe_naive_forward_cuda(features, masks, output, kernel_size, group_size,
-                            scale_factor);
-}
-
-/*void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor
- * masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size,
- *                                int scale_factor);
- */
-void carafe_naive_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                        const OperatorBase::in_list_t& ins,
-                                        OperatorBase::out_list_t& outs) {
-  int kernel_size, group_size, scale_factor;
-  SSAttrs(attr)
-      .get<int>("kernel_size", kernel_size)
-      .get<int>("group_size", group_size)
-      .get<int>("scale_factor", scale_factor)
-      .done();
-
-  const auto& top_grad = buildATensor(ctx, ins[0]);
-  const auto& features = buildATensor(ctx, ins[1]);
-  const auto& masks = buildATensor(ctx, ins[2]);
-
-  auto bottom_grad = buildATensor(ctx, outs[0]);
-  auto mask_grad = buildATensor(ctx, outs[1]);
-  carafe_naive_backward_cuda(top_grad, features, masks, bottom_grad, mask_grad,
-                             kernel_size, group_size, scale_factor);
-}
-
-PARROTS_EXTENSION_REGISTER(carafe_naive_forward)
-    .attr("kernel_size")
-    .attr("group_size")
-    .attr("scale_factor")
-    .input(2)
-    .output(1)
-    .apply(carafe_naive_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(carafe_naive_backward)
-    .attr("kernel_size")
-    .attr("group_size")
-    .attr("scale_factor")
-    .input(3)
-    .output(2)
-    .apply(carafe_naive_backward_cuda_parrots)
-    .done();
-#endif
--- a/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h
+++ b/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h
@ -1,15 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef CARAFE_NAIVE_PYTORCH_H
-#define CARAFE_NAIVE_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
-                               int kernel_size, int group_size,
-                               int scale_factor);
-
-void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks,
-                                Tensor bottom_grad, Tensor mask_grad,
-                                int kernel_size, int group_size,
-                                int scale_factor);
-#endif  // CARAFE_NAIVE_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/carafe_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/carafe_parrots.cpp
@ -1,88 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "carafe_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-/*
- * void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
- *                          Tensor routput, Tensor rmasks, Tensor output,
- *                          int kernel_size, int group_size, int scale_factor);
- */
-void carafe_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                 const OperatorBase::in_list_t& ins,
-                                 OperatorBase::out_list_t& outs) {
-  int kernel_size, group_size, scale_factor;
-  SSAttrs(attr)
-      .get<int>("kernel_size", kernel_size)
-      .get<int>("group_size", group_size)
-      .get<int>("scale_factor", scale_factor)
-      .done();
-
-  const auto& features = buildATensor(ctx, ins[0]);
-  const auto& masks = buildATensor(ctx, ins[1]);
-
-  auto rfeatures = buildATensor(ctx, outs[0]);
-  auto routput = buildATensor(ctx, outs[1]);
-  auto rmasks = buildATensor(ctx, outs[2]);
-  auto output = buildATensor(ctx, outs[3]);
-
-  carafe_forward_cuda(features, masks, rfeatures, routput, rmasks, output,
-                      kernel_size, group_size, scale_factor);
-}
-
-/*
- * void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
- *                           Tensor rtop_grad, Tensor rbottom_grad_hs,
- *                           Tensor rbottom_grad, Tensor rmask_grad,
- *                           Tensor bottom_grad, Tensor mask_grad, int
- * kernel_size, int group_size, int scale_factor);
- */
-void carafe_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                  const OperatorBase::in_list_t& ins,
-                                  OperatorBase::out_list_t& outs) {
-  int kernel_size, group_size, scale_factor;
-  SSAttrs(attr)
-      .get<int>("kernel_size", kernel_size)
-      .get<int>("group_size", group_size)
-      .get<int>("scale_factor", scale_factor)
-      .done();
-
-  const auto& top_grad = buildATensor(ctx, ins[0]);
-  const auto& rfeatures = buildATensor(ctx, ins[1]);
-  const auto& masks = buildATensor(ctx, ins[2]);
-
-  auto rtop_grad = buildATensor(ctx, outs[0]);
-  auto rbottom_grad_hs = buildATensor(ctx, outs[1]);
-  auto rbottom_grad = buildATensor(ctx, outs[2]);
-  auto rmask_grad = buildATensor(ctx, outs[3]);
-  auto bottom_grad = buildATensor(ctx, outs[4]);
-  auto mask_grad = buildATensor(ctx, outs[5]);
-
-  carafe_backward_cuda(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
-                       rbottom_grad, rmask_grad, bottom_grad, mask_grad,
-                       kernel_size, group_size, scale_factor);
-}
-
-PARROTS_EXTENSION_REGISTER(carafe_forward)
-    .attr("kernel_size")
-    .attr("group_size")
-    .attr("scale_factor")
-    .input(2)
-    .output(4)
-    .apply(carafe_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(carafe_backward)
-    .attr("kernel_size")
-    .attr("group_size")
-    .attr("scale_factor")
-    .input(3)
-    .output(6)
-    .apply(carafe_backward_cuda_parrots)
-    .done();
-#endif
--- a/mmcv/ops/csrc/parrots/carafe_pytorch.h
+++ b/mmcv/ops/csrc/parrots/carafe_pytorch.h
@ -1,16 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef CARAFE_PYTORCH_H
-#define CARAFE_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
-                         Tensor routput, Tensor rmasks, Tensor output,
-                         int kernel_size, int group_size, int scale_factor);
-
-void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
-                          Tensor rtop_grad, Tensor rbottom_grad_hs,
-                          Tensor rbottom_grad, Tensor rmask_grad,
-                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
-                          int group_size, int scale_factor);
-#endif  // CARAFE_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/chamfer_distance.cpp
+++ b/mmcv/ops/csrc/parrots/chamfer_distance.cpp
@ -1,35 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved.
-// Modified from
-// https://github.com/chrdiller/pyTorchChamferDistance/blob/master/chamfer_distance/chamfer_distance.cpp
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void chamfer_distance_forward_impl(const Tensor xyz1, const Tensor xyz2,
-                                   const Tensor dist1, const Tensor dist2,
-                                   const Tensor idx1, const Tensor idx2) {
-  DISPATCH_DEVICE_IMPL(chamfer_distance_forward_impl, xyz1, xyz2, dist1, dist2,
-                       idx1, idx2);
-}
-
-void chamfer_distance_backward_impl(const Tensor xyz1, const Tensor xyz2,
-                                    Tensor idx1, Tensor idx2, Tensor graddist1,
-                                    Tensor graddist2, Tensor gradxyz1,
-                                    Tensor gradxyz2) {
-  DISPATCH_DEVICE_IMPL(chamfer_distance_backward_impl, xyz1, xyz2, idx1, idx2,
-                       graddist1, graddist2, gradxyz1, gradxyz2);
-}
-
-void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2,
-                              const Tensor dist1, const Tensor dist2,
-                              const Tensor idx1, const Tensor idx2) {
-  chamfer_distance_forward_impl(xyz1, xyz2, dist1, dist2, idx1, idx2);
-}
-
-void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2,
-                               Tensor idx1, Tensor idx2, Tensor graddist1,
-                               Tensor graddist2, Tensor gradxyz1,
-                               Tensor gradxyz2) {
-  chamfer_distance_backward_impl(xyz1, xyz2, idx1, idx2, graddist1, graddist2,
-                                 gradxyz1, gradxyz2);
-}
--- a/mmcv/ops/csrc/parrots/chamfer_distance_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/chamfer_distance_parrots.cpp
@ -1,51 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "chamfer_distance_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void chamfer_distance_forward_cuda_parrots(CudaContext& ctx,
-                                           const SSElement& attr,
-                                           const OperatorBase::in_list_t& ins,
-                                           OperatorBase::out_list_t& outs) {
-  auto xyz1 = buildATensor(ctx, ins[0]);
-  auto xyz2 = buildATensor(ctx, ins[1]);
-  auto dist1 = buildATensor(ctx, outs[0]);
-  auto dist2 = buildATensor(ctx, outs[1]);
-  auto idx1 = buildATensor(ctx, outs[2]);
-  auto idx2 = buildATensor(ctx, outs[3]);
-  chamfer_distance_forward(xyz1, xyz2, dist1, dist2, idx1, idx2);
-}
-
-void chamfer_distance_backward_cuda_parrots(CudaContext& ctx,
-                                            const SSElement& attr,
-                                            const OperatorBase::in_list_t& ins,
-                                            OperatorBase::out_list_t& outs) {
-  auto xyz1 = buildATensor(ctx, ins[0]);
-  auto xyz2 = buildATensor(ctx, ins[1]);
-  auto idx1 = buildATensor(ctx, ins[2]);
-  auto idx2 = buildATensor(ctx, ins[3]);
-  auto graddist1 = buildATensor(ctx, ins[4]);
-  auto graddist2 = buildATensor(ctx, ins[5]);
-  auto gradxyz1 = buildATensor(ctx, outs[0]);
-  auto gradxyz2 = buildATensor(ctx, outs[1]);
-  chamfer_distance_backward(xyz1, xyz2, idx1, idx2, graddist1, graddist2,
-                            gradxyz1, gradxyz2);
-}
-
-PARROTS_EXTENSION_REGISTER(chamfer_distance_forward)
-    .input(2)
-    .output(4)
-    .apply(chamfer_distance_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(chamfer_distance_backward)
-    .input(6)
-    .output(2)
-    .apply(chamfer_distance_backward_cuda_parrots)
-    .done();
-
-#endif
--- a/mmcv/ops/csrc/parrots/chamfer_distance_pytorch.h
+++ b/mmcv/ops/csrc/parrots/chamfer_distance_pytorch.h
@ -1,16 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
-#define ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2,
-                              const Tensor dist1, const Tensor dist2,
-                              const Tensor idx1, const Tensor idx);
-
-void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2,
-                               Tensor idx1, Tensor idx2, Tensor graddist1,
-                               Tensor graddist2, Tensor gradxyz1,
-                               Tensor gradxyz2);
-
-#endif  // ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/contour_expand.cpp
+++ b/mmcv/ops/csrc/parrots/contour_expand.cpp
@ -1,111 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-// It is modified from https://github.com/whai362/PSENet
-#include <iostream>
-#include <queue>
-
-#include "pytorch_cpp_helper.hpp"
-
-using namespace std;
-
-class Point2d {
- public:
-  int x;
-  int y;
-
-  Point2d() : x(0), y(0) {}
-  Point2d(int _x, int _y) : x(_x), y(_y) {}
-};
-
-void kernel_dilate(const uint8_t *data, IntArrayRef data_shape,
-                   const int *label_map, int &label_num, int &min_area,
-                   vector<vector<int>> &text_line) {
-  std::vector<int> area(label_num + 1);
-  int kernel_num = data_shape[0];
-  int height = data_shape[1];
-  int width = data_shape[2];
-
-  for (int x = 0; x < height; ++x) {
-    for (int y = 0; y < width; ++y) {
-      int label = label_map[x * width + y];
-      if (label == 0) continue;
-      area[label] += 1;
-    }
-  }
-
-  queue<Point2d> queue, next_queue;
-  for (int x = 0; x < height; ++x) {
-    vector<int> row(width);
-    for (int y = 0; y < width; ++y) {
-      int label = label_map[x * width + y];
-      if (label == 0) continue;
-      if (area[label] < min_area) continue;
-
-      Point2d point(x, y);
-      queue.push(point);
-      row[y] = label;
-    }
-    text_line.emplace_back(row);
-  }
-
-  int dx[] = {-1, 1, 0, 0};
-  int dy[] = {0, 0, -1, 1};
-  vector<int> kernel_step(kernel_num);
-  std::for_each(kernel_step.begin(), kernel_step.end(),
-                [=](int &k) { return k * height * width; });
-
-  for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) {
-    while (!queue.empty()) {
-      Point2d point = queue.front();
-      queue.pop();
-      int x = point.x;
-      int y = point.y;
-      int label = text_line[x][y];
-
-      bool is_edge = true;
-      for (int d = 0; d < 4; ++d) {
-        int tmp_x = x + dx[d];
-        int tmp_y = y + dy[d];
-
-        if (tmp_x < 0 || tmp_x >= height) continue;
-        if (tmp_y < 0 || tmp_y >= width) continue;
-        int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y];
-        if (kernel_value == 0) continue;
-        if (text_line[tmp_x][tmp_y] > 0) continue;
-
-        Point2d point(tmp_x, tmp_y);
-        queue.push(point);
-        text_line[tmp_x][tmp_y] = label;
-        is_edge = false;
-      }
-
-      if (is_edge) {
-        next_queue.push(point);
-      }
-    }
-    swap(queue, next_queue);
-  }
-}
-
-std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
-                                             Tensor internal_kernel_label,
-                                             int min_kernel_area,
-                                             int kernel_num) {
-  kernel_mask = kernel_mask.contiguous();
-  internal_kernel_label = internal_kernel_label.contiguous();
-  assert(kernel_mask.dim() == 3);
-  assert(internal_kernel_label.dim() == 2);
-  assert(kernel_mask.size(1) == internal_kernel_label.size(0));
-  assert(kernel_mask.size(2) == internal_kernel_label.size(1));
-  CHECK_CPU_INPUT(kernel_mask);
-  CHECK_CPU_INPUT(internal_kernel_label);
-  auto ptr_data = kernel_mask.data_ptr<uint8_t>();
-  IntArrayRef data_shape = kernel_mask.sizes();
-
-  auto data_label_map = internal_kernel_label.data_ptr<int32_t>();
-  vector<vector<int>> text_line;
-
-  kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
-                min_kernel_area, text_line);
-
-  return text_line;
-}
--- a/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
@ -1,43 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "contour_expand_pytorch.h"
-
-using namespace parrots;
-using namespace std;
-
-template <typename T>
-void contour_expand_parrots(T& ctx, const SSElement& attr,
-                            const OperatorBase::in_list_t& ins,
-                            OperatorBase::out_list_t& outs) {
-  int min_kernel_area, kernel_num;
-  SSAttrs(attr)
-      .get<int>("min_kernel_area", min_kernel_area)
-      .get<int>("kernel_num", kernel_num)
-      .done();
-  at::Tensor kernel_mask;
-  at::Tensor internal_kernel_label;
-  kernel_mask = buildATensor(ctx, ins[0]);
-  internal_kernel_label = buildATensor(ctx, ins[1]);
-  auto out = contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
-                            kernel_num);
-  int n = out.size(), m = 0;
-  for (int i = 0; i < n; ++i)
-    if (m < out[i].size()) m = out[i].size();
-  auto options = torch::TensorOptions().dtype(at::kInt);
-  auto tensor = torch::zeros({n, m}, options);
-  for (int i = 0; i < n; i++)
-    tensor.slice(0, i, i + 1) =
-        torch::from_blob(out[i].data(), {out[i].size()}, options);
-  updateDArray(ctx, tensor, outs[0]);
-}
-
-PARROTS_EXTENSION_REGISTER(contour_expand)
-    .attr("min_kernel_area")
-    .attr("kernel_num")
-    .input(2)
-    .output(1)
-    .apply(contour_expand_parrots<HostContext>)
-    .done();
--- a/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
+++ b/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
@ -1,12 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef CONTOUR_EXPAND_PYTORCH_H
-#define CONTOUR_EXPAND_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
-                                             Tensor internal_kernel_label,
-                                             int min_kernel_area,
-                                             int kernel_num);
-
-#endif  // CONTOUR_EXPAND_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/convex_iou.cpp
+++ b/mmcv/ops/csrc/parrots/convex_iou.cpp
@ -1,23 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-// modified from
-// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
-                     Tensor ious) {
-  DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
-}
-
-void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
-  convex_iou_impl(pointsets, polygons, ious);
-}
-
-void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
-                      Tensor output) {
-  DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
-}
-
-void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
-  convex_giou_impl(pointsets, polygons, output);
-}
--- a/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
@ -1,40 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "convex_iou_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void convex_iou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                     const OperatorBase::in_list_t& ins,
-                                     OperatorBase::out_list_t& outs) {
-  auto pointsets = buildATensor(ctx, ins[0]);
-  auto polygons = buildATensor(ctx, ins[1]);
-  auto ious = buildATensor(ctx, outs[0]);
-  convex_iou(pointsets, polygons, ious);
-}
-
-void convex_giou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                      const OperatorBase::in_list_t& ins,
-                                      OperatorBase::out_list_t& outs) {
-  auto pointsets = buildATensor(ctx, ins[0]);
-  auto polygons = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  convex_giou(pointsets, polygons, output);
-}
-
-PARROTS_EXTENSION_REGISTER(convex_iou)
-    .input(2)
-    .output(1)
-    .apply(convex_iou_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(convex_giou)
-    .input(2)
-    .output(1)
-    .apply(convex_giou_forward_cuda_parrots)
-    .done();
-
-#endif
--- a/mmcv/ops/csrc/parrots/convex_iou_pytorch.h
+++ b/mmcv/ops/csrc/parrots/convex_iou_pytorch.h
@ -1,11 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef CONVEX_IOU_PYTORCH_H
-#define CONVEX_IOU_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious);
-
-void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output);
-
-#endif  // RIROI_ALIGN_ROTATED_PYTORCH_H
--- a/mmcv/ops/csrc/parrots/correlation.cpp
+++ b/mmcv/ops/csrc/parrots/correlation.cpp
@ -1,47 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved.
-#include <iostream>
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
-                              int kH, int kW, int patchH, int patchW, int padH,
-                              int padW, int dilationH, int dilationW,
-                              int dilation_patchH, int dilation_patchW, int dH,
-                              int dW) {
-  DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW,
-                       patchH, patchW, padH, padW, dilationH, dilationW,
-                       dilation_patchH, dilation_patchW, dH, dW);
-}
-
-void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
-                               Tensor grad_input1, Tensor grad_input2, int kH,
-                               int kW, int patchH, int patchW, int padH,
-                               int padW, int dilationH, int dilationW,
-                               int dilation_patchH, int dilation_patchW, int dH,
-                               int dW) {
-  DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2,
-                       grad_input1, grad_input2, kH, kW, patchH, patchW, padH,
-                       padW, dilationH, dilationW, dilation_patchH,
-                       dilation_patchW, dH, dW);
-}
-
-void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
-                         int kW, int patchH, int patchW, int padH, int padW,
-                         int dilationH, int dilationW, int dilation_patchH,
-                         int dilation_patchW, int dH, int dW) {
-  correlation_forward_impl(input1, input2, output, kH, kW, patchH, patchW, padH,
-                           padW, dilationH, dilationW, dilation_patchH,
-                           dilation_patchW, dH, dW);
-}
-
-void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
-                          Tensor grad_input1, Tensor grad_input2, int kH,
-                          int kW, int patchH, int patchW, int padH, int padW,
-                          int dilationH, int dilationW, int dilation_patchH,
-                          int dilation_patchW, int dH, int dW) {
-  correlation_backward_impl(grad_output, input1, input2, grad_input1,
-                            grad_input2, kH, kW, patchH, patchW, padH, padW,
-                            dilationH, dilationW, dilation_patchH,
-                            dilation_patchW, dH, dW);
-}
--- a/mmcv/ops/csrc/parrots/correlation_parrots.cpp
+++ b/mmcv/ops/csrc/parrots/correlation_parrots.cpp
@ -1,176 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "correlation_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void correlation_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                      const OperatorBase::in_list_t& ins,
-                                      OperatorBase::out_list_t& outs) {
-  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
-      dilation_patchW, dH, dW;
-  SSAttrs(attr)
-      .get<int>("kH", kH)
-      .get<int>("kW", kW)
-      .get<int>("patchH", patchH)
-      .get<int>("patchW", patchW)
-      .get<int>("padH", padH)
-      .get<int>("padW", padW)
-      .get<int>("dilationH", dilationH)
-      .get<int>("dilationW", dilationW)
-      .get<int>("dilation_patchH", dilation_patchH)
-      .get<int>("dilation_patchW", dilation_patchW)
-      .get<int>("dH", dH)
-      .get<int>("dW", dW)
-      .done();
-
-  auto input1 = buildATensor(ctx, ins[0]);
-  auto input2 = buildATensor(ctx, ins[1]);
-
-  auto output = buildATensor(ctx, outs[0]);
-
-  correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH,
-                      padW, dilationH, dilationW, dilation_patchH,
-                      dilation_patchW, dH, dW);
-}
-
-void correlation_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                       const OperatorBase::in_list_t& ins,
-                                       OperatorBase::out_list_t& outs) {
-  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
-      dilation_patchW, dH, dW;
-  SSAttrs(attr)
-      .get<int>("kH", kH)
-      .get<int>("kW", kW)
-      .get<int>("patchH", patchH)
-      .get<int>("patchW", patchW)
-      .get<int>("padH", padH)
-      .get<int>("padW", padW)
-      .get<int>("dilationH", dilationH)
-      .get<int>("dilationW", dilationW)
-      .get<int>("dilation_patchH", dilation_patchH)
-      .get<int>("dilation_patchW", dilation_patchW)
-      .get<int>("dH", dH)
-      .get<int>("dW", dW)
-      .done();
-
-  auto grad_output = buildATensor(ctx, ins[0]);
-  auto input1 = buildATensor(ctx, ins[1]);
-  auto input2 = buildATensor(ctx, ins[2]);
-
-  auto grad_input1 = buildATensor(ctx, outs[0]);
-  auto grad_input2 = buildATensor(ctx, outs[1]);
-
-  correlation_backward(grad_output, input1, input2, grad_input1, grad_input2,
-                       kH, kW, patchH, patchW, padH, padW, dilationH, dilationW,
-                       dilation_patchH, dilation_patchW, dH, dW);
-}
-#endif
-
-void correlation_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
-                                     const OperatorBase::in_list_t& ins,
-                                     OperatorBase::out_list_t& outs) {
-  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
-      dilation_patchW, dH, dW;
-  SSAttrs(attr)
-      .get<int>("kH", kH)
-      .get<int>("kW", kW)
-      .get<int>("patchH", patchH)
-      .get<int>("patchW", patchW)
-      .get<int>("padH", padH)
-      .get<int>("padW", padW)
-      .get<int>("dilationH", dilationH)
-      .get<int>("dilationW", dilationW)
-      .get<int>("dilation_patchH", dilation_patchH)
-      .get<int>("dilation_patchW", dilation_patchW)
-      .get<int>("dH", dH)
-      .get<int>("dW", dW)
-      .done();
-
-  auto input1 = buildATensor(ctx, ins[0]);
-  auto input2 = buildATensor(ctx, ins[1]);
-
-  auto output = buildATensor(ctx, outs[0]);
-
-  correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH,
-                      padW, dilationH, dilationW, dilation_patchH,
-                      dilation_patchW, dH, dW);
-}
-
-void correlation_backward_cpu_parrots(HostContext& ctx, const SSElement& attr,
-                                      const OperatorBase::in_list_t& ins,
-                                      OperatorBase::out_list_t& outs) {
-  int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH,
-      dilation_patchW, dH, dW;
-  SSAttrs(attr)
-      .get<int>("kH", kH)
-      .get<int>("kW", kW)
-      .get<int>("patchH", patchH)
-      .get<int>("patchW", patchW)
-      .get<int>("padH", padH)
-      .get<int>("padW", padW)
-      .get<int>("dilationH", dilationH)
-      .get<int>("dilationW", dilationW)
-      .get<int>("dilation_patchH", dilation_patchH)
-      .get<int>("dilation_patchW", dilation_patchW)
-      .get<int>("dH", dH)
-      .get<int>("dW", dW)
-      .done();
-
-  auto grad_output = buildATensor(ctx, ins[0]);
-  auto input1 = buildATensor(ctx, ins[1]);
-  auto input2 = buildATensor(ctx, ins[2]);
-
-  auto grad_input1 = buildATensor(ctx, outs[0]);
-  auto grad_input2 = buildATensor(ctx, outs[1]);
-
-  correlation_backward(grad_output, input1, input2, grad_input1, grad_input2,
-                       kH, kW, patchH, patchW, padH, padW, dilationH, dilationW,
-                       dilation_patchH, dilation_patchW, dH, dW);
-}
-
-PARROTS_EXTENSION_REGISTER(correlation_forward)
-    .attr("kH")
-    .attr("kW")
-    .attr("patchH")
-    .attr("patchW")
-    .attr("padH")
-    .attr("padW")
-    .attr("dilationH")
-    .attr("dilationW")
-    .attr("dilation_patchH")
-    .attr("dilation_patchW")
-    .attr("dH")
-    .attr("dW")
-    .input(2)
-    .output(1)
-    .apply(correlation_forward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(correlation_forward_cuda_parrots)
-#endif
-    .done();
-
-PARROTS_EXTENSION_REGISTER(correlation_backward)
-    .attr("kH")
-    .attr("kW")
-    .attr("patchH")
-    .attr("patchW")
-    .attr("padH")
-    .attr("padW")
-    .attr("dilationH")
-    .attr("dilationW")
-    .attr("dilation_patchH")
-    .attr("dilation_patchW")
-    .attr("dH")
-    .attr("dW")
-    .input(3)
-    .output(2)
-    .apply(correlation_backward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(correlation_backward_cuda_parrots)
-#endif
-    .done();
--- a/Show More
+++ b/Show More