diff --git a/MANIFEST.in b/MANIFEST.in index 622635caa..70f13989c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1,5 @@ include requirements/runtime.txt include mmcv/ops/csrc/common/cuda/*.cuh mmcv/ops/csrc/common/cuda/*.hpp mmcv/ops/csrc/common/*.hpp include mmcv/ops/csrc/pytorch/*.cpp mmcv/ops/csrc/pytorch/cuda/*.cu mmcv/ops/csrc/pytorch/cuda/*.cpp mmcv/ops/csrc/pytorch/cpu/*.cpp -include mmcv/ops/csrc/parrots/*.h mmcv/ops/csrc/parrots/*.cpp include mmcv/ops/csrc/pytorch/mps/*.mm mmcv/ops/csrc/common/mps/*.h mmcv/ops/csrc/common/mps/*.mm recursive-include mmcv/ops/csrc/ *.h *.hpp *.cpp *.cuh *.cu *.mm diff --git a/docs/en/api/utils.rst b/docs/en/api/utils.rst index f2ff4c2a3..2a5f7a5ac 100644 --- a/docs/en/api/utils.rst +++ b/docs/en/api/utils.rst @@ -19,5 +19,3 @@ mmcv.utils IS_MLU_AVAILABLE IS_MPS_AVAILABLE collect_env - jit - skip_no_elena diff --git a/docs/zh_cn/api/utils.rst b/docs/zh_cn/api/utils.rst index f2ff4c2a3..2a5f7a5ac 100644 --- a/docs/zh_cn/api/utils.rst +++ b/docs/zh_cn/api/utils.rst @@ -19,5 +19,3 @@ mmcv.utils IS_MLU_AVAILABLE IS_MPS_AVAILABLE collect_env - jit - skip_no_elena diff --git a/mmcv/cnn/bricks/activation.py b/mmcv/cnn/bricks/activation.py index ae99714b9..236d2631e 100644 --- a/mmcv/cnn/bricks/activation.py +++ b/mmcv/cnn/bricks/activation.py @@ -92,8 +92,7 @@ class GELU(nn.Module): return F.gelu(input) -if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.4')): +if digit_version(TORCH_VERSION) < digit_version('1.4'): MODELS.register_module(module=GELU) else: MODELS.register_module(module=nn.GELU) diff --git a/mmcv/cnn/bricks/conv_module.py b/mmcv/cnn/bricks/conv_module.py index 1f8e16051..21cf6adae 100644 --- a/mmcv/cnn/bricks/conv_module.py +++ b/mmcv/cnn/bricks/conv_module.py @@ -6,7 +6,8 @@ import torch import torch.nn as nn from mmengine.model import constant_init, kaiming_init from mmengine.registry import MODELS -from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm, _InstanceNorm +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.modules.instancenorm import _InstanceNorm from .activation import build_activation_layer from .conv import build_conv_layer diff --git a/mmcv/cnn/bricks/hswish.py b/mmcv/cnn/bricks/hswish.py index 6b6dd006d..ac5a72339 100644 --- a/mmcv/cnn/bricks/hswish.py +++ b/mmcv/cnn/bricks/hswish.py @@ -30,8 +30,7 @@ class HSwish(nn.Module): return x * self.act(x + 3) / 6 -if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.7')): +if digit_version(TORCH_VERSION) < digit_version('1.7'): # Hardswish is not supported when PyTorch version < 1.6. # And Hardswish in PyTorch 1.6 does not support inplace. 
MODELS.register_module(module=HSwish) diff --git a/mmcv/cnn/bricks/norm.py b/mmcv/cnn/bricks/norm.py index 2fff684af..ddbfc8065 100644 --- a/mmcv/cnn/bricks/norm.py +++ b/mmcv/cnn/bricks/norm.py @@ -5,14 +5,14 @@ from typing import Dict, Tuple, Union import torch.nn as nn from mmengine.registry import MODELS from mmengine.utils import is_tuple_of -from mmengine.utils.dl_utils.parrots_wrapper import (SyncBatchNorm, _BatchNorm, - _InstanceNorm) +from torch.nn.modules.batchnorm import _BatchNorm +from torch.nn.modules.instancenorm import _InstanceNorm MODELS.register_module('BN', module=nn.BatchNorm2d) MODELS.register_module('BN1d', module=nn.BatchNorm1d) MODELS.register_module('BN2d', module=nn.BatchNorm2d) MODELS.register_module('BN3d', module=nn.BatchNorm3d) -MODELS.register_module('SyncBN', module=SyncBatchNorm) +MODELS.register_module('SyncBN', module=nn.SyncBatchNorm) MODELS.register_module('GN', module=nn.GroupNorm) MODELS.register_module('LN', module=nn.LayerNorm) MODELS.register_module('IN', module=nn.InstanceNorm2d) diff --git a/mmcv/cnn/bricks/wrappers.py b/mmcv/cnn/bricks/wrappers.py index 07eb04ee3..357665821 100644 --- a/mmcv/cnn/bricks/wrappers.py +++ b/mmcv/cnn/bricks/wrappers.py @@ -12,16 +12,13 @@ import torch.nn as nn from mmengine.registry import MODELS from torch.nn.modules.utils import _pair, _triple -if torch.__version__ == 'parrots': - TORCH_VERSION = torch.__version__ -else: - # torch.__version__ could be 1.3.1+cu92, we only need the first two - # for comparison - TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) +# torch.__version__ could be 1.3.1+cu92, we only need the first two +# for comparison +TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2]) def obsolete_torch_version(torch_version, version_threshold) -> bool: - return torch_version == 'parrots' or torch_version <= version_threshold + return torch_version <= version_threshold class NewEmptyTensorOp(torch.autograd.Function): diff --git a/mmcv/ops/ball_query.py b/mmcv/ops/ball_query.py index a89b36b52..fe1b9162e 100644 --- a/mmcv/ops/ball_query.py +++ b/mmcv/ops/ball_query.py @@ -75,8 +75,7 @@ class BallQuery(Function): min_radius=min_radius, max_radius=max_radius, nsample=sample_num) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) + ctx.mark_non_differentiable(idx) return idx @staticmethod diff --git a/mmcv/ops/bbox.py b/mmcv/ops/bbox.py index 4583ba7d5..4ba93d6b2 100644 --- a/mmcv/ops/bbox.py +++ b/mmcv/ops/bbox.py @@ -116,10 +116,6 @@ def bbox_overlaps(bboxes1: torch.Tensor, if rows * cols == 0: return ious - if bboxes1.device.type == 'cpu' and torch.__version__ == 'parrots': - return _bbox_overlaps_cpu( - bboxes1, bboxes2, mode=mode, aligned=aligned, offset=offset) - ext_module.bbox_overlaps( bboxes1, bboxes2, ious, mode=mode_flag, aligned=aligned, offset=offset) diff --git a/mmcv/ops/carafe.py b/mmcv/ops/carafe.py index f7e79c275..490bf0f1a 100644 --- a/mmcv/ops/carafe.py +++ b/mmcv/ops/carafe.py @@ -56,8 +56,7 @@ class CARAFENaiveFunction(Function): group_size=group_size, scale_factor=scale_factor) - if features.requires_grad or masks.requires_grad or \ - torch.__version__ == 'parrots': + if features.requires_grad or masks.requires_grad: ctx.save_for_backward(features, masks) return output @@ -150,8 +149,7 @@ class CARAFEFunction(Function): group_size=group_size, scale_factor=scale_factor) - if features.requires_grad or masks.requires_grad or \ - torch.__version__ == 'parrots': + if features.requires_grad or masks.requires_grad: 
ctx.save_for_backward(features, masks, rfeatures) return output diff --git a/mmcv/ops/contour_expand.py b/mmcv/ops/contour_expand.py index 7184609ad..409348fe8 100644 --- a/mmcv/ops/contour_expand.py +++ b/mmcv/ops/contour_expand.py @@ -36,17 +36,6 @@ def contour_expand(kernel_mask: Union[np.array, torch.Tensor], if isinstance(internal_kernel_label, np.ndarray): internal_kernel_label = torch.from_numpy(internal_kernel_label) - if torch.__version__ == 'parrots': - if kernel_mask.shape[0] == 0 or internal_kernel_label.shape[0] == 0: - label = [] - else: - label = ext_module.contour_expand( - kernel_mask, - internal_kernel_label, - min_kernel_area=min_kernel_area, - kernel_num=kernel_num) - label = label.tolist() # type: ignore - else: - label = ext_module.contour_expand(kernel_mask, internal_kernel_label, - min_kernel_area, kernel_num) + label = ext_module.contour_expand(kernel_mask, internal_kernel_label, + min_kernel_area, kernel_num) return label diff --git a/mmcv/ops/corner_pool.py b/mmcv/ops/corner_pool.py index 89a7c485c..f63302b38 100644 --- a/mmcv/ops/corner_pool.py +++ b/mmcv/ops/corner_pool.py @@ -70,7 +70,7 @@ class CornerPool(nn.Module): self.mode = mode def forward(self, x: Tensor) -> Tensor: - if torch.__version__ != 'parrots' and torch.__version__ >= '1.5.0': + if torch.__version__ >= '1.5.0': dim, flip = self.cummax_dim_flip[self.mode] if flip: x = x.flip(dim) diff --git a/mmcv/ops/csrc/README.md b/mmcv/ops/csrc/README.md index 8fcc6eb1a..02116bdf6 100644 --- a/mmcv/ops/csrc/README.md +++ b/mmcv/ops/csrc/README.md @@ -8,14 +8,11 @@ This folder contains all non-python code for MMCV custom ops. Please follow the . ├── common │ ├── box_iou_rotated_utils.hpp -│ ├── parrots_cpp_helper.hpp -│ ├── parrots_cuda_helper.hpp │ ├── pytorch_cpp_helper.hpp │ ├── pytorch_cuda_helper.hpp │ ├── pytorch_device_registry.hpp │   ├── cuda │   │ ├── common_cuda_helper.hpp -│   │ ├── parrots_cudawarpfunction.cuh │   │ ├── ... │   │ └── ops_cuda_kernel.cuh |   ├── mps @@ -26,11 +23,6 @@ This folder contains all non-python code for MMCV custom ops. Please follow the │   │ └── ... |   └── utils │   │ └── ... -├── parrots -│   ├── ... -│   ├── ops.cpp -│   ├── ops_parrots.cpp -│   └── ops_pytorch.h └── pytorch     ├── info.cpp     ├── pybind.cpp @@ -57,7 +49,6 @@ This folder contains all non-python code for MMCV custom ops. Please follow the - `mps`: The tools used to support MPS ops. **NOTE** that MPS support is **experimental**. - `mlu`: The MLU kernels used to support [Cambricon](https://www.cambricon.com/) device. - `utils`: The kernels and utils of spconv. -- `parrots`: **Parrots** is a deep learning frame for model training and inference. Parrots custom ops are placed in this directory. - `pytorch`: **PyTorch** custom ops are supported by binding C++ to Python with **pybind11**. The ops implementation and binding codes are placed in this directory. - `cuda`: This directory contains cuda kernel launchers, which feed memory pointers of tensor to the cuda kernel in `common/cuda`. The launchers provide c++ interface of cuda implementation of corresponding custom ops. - `cpu`: This directory contain cpu implementations of corresponding custom ops. 
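Note on the Python hunks above: every `torch.__version__ == 'parrots'` special case collapses into a plain version check, so gating now relies either on `digit_version` (`activation.py`, `hswish.py`) or on the `(major, minor)` tuple parsed in `wrappers.py`. The one remaining string comparison, `torch.__version__ >= '1.5.0'` in `corner_pool.py`, is lexicographic (`'1.10.0'` sorts before `'1.5.0'`), which is exactly why the other call sites parse the version first. A minimal sketch of the surviving registration pattern, assuming mmengine's `digit_version` helper and `MODELS` registry behave as used above; the `DemoSwish` module and its registry name are made up for illustration:

```python
# Sketch of the version-gated registration pattern from activation.py /
# hswish.py above. `DemoSwish` and its registry name are hypothetical.
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmengine.registry import MODELS
from mmengine.utils import digit_version

TORCH_VERSION = torch.__version__  # e.g. '1.13.1+cu117'


class DemoSwish(nn.Module):
    """Fallback x * relu6(x + 3) / 6 for PyTorch < 1.7."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return x * F.relu6(x + 3) / 6


if digit_version(TORCH_VERSION) < digit_version('1.7'):
    # nn.Hardswish is missing before 1.6 and lacks inplace in 1.6,
    # so register the hand-written fallback instead.
    MODELS.register_module('DemoSwish', module=DemoSwish)
else:
    MODELS.register_module('DemoSwish', module=nn.Hardswish)

act = MODELS.build(dict(type='DemoSwish'))
```

Whichever branch runs, `dict(type='DemoSwish')` resolves to a concrete module at build time, which is what keeps downstream configs version-agnostic.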
diff --git a/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh index 36e41107e..0ab23399b 100644 --- a/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH #define ACTIVE_ROTATED_FILTER_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void active_rotated_filter_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh index 9f9250844..0891b483c 100644 --- a/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH #define ASSIGN_SCORE_WITHK_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif // input: points(B,N0,M,O), centers(B,N0,M,O), scores(B,N1,K,M), knn_idx(B,N1,K) // output: fout(B,O,N) diff --git a/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh index 632b5c494..0bb05adcb 100644 --- a/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef BALL_QUERY_CUDA_KERNEL_CUH #define BALL_QUERY_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void ball_query_forward_cuda_kernel(int b, int n, int m, diff --git a/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh index 15bd91eca..ec9fbef65 100644 --- a/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef BBOX_OVERLAPS_CUDA_KERNEL_CUH #define BBOX_OVERLAPS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __device__ __forceinline__ void load_bbox(const T* bbox, const int base, T& x1, diff --git a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh index 537610416..a1091eef8 100644 --- a/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/bezier_align_cuda_kernel.cuh @@ -8,11 +8,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT template diff --git a/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh index 1d2a2197b..49c78776e 100644 --- a/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh @@ -12,11 +12,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT enum BorderMode { Top = 0, Left = 1, Bottom = 2, Right = 3 }; diff --git 
a/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh b/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh index cf8ad5e1a..affb3e39d 100644 --- a/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/box_iou_quadri_cuda.cuh @@ -2,12 +2,8 @@ #ifndef BOX_IOU_QUADRI_CUDA_CUH #define BOX_IOU_QUADRI_CUDA_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif #include "box_iou_rotated_utils.hpp" +#include "pytorch_cuda_helper.hpp" // 2D block with 32 * 16 = 512 threads per block const int BLOCK_DIM_X = 32; diff --git a/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh b/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh index abd47cd85..7fbafeb00 100644 --- a/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh @@ -4,12 +4,8 @@ #ifndef BOX_IOU_ROTATED_CUDA_CUH #define BOX_IOU_ROTATED_CUDA_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif #include "box_iou_rotated_utils.hpp" +#include "pytorch_cuda_helper.hpp" // 2D block with 32 * 16 = 512 threads per block const int BLOCK_DIM_X = 32; diff --git a/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh index 20fd617ff..01a588694 100644 --- a/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef CARAFE_CUDA_KERNEL_CUH #define CARAFE_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif #ifdef MMCV_WITH_HIP #define WARP_SIZE 64 diff --git a/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh index 48230c632..0a4ab8737 100644 --- a/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef CARAFE_NAIVE_CUDA_KERNEL_CUH #define CARAFE_NAIVE_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif __device__ inline int Loc2Index(const int n, const int c, const int h, const int w, const int channel_num, diff --git a/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh index 89feea4a5..57a3d7b2b 100644 --- a/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/chamfer_distance_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef CHAMFER_DISTANCE_CUDA_KERNEL_CUH #define CHAMFER_DISTANCE_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif #define MAX_SHARED_SCALAR_T 6144 // 49152 / 8 = 6144 diff --git a/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh index 2af96f796..8bde5e257 100644 --- a/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef CONVEX_IOU_CUDA_KERNEL_CUH #define CONVEX_IOU_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif #define MAXN 100 #define NMAX 512 diff --git a/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh b/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh index f910561ec..a9e68bc0d 100644 --- a/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh +++ 
b/mmcv/ops/csrc/common/cuda/correlation_cuda.cuh @@ -6,14 +6,10 @@ #ifndef CORRELATION_CUDA #define CORRELATION_CUDA -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif - #include #include + +#include "pytorch_cuda_helper.hpp" // Using is recommended in the official documentation in // https://pytorch.org/tutorials/advanced/cpp_extension.html#writing-the-c-op. // However, we use for compatibility with CUDA 9.0 diff --git a/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh index 6b4d1bbd8..98e1e7aee 100644 --- a/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh @@ -70,11 +70,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT template diff --git a/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh index 86c4bc66d..ac95b35f0 100644 --- a/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef DEFORM_ROI_POOL_CUDA_KERNEL_CUH #define DEFORM_ROI_POOL_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void deform_roi_pool_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh index 053977a30..49319e915 100644 --- a/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/diff_iou_rotated_cuda_kernel.cuh @@ -1,11 +1,7 @@ // Copyright (c) OpenMMLab. 
All rights reserved // Adapted from // https://github.com/lilanxiao/Rotated_IoU/cuda_op/sort_vert_kernel.cu # noqa -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif #define MAX_NUM_VERT_IDX 9 #define INTERSECTION_OFFSET 8 diff --git a/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh index d3801a02c..c23278afb 100644 --- a/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH #define FURTHEST_POINT_SAMPLE_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif __device__ void __update(float *__restrict__ dists, int *__restrict__ dists_i, int idx1, int idx2) { diff --git a/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh index 6d932434c..249761113 100644 --- a/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef GATHER_POINTS_CUDA_KERNEL_CUH #define GATHER_POINTS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif #define TOTAL_THREADS 1024 diff --git a/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh index dfad66fc1..c5e3540b3 100644 --- a/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef GROUP_POINTS_CUDA_KERNEL_CUH #define GROUP_POINTS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void group_points_forward_cuda_kernel(int b, int c, int n, diff --git a/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh index 9ebdcad15..ae1881ac4 100644 --- a/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef IOU3D_CUDA_KERNEL_CUH #define IOU3D_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif const int THREADS_PER_BLOCK_IOU3D = 16; const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; diff --git a/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh index 3cf52bb90..7686d4f27 100644 --- a/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef KNN_CUDA_KERNEL_CUH #define KNN_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif inline __device__ void swap_float(float *x, float *y) { float tmp = *x; diff --git a/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh index 1a0bd040e..b11b3cd58 100644 --- a/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef MASKED_CONV2D_CUDA_KERNEL_CUH #define MASKED_CONV2D_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template 
__global__ void MaskedIm2colForward(const int n, const scalar_t *data_im, diff --git a/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh b/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh index df56e7436..cd96b6370 100644 --- a/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh @@ -2,11 +2,7 @@ #ifndef MIN_AREA_POLYGONS_CUDA_KERNEL_CUH #define MIN_AREA_POLYGONS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif #define MAXN 20 __device__ const float PI = 3.1415926; diff --git a/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh index ca0e91a25..b29c74ed9 100644 --- a/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh @@ -70,11 +70,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT template diff --git a/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh index 281d9f0b4..09fede657 100644 --- a/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh @@ -6,11 +6,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT int const threadsPerBlock = sizeof(unsigned long long int) * 8; diff --git a/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh b/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh index bba3b8258..9a4a232f0 100644 --- a/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/nms_quadri_cuda.cuh @@ -2,12 +2,8 @@ #ifndef NMS_QUADRI_CUDA_CUH #define NMS_QUADRI_CUDA_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif #include "box_iou_rotated_utils.hpp" +#include "pytorch_cuda_helper.hpp" __host__ __device__ inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); diff --git a/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh b/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh index 747327afb..ade478925 100644 --- a/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh +++ b/mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh @@ -4,12 +4,8 @@ #ifndef NMS_ROTATED_CUDA_CUH #define NMS_ROTATED_CUDA_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif #include "box_iou_rotated_utils.hpp" +#include "pytorch_cuda_helper.hpp" __host__ __device__ inline int divideUP(const int x, const int y) { return (((x) + (y)-1) / (y)); diff --git a/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh b/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh deleted file mode 100644 index 7918a5745..000000000 --- a/mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (c) 2019, SenseTime. 
- */ - -#ifndef INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_ -#define INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_ - -#ifndef __CUDACC__ -#error cudawarpfunction.cuh should only be included by .cu files -#endif -#include - -#include - -#ifdef PARROTS_USE_HALF -#include -#endif -#ifdef __CUDA_ARCH__ -#define CUDA_INTRINSIC_FUNC(Expr) Expr -#else -#define CUDA_INTRINSIC_FUNC(Expr) -#endif - -#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 - -#ifdef PARROTS_USE_HALF - -#if CUDA_VERSION < 9000 - -__device__ inline float16 __shfl(float16 var, int srcLane, int width) { - CUDA_INTRINSIC_FUNC(return __shfl(var.y, srcLane, width);); -} - -__device__ inline float16 __shfl_up(float16 var, unsigned delta, int width) { - CUDA_INTRINSIC_FUNC(return __shfl_up(var.y, delta, width);); -} - -__device__ inline float16 __shfl_down(float16 var, unsigned delta, int width) { - CUDA_INTRINSIC_FUNC(return __shfl_down(var.y, delta, width);); -} - -__device__ inline float16 __shfl_xor(float16 var, int laneMask, int width) { - CUDA_INTRINSIC_FUNC(return __shfl_xor(var.y, laneMask, width);); -} - -#else // CUDA_VERSION >= 9000 - -__device__ inline float16 __shfl_sync(unsigned mask, float16 var, int srcLane, - int width = warpSize) { - CUDA_INTRINSIC_FUNC(float16 r; r.y = __shfl_sync(mask, var.y, srcLane, width); - return r;); -} - -__device__ inline float16 __shfl_up_sync(unsigned mask, float16 var, - unsigned delta, int width = warpSize) { - CUDA_INTRINSIC_FUNC( - float16 r; r.y = __shfl_up_sync(mask, var.y, delta, width); return r;); -} - -__device__ inline float16 __shfl_down_sync(unsigned mask, float16 var, - unsigned delta, - int width = warpSize) { - CUDA_INTRINSIC_FUNC( - float16 r; r.y = __shfl_down_sync(mask, var.y, delta, width); return r;); -} - -__device__ inline float16 __shfl_xor_sync(unsigned mask, float16 var, - int laneMask, int width) { - CUDA_INTRINSIC_FUNC(float16 r; - r.y = __shfl_xor_sync(mask, var.y, laneMask, width); - return r;); -} - -#endif // CUDA_VERSION < 9000 - -#endif // PARROTS_USE_HALF - -// warp shuffle interface with a dummy mask -#if CUDA_VERSION < 9000 - -template -__device__ inline T __shfl_sync(unsigned mask, T var, int srcLane, - int width = warpSize) { - CUDA_INTRINSIC_FUNC(return __shfl(var, srcLane, width);); -} - -template -__device__ inline T __shfl_up_sync(unsigned mask, T var, unsigned delta, - int width = warpSize) { - CUDA_INTRINSIC_FUNC(return __shfl_up(var, delta, width);); -} - -template -__device__ inline T __shfl_down_sync(unsigned mask, T var, unsigned delta, - int width = warpSize) { - CUDA_INTRINSIC_FUNC(return __shfl_down(var, delta, width);); -} - -template -__device__ inline T __shfl_xor_sync(unsigned mask, T var, int laneMask, - int width = warpSize) { - CUDA_INTRINSIC_FUNC(return __shfl_xor(var, laneMask, width);); -} - -#endif // CUDA_VERSION < 9000 - -#endif // !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 300 - -#endif // INCLUDE_PARROTS_DARRAY_CUDAWARPFUNCTION_CUH_ diff --git a/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh index 342362079..db0e8ced2 100644 --- a/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef POINT_IN_BOXES_CUDA_KERNEL_CUH #define POINT_IN_BOXES_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz, diff --git 
a/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh index a0769d75a..ea9e0e827 100644 --- a/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef POINTS_IN_POLYGONS_CUDA_KERNEL_CUH #define POINTS_IN_POLYGONS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif struct point { float x, y; diff --git a/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh index e2f5a11b8..57662d2fe 100644 --- a/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/prroi_pool_cuda_kernel.cuh @@ -5,11 +5,7 @@ #ifndef PRROI_POOL_CUDA_KERNEL_CUH #define PRROI_POOL_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __device__ static __forceinline__ T PrRoIPoolingGetData(const T *data, diff --git a/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh index 5d946686b..523d71a01 100644 --- a/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef PSAMASK_CUDA_KERNEL_CUH #define PSAMASK_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif // CUDA: grid stride looping #ifndef CUDA_KERNEL_LOOP diff --git a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh index 4383d9e82..45b576ca5 100644 --- a/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh @@ -4,11 +4,8 @@ #define RIROI_ALIGN_ROTATED_CUDA_KERNEL_CUH #include -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS + #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS /*** Forward ***/ template diff --git a/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh index 4541462af..8b90ee694 100644 --- a/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh @@ -6,11 +6,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT /*** Forward ***/ diff --git a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh index 8274dc50c..8f631649d 100644 --- a/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh @@ -8,11 +8,7 @@ #ifdef MMCV_WITH_TRT #include "common_cuda_helper.hpp" #else // MMCV_WITH_TRT -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else // MMCV_USE_PARROTS #include "pytorch_cuda_helper.hpp" -#endif // MMCV_USE_PARROTS #endif // MMCV_WITH_TRT /*** Forward ***/ diff --git a/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh index 3d7eae66b..39c7cb193 100644 --- a/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh +++ 
b/mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef ROI_POOL_CUDA_KERNEL_CUH #define ROI_POOL_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void roi_pool_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh index fc0aacf14..ba0aaad71 100644 --- a/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef ROIAWARE_POOL3D_CUDA_KERNEL_CUH #define ROIAWARE_POOL3D_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz, diff --git a/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh index 545f6ffa0..eb10c8c42 100644 --- a/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef ROIPOINT_POOL3D_CUDA_KERNEL_CUH #define ROIPOINT_POOL3D_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __device__ inline void lidar_to_local_coords(T shift_x, T shift_y, T rz, diff --git a/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh index ffcc658cc..09f69aa3a 100644 --- a/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH #define ROTATED_FEATURE_ALIGN_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void rotated_feature_align_forward_kernel( diff --git a/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh index af5b9f67b..a6710331c 100644 --- a/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef SCATTER_POINTS_CUDA_KERNEL_CUH #define SCATTER_POINTS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; int const maxGridDim = 50000; diff --git a/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh index 1eb5f8fcc..d13358952 100644 --- a/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH #define SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void sigmoid_focal_loss_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh index 631b2c617..64299b9b6 100644 --- a/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh @@ -2,11 +2,7 @@ 
#ifndef SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH #define SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void softmax_focal_loss_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh index 06caefa18..3d304bd52 100644 --- a/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/stack_ball_query_cuda_kernel.cuh @@ -4,11 +4,7 @@ #ifndef STACK_BALL_QUERY_CUDA_KERNEL_CUH #define STACK_BALL_QUERY_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void stack_ball_query_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh index 4ef3663d0..6a79c37a5 100644 --- a/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/stack_group_points_cuda_kernel.cuh @@ -3,12 +3,9 @@ // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points_gpu.cu #ifndef STACK_GROUP_POINTS_CUDA_KERNEL_CUH #define STACK_GROUP_POINTS_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else -#include "pytorch_cuda_helper.hpp" -#endif #include + +#include "pytorch_cuda_helper.hpp" template __global__ void stack_group_points_forward_cuda_kernel( int b, int c, int m, int nsample, const T *features, diff --git a/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh index 4ec6a4668..e16e637cd 100644 --- a/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef SYNCBN_CUDA_KERNEL_CUH #define SYNCBN_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void sync_bn_forward_mean_cuda_kernel(const T *input, float *mean, diff --git a/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh index 971b496e5..41ce65b9c 100644 --- a/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef THREE_INTERPOLATE_CUDA_KERNEL_CUH #define THREE_INTERPOLATE_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void three_interpolate_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh index 15434121b..e31b6a0de 100644 --- a/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef THREE_NN_CUDA_KERNEL_CUH #define THREE_NN_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void three_nn_forward_cuda_kernel(int b, int n, int m, diff --git a/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh index 4d1159a51..8b7112dab 100644 --- a/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh @@ -2,11 +2,7 @@ 
#ifndef TIN_SHIFT_CUDA_KERNEL_CUH #define TIN_SHIFT_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif template __global__ void tin_shift_forward_cuda_kernel( diff --git a/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh b/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh index 021b488d8..b4cd1644e 100644 --- a/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh +++ b/mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh @@ -2,11 +2,7 @@ #ifndef VOXELIZATION_CUDA_KERNEL_CUH #define VOXELIZATION_CUDA_KERNEL_CUH -#ifdef MMCV_USE_PARROTS -#include "parrots_cuda_helper.hpp" -#else #include "pytorch_cuda_helper.hpp" -#endif typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; diff --git a/mmcv/ops/csrc/common/parrots_cpp_helper.hpp b/mmcv/ops/csrc/common/parrots_cpp_helper.hpp deleted file mode 100644 index 72701890d..000000000 --- a/mmcv/ops/csrc/common/parrots_cpp_helper.hpp +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef PARROTS_CPP_HELPER -#define PARROTS_CPP_HELPER -#include -#include -#include -#include -#include - -using namespace parrots; - -#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \ - case prim_type: { \ - using scalar_t = type; \ - return __VA_ARGS__(); \ - } - -#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...) \ - [&] { \ - const auto& the_type = TYPE; \ - switch (the_type) { \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ - default: \ - PARROTS_NOTSUPPORTED; \ - } \ - }() - -#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...) \ - [&] { \ - const auto& the_type = TYPE; \ - switch (the_type) { \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \ - default: \ - PARROTS_NOTSUPPORTED; \ - } \ - }() - -#endif // PARROTS_CPP_HELPER diff --git a/mmcv/ops/csrc/common/parrots_cuda_helper.hpp b/mmcv/ops/csrc/common/parrots_cuda_helper.hpp deleted file mode 100644 index 539009c3f..000000000 --- a/mmcv/ops/csrc/common/parrots_cuda_helper.hpp +++ /dev/null @@ -1,111 +0,0 @@ -#ifndef PARROTS_CUDA_HELPER -#define PARROTS_CUDA_HELPER - -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "common_cuda_helper.hpp" -#include "parrots_cudawarpfunction.cuh" - -using namespace parrots; -using phalf = float16; - -#define __PHALF(x) (x.y) - -#define PARROTS_CUDA_CHECK(exp) \ - do { \ - cudaError_t err = exp; \ - if (err != cudaSuccess) { \ - fprintf(stderr, "cudaCheckError() failed : %s\n", \ - cudaGetErrorString(err)); \ - exit(-1); \ - } \ - } while (0) - -#define PARROTS_PRIVATE_CASE_TYPE(prim_type, type, ...) \ - case prim_type: { \ - using scalar_t = type; \ - return __VA_ARGS__(); \ - } - -#define PARROTS_DISPATCH_FLOATING_TYPES(TYPE, ...) \ - [&] { \ - const auto& the_type = TYPE; \ - switch (the_type) { \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ - default: \ - PARROTS_NOTSUPPORTED; \ - } \ - }() - -#define PARROTS_DISPATCH_FLOATING_TYPES_AND_HALF(TYPE, ...) 
\ - [&] { \ - const auto& the_type = TYPE; \ - switch (the_type) { \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float64, double, __VA_ARGS__) \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float32, float, __VA_ARGS__) \ - PARROTS_PRIVATE_CASE_TYPE(Prim::Float16, float16, __VA_ARGS__) \ - default: \ - PARROTS_NOTSUPPORTED; \ - } \ - }() - -/** atomicAdd **/ -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 600 - -static __inline__ __device__ double atomicAdd(double* address, double val) { - unsigned long long int* address_as_ull = (unsigned long long int*)address; - unsigned long long int old = *address_as_ull, assumed; - if (val == 0.0) return __longlong_as_double(old); - do { - assumed = old; - old = atomicCAS(address_as_ull, assumed, - __double_as_longlong(val + __longlong_as_double(assumed))); - } while (assumed != old); - return __longlong_as_double(old); -} - -#endif - -static __inline__ __device__ float16 atomicAdd(float16* address, float16 val) { - unsigned int* aligned = - (unsigned int*)((size_t)address - ((size_t)address & 2)); - unsigned int old = *aligned; - unsigned int assumed; - unsigned short old_as_us; - do { - assumed = old; - old_as_us = - (unsigned short)((size_t)address & 2 ? old >> 16 : old & 0xffff); - -#if __CUDACC_VER_MAJOR__ >= 9 - float16 tmp; - tmp.x = old_as_us; - float16 sum = tmp + val; - unsigned short sum_as_us = sum.x; -// half sum = __float2half_rn(__half2float(__ushort_as_half(old_as_us)) -// + (float)(val)); unsigned short sum_as_us = __half_as_ushort(sum); -#else - unsigned short sum_as_us = - __float2half_rn(__half2float(old_as_us) + (float)(val)); -#endif - - unsigned int sum_as_ui = (size_t)address & 2 - ? (sum_as_us << 16) | (old & 0xffff) - : (old & 0xffff0000) | sum_as_us; - old = atomicCAS(aligned, assumed, sum_as_ui); - } while (assumed != old); - //__half_raw raw = {old_as_us}; - // return float16(raw); - return *reinterpret_cast(&old_as_us); -} -#endif // PARROTS_CUDA_HELPER diff --git a/mmcv/ops/csrc/parrots/active_rotated_filter.cpp b/mmcv/ops/csrc/parrots/active_rotated_filter.cpp deleted file mode 100644 index e1ead1f8e..000000000 --- a/mmcv/ops/csrc/parrots/active_rotated_filter.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. -// Modified from -// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void active_rotated_filter_forward_impl(const Tensor input, - const Tensor indices, Tensor output) { - DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices, - output); -} - -void active_rotated_filter_backward_impl(const Tensor grad_out, - const Tensor indices, Tensor grad_in) { - DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices, - grad_in); -} - -void active_rotated_filter_forward(const Tensor input, const Tensor indices, - Tensor output) { - active_rotated_filter_forward_impl(input, indices, output); -} - -void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices, - Tensor grad_in) { - active_rotated_filter_backward_impl(grad_out, indices, grad_in); -} diff --git a/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp b/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp deleted file mode 100644 index 9097f7e0a..000000000 --- a/mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "active_rotated_filter_pytorch.h" -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void active_rotated_filter_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto input = buildATensor(ctx, ins[0]); - auto indices = buildATensor(ctx, ins[1]); - auto output = buildATensor(ctx, outs[0]); - active_rotated_filter_forward(input, indices, output); -} - -void active_rotated_filter_backward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto grad_out = buildATensor(ctx, ins[0]); - auto indices = buildATensor(ctx, ins[1]); - auto grad_in = buildATensor(ctx, outs[0]); - active_rotated_filter_backward(grad_out, indices, grad_in); -} -#endif - -void active_rotated_filter_forward_cpu_parrots( - HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto input = buildATensor(ctx, ins[0]); - auto indices = buildATensor(ctx, ins[1]); - auto output = buildATensor(ctx, outs[0]); - active_rotated_filter_forward(input, indices, output); -} - -void active_rotated_filter_backward_cpu_parrots( - HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto grad_out = buildATensor(ctx, ins[0]); - auto indices = buildATensor(ctx, ins[1]); - auto grad_in = buildATensor(ctx, outs[0]); - active_rotated_filter_backward(grad_out, indices, grad_in); -} - -PARROTS_EXTENSION_REGISTER(active_rotated_filter_forward) - .input(2) - .output(1) - .apply(active_rotated_filter_forward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(active_rotated_filter_forward_cuda_parrots) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(active_rotated_filter_backward) - .input(2) - .output(1) - .apply(active_rotated_filter_backward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(active_rotated_filter_backward_cuda_parrots) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h b/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h deleted file mode 100644 index 9a4d2ce96..000000000 --- a/mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef ACTIVE_ROTATED_FILTER_PYTORCH_H -#define ACTIVE_ROTATED_FILTER_PYTORCH_H -#include -using namespace at; - -void active_rotated_filter_forward(const Tensor input, const Tensor indices, - Tensor output); - -void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices, - Tensor grad_in); - -#endif // ACTIVE_ROTATED_FILTER_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/assign_score_withk.cpp b/mmcv/ops/csrc/parrots/assign_score_withk.cpp deleted file mode 100644 index 907627718..000000000 --- a/mmcv/ops/csrc/parrots/assign_score_withk.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Modified from -// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/paconv_lib/src/gpu -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, - int aggregate, const Tensor& points, - const Tensor& centers, - const Tensor& scores, - const Tensor& knn_idx, Tensor& output) { - DISPATCH_DEVICE_IMPL(assign_score_withk_forward_impl, B, N0, N1, M, K, O, - aggregate, points, centers, scores, knn_idx, output); -} - -void assign_score_withk_backward_impl( - int B, int N0, int N1, int M, int K, int O, int aggregate, - const Tensor& grad_out, const Tensor& points, const Tensor& centers, - const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, - Tensor& grad_centers, Tensor& grad_scores) { - DISPATCH_DEVICE_IMPL(assign_score_withk_backward_impl, B, N0, N1, M, K, O, - aggregate, grad_out, points, centers, scores, knn_idx, - grad_points, grad_centers, grad_scores); -} - -void assign_score_withk_forward(const Tensor& points, const Tensor& centers, - const Tensor& scores, const Tensor& knn_idx, - Tensor& output, int B, int N0, int N1, int M, - int K, int O, int aggregate) { - assign_score_withk_forward_impl(B, N0, N1, M, K, O, aggregate, points, - centers, scores, knn_idx, output); -} - -void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points, - const Tensor& centers, const Tensor& scores, - const Tensor& knn_idx, Tensor& grad_points, - Tensor& grad_centers, Tensor& grad_scores, - int B, int N0, int N1, int M, int K, int O, - int aggregate) { - assign_score_withk_backward_impl(B, N0, N1, M, K, O, aggregate, grad_out, - points, centers, scores, knn_idx, - grad_points, grad_centers, grad_scores); -} diff --git a/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp b/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp deleted file mode 100644 index 5729c7163..000000000 --- a/mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "assign_score_withk_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void assign_score_withk_forward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int B, N0, N1, M, K, O, aggregate; - SSAttrs(attr) - .get("B", B) - .get("N0", N0) - .get("N1", N1) - .get("M", M) - .get("K", K) - .get("O", O) - .get("aggregate", aggregate) - .done(); - - const auto& points = buildATensor(ctx, ins[0]); - const auto& centers = buildATensor(ctx, ins[1]); - const auto& scores = buildATensor(ctx, ins[2]); - const auto& knn_idx = buildATensor(ctx, ins[3]); - - auto output = buildATensor(ctx, outs[0]); - assign_score_withk_forward(points, centers, scores, knn_idx, output, B, N0, - N1, M, K, O, aggregate); -} - -void assign_score_withk_backward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int B, N0, N1, M, K, O, aggregate; - SSAttrs(attr) - .get("B", B) - .get("N0", N0) - .get("N1", N1) - .get("M", M) - .get("K", K) - .get("O", O) - .get("aggregate", aggregate) - .done(); - - const auto& grad_out = buildATensor(ctx, ins[0]); - const auto& points = buildATensor(ctx, ins[1]); - const auto& centers = buildATensor(ctx, ins[2]); - const auto& scores = buildATensor(ctx, ins[3]); - const auto& knn_idx = buildATensor(ctx, ins[4]); - - auto grad_points = buildATensor(ctx, outs[0]); - auto grad_centers = buildATensor(ctx, outs[1]); - auto grad_scores = buildATensor(ctx, outs[2]); - assign_score_withk_backward(grad_out, points, centers, scores, knn_idx, - grad_points, grad_centers, grad_scores, B, N0, N1, - M, K, O, aggregate); -} - -PARROTS_EXTENSION_REGISTER(assign_score_withk_forward) - .attr("B") - .attr("N0") - .attr("N1") - .attr("M") - .attr("K") - .attr("O") - .attr("aggregate") - .input(4) - .output(1) - .apply(assign_score_withk_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(assign_score_withk_backward) - .attr("B") - .attr("N0") - .attr("N1") - .attr("M") - .attr("K") - .attr("O") - .attr("aggregate") - .input(5) - .output(3) - .apply(assign_score_withk_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h b/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h deleted file mode 100644 index 660594fee..000000000 --- a/mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef ASSIGN_SCORE_WITHK_PYTORCH_H -#define ASSIGN_SCORE_WITHK_PYTORCH_H -#include -using namespace at; - -void assign_score_withk_forward(const Tensor& points, const Tensor& centers, - const Tensor& scores, const Tensor& knn_idx, - Tensor& output, int B, int N0, int N1, int M, - int K, int O, int aggregate); - -void assign_score_withk_backward(const Tensor& grad_out, const Tensor& points, - const Tensor& centers, const Tensor& scores, - const Tensor& knn_idx, Tensor& grad_points, - Tensor& grad_centers, Tensor& grad_scores, - int B, int N0, int N1, int M, int K, int O, - int aggregate); - -#endif // ASSIGN_SCORE_WITHK_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/ball_query._parrots.cpp b/mmcv/ops/csrc/parrots/ball_query._parrots.cpp deleted file mode 100644 index 01ab9739b..000000000 --- a/mmcv/ops/csrc/parrots/ball_query._parrots.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "ball_query_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void ball_query_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, n, m, nsample; - float min_radius, max_radius; - SSAttrs(attr) - .get("b", b) - .get("n", n) - .get("m", m) - .get("nsample", nsample) - .get("min_radius", min_radius) - .get("max_radius", max_radius) - .done(); - - const auto& center_xyz = buildATensor(ctx, ins[0]); - const auto& xyz = buildATensor(ctx, ins[1]); - auto idx = buildATensor(ctx, outs[0]); - ball_query_forward(center_xyz, xyz, idx, b, n, m, min_radius, max_radius, - nsample); -} - -PARROTS_EXTENSION_REGISTER(ball_query_forward) - .attr("b") - .attr("n") - .attr("m") - .attr("nsample") - .attr("min_radius") - .attr("max_radius") - .input(2) - .output(1) - .apply(ball_query_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/ball_query.cpp b/mmcv/ops/csrc/parrots/ball_query.cpp deleted file mode 100644 index 1c9e7a207..000000000 --- a/mmcv/ops/csrc/parrots/ball_query.cpp +++ /dev/null @@ -1,20 +0,0 @@ -// Modified from -// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void ball_query_forward_impl(int b, int n, int m, float min_radius, - float max_radius, int nsample, - const Tensor new_xyz, const Tensor xyz, - Tensor idx) { - DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius, - nsample, new_xyz, xyz, idx); -} - -void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor, - Tensor idx_tensor, int b, int n, int m, - float min_radius, float max_radius, int nsample) { - ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample, - new_xyz_tensor, xyz_tensor, idx_tensor); -} diff --git a/mmcv/ops/csrc/parrots/ball_query_pytorch.h b/mmcv/ops/csrc/parrots/ball_query_pytorch.h deleted file mode 100644 index 70026f315..000000000 --- a/mmcv/ops/csrc/parrots/ball_query_pytorch.h +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef BALL_QUERY_PYTORCH_H -#define BALL_QUERY_PYTORCH_H -#include -using namespace at; - -void ball_query_forward(const Tensor new_xyz, const Tensor xyz, Tensor idx, - int b, int n, int m, float min_radius, float max_radius, - int nsample); - -#endif // BALL_QUERY_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/bbox_overlaps.cpp b/mmcv/ops/csrc/parrots/bbox_overlaps.cpp deleted file mode 100644 index 187216fb0..000000000 --- a/mmcv/ops/csrc/parrots/bbox_overlaps.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, - const int mode, const bool aligned, const int offset) { - DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode, - aligned, offset); -} - -void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, - const int mode, const bool aligned, const int offset) { - bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset); -} diff --git a/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp b/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp deleted file mode 100644 index 5f6264d3c..000000000 --- a/mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "bbox_overlaps_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -/* - * void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor - * ious, const int mode, const bool aligned, const int offset); - */ -void bbox_overlaps_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int mode, offset; - bool aligned; - SSAttrs(attr) - .get("mode", mode) - .get("aligned", aligned) - .get("offset", offset) - .done(); - - const auto& bboxes1 = buildATensor(ctx, ins[0]); - const auto& bboxes2 = buildATensor(ctx, ins[1]); - auto ious = buildATensor(ctx, outs[0]); - bbox_overlaps_cuda(bboxes1, bboxes2, ious, mode, aligned, offset); -} - -PARROTS_EXTENSION_REGISTER(bbox_overlaps) - .attr("mode") - .attr("aligned") - .attr("offset") - .input(2) - .output(1) - .apply(bbox_overlaps_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h b/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h deleted file mode 100644 index 4f68aa339..000000000 --- a/mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef BBOX_OVERLAPS_PYTORCH_H -#define BBOX_OVERLAPS_PYTORCH_H -#include -using namespace at; - -void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, - const int mode, const bool aligned, const int offset); - -#endif // BBOX_OVERLAPS_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/border_align.cpp b/mmcv/ops/csrc/parrots/border_align.cpp deleted file mode 100644 index 565de6899..000000000 --- a/mmcv/ops/csrc/parrots/border_align.cpp +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void border_align_forward_impl(const Tensor &input, const Tensor &boxes, - Tensor output, Tensor argmax_idx, - const int pool_size) { - DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output, - argmax_idx, pool_size); -} - -void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes, - const Tensor &argmax_idx, Tensor grad_input, - const int pool_size) { - DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes, - argmax_idx, grad_input, pool_size); -} - -void border_align_forward(const Tensor &input, const Tensor &boxes, - Tensor output, Tensor argmax_idx, - const int pool_size) { - border_align_forward_impl(input, boxes, output, argmax_idx, pool_size); -} - -void border_align_backward(const Tensor &grad_output, const Tensor &boxes, - const Tensor &argmax_idx, Tensor grad_input, - const int pool_size) { - border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input, - pool_size); -} diff --git a/mmcv/ops/csrc/parrots/border_align_parrots.cpp b/mmcv/ops/csrc/parrots/border_align_parrots.cpp deleted file mode 100644 index 8c3bea58c..000000000 --- a/mmcv/ops/csrc/parrots/border_align_parrots.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "border_align_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void border_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pool_size; - SSAttrs(attr).get("pool_size", pool_size).done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& boxes = buildATensor(ctx, ins[1]); - - auto output = buildATensor(ctx, outs[0]); - auto argmax_idx = buildATensor(ctx, outs[1]); - border_align_forward_cuda(input, boxes, output, argmax_idx, pool_size); -} - -void border_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pool_size; - SSAttrs(attr).get("pool_size", pool_size).done(); - - const auto& top_grad = buildATensor(ctx, ins[0]); - const auto& boxes = buildATensor(ctx, ins[1]); - const auto& argmax_idx = buildATensor(ctx, ins[2]); - - auto bottom_grad = buildATensor(ctx, outs[0]); - border_align_backward_cuda(top_grad, boxes, argmax_idx, bottom_grad, - pool_size); -} - -PARROTS_EXTENSION_REGISTER(border_align_forward) - .attr("pool_size") - .input(2) - .output(2) - .apply(border_align_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(border_align_backward) - .attr("pool_size") - .input(3) - .output(1) - .apply(border_align_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/border_align_pytorch.h b/mmcv/ops/csrc/parrots/border_align_pytorch.h deleted file mode 100644 index cb031e572..000000000 --- a/mmcv/ops/csrc/parrots/border_align_pytorch.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef BORDER_ALIGN_PYTORCH_H -#define BORDER_ALIGN_PYTORCH_H -#include -using namespace at; - -#ifdef MMCV_WITH_CUDA -void border_align_forward_cuda(const Tensor &input, const Tensor &boxes, - Tensor output, Tensor argmax_idx, - const int pool_size); - -void border_align_backward_cuda(const Tensor &grad_output, const Tensor &boxes, - const Tensor &argmax_idx, Tensor grad_input, - const int pool_size); -#endif - -#endif // BORDER_ALIGN_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/box_iou_rotated.cpp b/mmcv/ops/csrc/parrots/box_iou_rotated.cpp deleted file mode 100644 index a2a4e0953..000000000 --- a/mmcv/ops/csrc/parrots/box_iou_rotated.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates. 
All Rights Reserved -// modified from -// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious, - const int mode_flag, const bool aligned) { - DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag, - aligned); -} - -// Interface for Python -// inline is needed to prevent multiple function definitions when this header is -// included by different cpps -void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious, - const int mode_flag, const bool aligned) { - box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned); -} diff --git a/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp b/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp deleted file mode 100644 index a90d64045..000000000 --- a/mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "box_iou_rotated_pytorch.h" - -using namespace parrots; - -/* - * void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor - * ious, const int mode_flag, const bool aligned); - */ -void box_iou_rotated_cpu_parrots(HostContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - bool aligned; - int mode_flag; - SSAttrs(attr) - .get("aligned", aligned) - .get("mode_flag", mode_flag) - .done(); - - const auto& boxes1 = buildATensor(ctx, ins[0]); - const auto& boxes2 = buildATensor(ctx, ins[1]); - auto ious = buildATensor(ctx, outs[0]); - box_iou_rotated_cpu(boxes1, boxes2, ious, mode_flag, aligned); -} - -#ifdef MMCV_WITH_CUDA -/* - * void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor - * ious, const int mode_flag, const bool aligned); - */ -void box_iou_rotated_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - bool aligned; - int mode_flag; - SSAttrs(attr) - .get("aligned", aligned) - .get("mode_flag", mode_flag) - .done(); - - const auto& boxes1 = buildATensor(ctx, ins[0]); - const auto& boxes2 = buildATensor(ctx, ins[1]); - auto ious = buildATensor(ctx, outs[0]); - box_iou_rotated_cuda(boxes1, boxes2, ious, mode_flag, aligned); -} -#endif - -PARROTS_EXTENSION_REGISTER(box_iou_rotated) - .attr("aligned") - .attr("mode_flag") - .input(2) - .output(1) - .apply(box_iou_rotated_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(box_iou_rotated_cuda_parrots) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h b/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h deleted file mode 100644 index afab70318..000000000 --- a/mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef BOX_IOU_ROTATED_PYTORCH_H -#define BOX_IOU_ROTATED_PYTORCH_H -#include -using namespace at; - -void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious, - const int mode_flag, const bool aligned); - -#ifdef MMCV_WITH_CUDA -void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious, - const int mode_flag, const bool aligned); -#endif - -#endif // BOX_IOU_ROTATED_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/carafe.cpp b/mmcv/ops/csrc/parrots/carafe.cpp deleted file mode 100644 index a563aed94..000000000 --- a/mmcv/ops/csrc/parrots/carafe.cpp +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures, - Tensor routput, Tensor rmasks, Tensor output, - int kernel_size, int group_size, int scale_factor) { - DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput, - rmasks, output, kernel_size, group_size, scale_factor); -} - -void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks, - Tensor rtop_grad, Tensor rbottom_grad_hs, - Tensor rbottom_grad, Tensor rmask_grad, - Tensor bottom_grad, Tensor mask_grad, int kernel_size, - int group_size, int scale_factor) { - DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks, - rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad, - bottom_grad, mask_grad, kernel_size, group_size, - scale_factor); -} - -void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures, - Tensor routput, Tensor rmasks, Tensor output, - int kernel_size, int group_size, int scale_factor) { - carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output, - kernel_size, group_size, scale_factor); -} - -void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks, - Tensor rtop_grad, Tensor rbottom_grad_hs, - Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad, - Tensor mask_grad, int kernel_size, int group_size, - int scale_factor) { - carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, - rbottom_grad, rmask_grad, bottom_grad, mask_grad, - kernel_size, group_size, scale_factor); -} diff --git a/mmcv/ops/csrc/parrots/carafe_naive.cpp b/mmcv/ops/csrc/parrots/carafe_naive.cpp deleted file mode 100644 index 6e8917a61..000000000 --- a/mmcv/ops/csrc/parrots/carafe_naive.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output, - int kernel_size, int group_size, - int scale_factor) { - DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output, - kernel_size, group_size, scale_factor); -} - -void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks, - Tensor bottom_grad, Tensor mask_grad, - int kernel_size, int group_size, - int scale_factor) { - DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks, - bottom_grad, mask_grad, kernel_size, group_size, - scale_factor); -} - -void carafe_naive_forward(Tensor features, Tensor masks, Tensor output, - int kernel_size, int group_size, int scale_factor) { - carafe_naive_forward_impl(features, masks, output, kernel_size, group_size, - scale_factor); -} - -void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks, - Tensor bottom_grad, Tensor mask_grad, - int kernel_size, int group_size, int scale_factor) { - carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad, - kernel_size, group_size, scale_factor); -} diff --git a/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp b/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp deleted file mode 100644 index 9c16a3707..000000000 --- a/mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "carafe_naive_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -/*void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, - * int kernel_size, int group_size, - * int scale_factor) - */ -void carafe_naive_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_size, group_size, scale_factor; - SSAttrs(attr) - .get("kernel_size", kernel_size) - .get("group_size", group_size) - .get("scale_factor", scale_factor) - .done(); - - const auto& features = buildATensor(ctx, ins[0]); - const auto& masks = buildATensor(ctx, ins[1]); - - auto output = buildATensor(ctx, outs[0]); - carafe_naive_forward_cuda(features, masks, output, kernel_size, group_size, - scale_factor); -} - -/*void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor - * masks, Tensor bottom_grad, Tensor mask_grad, int kernel_size, int group_size, - * int scale_factor); - */ -void carafe_naive_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_size, group_size, scale_factor; - SSAttrs(attr) - .get("kernel_size", kernel_size) - .get("group_size", group_size) - .get("scale_factor", scale_factor) - .done(); - - const auto& top_grad = buildATensor(ctx, ins[0]); - const auto& features = buildATensor(ctx, ins[1]); - const auto& masks = buildATensor(ctx, ins[2]); - - auto bottom_grad = buildATensor(ctx, outs[0]); - auto mask_grad = buildATensor(ctx, outs[1]); - carafe_naive_backward_cuda(top_grad, features, masks, bottom_grad, mask_grad, - kernel_size, group_size, scale_factor); -} - -PARROTS_EXTENSION_REGISTER(carafe_naive_forward) - .attr("kernel_size") - .attr("group_size") - .attr("scale_factor") - .input(2) - .output(1) - .apply(carafe_naive_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(carafe_naive_backward) - .attr("kernel_size") - .attr("group_size") - 
.attr("scale_factor") - .input(3) - .output(2) - .apply(carafe_naive_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h b/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h deleted file mode 100644 index 6df9b88c2..000000000 --- a/mmcv/ops/csrc/parrots/carafe_naive_pytorch.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef CARAFE_NAIVE_PYTORCH_H -#define CARAFE_NAIVE_PYTORCH_H -#include -using namespace at; - -void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, - int kernel_size, int group_size, - int scale_factor); - -void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks, - Tensor bottom_grad, Tensor mask_grad, - int kernel_size, int group_size, - int scale_factor); -#endif // CARAFE_NAIVE_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/carafe_parrots.cpp b/mmcv/ops/csrc/parrots/carafe_parrots.cpp deleted file mode 100644 index e99f59ef2..000000000 --- a/mmcv/ops/csrc/parrots/carafe_parrots.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "carafe_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -/* - * void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, - * Tensor routput, Tensor rmasks, Tensor output, - * int kernel_size, int group_size, int scale_factor); - */ -void carafe_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_size, group_size, scale_factor; - SSAttrs(attr) - .get("kernel_size", kernel_size) - .get("group_size", group_size) - .get("scale_factor", scale_factor) - .done(); - - const auto& features = buildATensor(ctx, ins[0]); - const auto& masks = buildATensor(ctx, ins[1]); - - auto rfeatures = buildATensor(ctx, outs[0]); - auto routput = buildATensor(ctx, outs[1]); - auto rmasks = buildATensor(ctx, outs[2]); - auto output = buildATensor(ctx, outs[3]); - - carafe_forward_cuda(features, masks, rfeatures, routput, rmasks, output, - kernel_size, group_size, scale_factor); -} - -/* - * void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, - * Tensor rtop_grad, Tensor rbottom_grad_hs, - * Tensor rbottom_grad, Tensor rmask_grad, - * Tensor bottom_grad, Tensor mask_grad, int - * kernel_size, int group_size, int scale_factor); - */ -void carafe_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_size, group_size, scale_factor; - SSAttrs(attr) - .get("kernel_size", kernel_size) - .get("group_size", group_size) - .get("scale_factor", scale_factor) - .done(); - - const auto& top_grad = buildATensor(ctx, ins[0]); - const auto& rfeatures = buildATensor(ctx, ins[1]); - const auto& masks = buildATensor(ctx, ins[2]); - - auto rtop_grad = buildATensor(ctx, outs[0]); - auto rbottom_grad_hs = buildATensor(ctx, outs[1]); - auto rbottom_grad = buildATensor(ctx, outs[2]); - auto rmask_grad = buildATensor(ctx, outs[3]); - auto bottom_grad = buildATensor(ctx, outs[4]); - auto mask_grad = buildATensor(ctx, outs[5]); - - carafe_backward_cuda(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs, - rbottom_grad, rmask_grad, bottom_grad, mask_grad, - kernel_size, group_size, scale_factor); -} - -PARROTS_EXTENSION_REGISTER(carafe_forward) - .attr("kernel_size") - .attr("group_size") - .attr("scale_factor") - .input(2) - .output(4) - 
.apply(carafe_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(carafe_backward) - .attr("kernel_size") - .attr("group_size") - .attr("scale_factor") - .input(3) - .output(6) - .apply(carafe_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/carafe_pytorch.h b/mmcv/ops/csrc/parrots/carafe_pytorch.h deleted file mode 100644 index 2b94d44d3..000000000 --- a/mmcv/ops/csrc/parrots/carafe_pytorch.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef CARAFE_PYTORCH_H -#define CARAFE_PYTORCH_H -#include -using namespace at; - -void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, - Tensor routput, Tensor rmasks, Tensor output, - int kernel_size, int group_size, int scale_factor); - -void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, - Tensor rtop_grad, Tensor rbottom_grad_hs, - Tensor rbottom_grad, Tensor rmask_grad, - Tensor bottom_grad, Tensor mask_grad, int kernel_size, - int group_size, int scale_factor); -#endif // CARAFE_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/chamfer_distance.cpp b/mmcv/ops/csrc/parrots/chamfer_distance.cpp deleted file mode 100644 index dcff69893..000000000 --- a/mmcv/ops/csrc/parrots/chamfer_distance.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. -// Modified from -// https://github.com/chrdiller/pyTorchChamferDistance/blob/master/chamfer_distance/chamfer_distance.cpp - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void chamfer_distance_forward_impl(const Tensor xyz1, const Tensor xyz2, - const Tensor dist1, const Tensor dist2, - const Tensor idx1, const Tensor idx2) { - DISPATCH_DEVICE_IMPL(chamfer_distance_forward_impl, xyz1, xyz2, dist1, dist2, - idx1, idx2); -} - -void chamfer_distance_backward_impl(const Tensor xyz1, const Tensor xyz2, - Tensor idx1, Tensor idx2, Tensor graddist1, - Tensor graddist2, Tensor gradxyz1, - Tensor gradxyz2) { - DISPATCH_DEVICE_IMPL(chamfer_distance_backward_impl, xyz1, xyz2, idx1, idx2, - graddist1, graddist2, gradxyz1, gradxyz2); -} - -void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2, - const Tensor dist1, const Tensor dist2, - const Tensor idx1, const Tensor idx2) { - chamfer_distance_forward_impl(xyz1, xyz2, dist1, dist2, idx1, idx2); -} - -void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2, - Tensor idx1, Tensor idx2, Tensor graddist1, - Tensor graddist2, Tensor gradxyz1, - Tensor gradxyz2) { - chamfer_distance_backward_impl(xyz1, xyz2, idx1, idx2, graddist1, graddist2, - gradxyz1, gradxyz2); -} diff --git a/mmcv/ops/csrc/parrots/chamfer_distance_parrots.cpp b/mmcv/ops/csrc/parrots/chamfer_distance_parrots.cpp deleted file mode 100644 index db8eff1d6..000000000 --- a/mmcv/ops/csrc/parrots/chamfer_distance_parrots.cpp +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "chamfer_distance_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void chamfer_distance_forward_cuda_parrots(CudaContext& ctx,
-                                           const SSElement& attr,
-                                           const OperatorBase::in_list_t& ins,
-                                           OperatorBase::out_list_t& outs) {
-  auto xyz1 = buildATensor(ctx, ins[0]);
-  auto xyz2 = buildATensor(ctx, ins[1]);
-  auto dist1 = buildATensor(ctx, outs[0]);
-  auto dist2 = buildATensor(ctx, outs[1]);
-  auto idx1 = buildATensor(ctx, outs[2]);
-  auto idx2 = buildATensor(ctx, outs[3]);
-  chamfer_distance_forward(xyz1, xyz2, dist1, dist2, idx1, idx2);
-}
-
-void chamfer_distance_backward_cuda_parrots(CudaContext& ctx,
-                                            const SSElement& attr,
-                                            const OperatorBase::in_list_t& ins,
-                                            OperatorBase::out_list_t& outs) {
-  auto xyz1 = buildATensor(ctx, ins[0]);
-  auto xyz2 = buildATensor(ctx, ins[1]);
-  auto idx1 = buildATensor(ctx, ins[2]);
-  auto idx2 = buildATensor(ctx, ins[3]);
-  auto graddist1 = buildATensor(ctx, ins[4]);
-  auto graddist2 = buildATensor(ctx, ins[5]);
-  auto gradxyz1 = buildATensor(ctx, outs[0]);
-  auto gradxyz2 = buildATensor(ctx, outs[1]);
-  chamfer_distance_backward(xyz1, xyz2, idx1, idx2, graddist1, graddist2,
-                            gradxyz1, gradxyz2);
-}
-
-PARROTS_EXTENSION_REGISTER(chamfer_distance_forward)
-    .input(2)
-    .output(4)
-    .apply(chamfer_distance_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(chamfer_distance_backward)
-    .input(6)
-    .output(2)
-    .apply(chamfer_distance_backward_cuda_parrots)
-    .done();
-
-#endif
diff --git a/mmcv/ops/csrc/parrots/chamfer_distance_pytorch.h b/mmcv/ops/csrc/parrots/chamfer_distance_pytorch.h
deleted file mode 100644
index 6405526b0..000000000
--- a/mmcv/ops/csrc/parrots/chamfer_distance_pytorch.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
-#define ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2,
-                              const Tensor dist1, const Tensor dist2,
-                              const Tensor idx1, const Tensor idx2);
-
-void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2,
-                               Tensor idx1, Tensor idx2, Tensor graddist1,
-                               Tensor graddist2, Tensor gradxyz1,
-                               Tensor gradxyz2);
-
-#endif  // ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
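For context on the op whose parrots glue is deleted above: chamfer_distance_forward fills, for every point in one set, the squared distance to its nearest neighbour in the other set plus that neighbour's index. The following is a minimal libtorch sketch of those semantics only; chamfer_naive is a hypothetical helper, not mmcv's kernel (the real implementation is the CUDA code kept under mmcv/ops/csrc/pytorch).

#include <torch/torch.h>
#include <tuple>

std::tuple<torch::Tensor, torch::Tensor> chamfer_naive(
    const torch::Tensor& xyz1,    // (B, N, 2) point set 1
    const torch::Tensor& xyz2) {  // (B, M, 2) point set 2
  // Pairwise squared distances, shape (B, N, M).
  auto diff = xyz1.unsqueeze(2) - xyz2.unsqueeze(1);
  auto d2 = diff.pow(2).sum(-1);
  // dist1[b][i]: squared distance from xyz1[b][i] to its nearest neighbour
  // in xyz2[b]; idx1[b][i]: index of that neighbour. These correspond to the
  // outs[0]/outs[2] slots of the deleted binding (dist2/idx2 are symmetric).
  auto [dist1, idx1] = d2.min(/*dim=*/2);
  return {dist1, idx1};
}

int main() {
  auto xyz1 = torch::rand({2, 64, 2});
  auto xyz2 = torch::rand({2, 128, 2});
  auto [dist1, idx1] = chamfer_naive(xyz1, xyz2);
}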
diff --git a/mmcv/ops/csrc/parrots/contour_expand.cpp b/mmcv/ops/csrc/parrots/contour_expand.cpp
deleted file mode 100644
index 586c48ee4..000000000
--- a/mmcv/ops/csrc/parrots/contour_expand.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-// It is modified from https://github.com/whai362/PSENet
-#include <iostream>
-#include <queue>
-
-#include "pytorch_cpp_helper.hpp"
-
-using namespace std;
-
-class Point2d {
- public:
-  int x;
-  int y;
-
-  Point2d() : x(0), y(0) {}
-  Point2d(int _x, int _y) : x(_x), y(_y) {}
-};
-
-void kernel_dilate(const uint8_t *data, IntArrayRef data_shape,
-                   const int *label_map, int &label_num, int &min_area,
-                   vector<vector<int>> &text_line) {
-  std::vector<int> area(label_num + 1);
-  int kernel_num = data_shape[0];
-  int height = data_shape[1];
-  int width = data_shape[2];
-
-  for (int x = 0; x < height; ++x) {
-    for (int y = 0; y < width; ++y) {
-      int label = label_map[x * width + y];
-      if (label == 0) continue;
-      area[label] += 1;
-    }
-  }
-
-  queue<Point2d> queue, next_queue;
-  for (int x = 0; x < height; ++x) {
-    vector<int> row(width);
-    for (int y = 0; y < width; ++y) {
-      int label = label_map[x * width + y];
-      if (label == 0) continue;
-      if (area[label] < min_area) continue;
-
-      Point2d point(x, y);
-      queue.push(point);
-      row[y] = label;
-    }
-    text_line.emplace_back(row);
-  }
-
-  int dx[] = {-1, 1, 0, 0};
-  int dy[] = {0, 0, -1, 1};
-  vector<int> kernel_step(kernel_num);
-  std::for_each(kernel_step.begin(), kernel_step.end(),
-                [=](int &k) { return k * height * width; });
-
-  for (int kernel_id = kernel_num - 2; kernel_id >= 0; --kernel_id) {
-    while (!queue.empty()) {
-      Point2d point = queue.front();
-      queue.pop();
-      int x = point.x;
-      int y = point.y;
-      int label = text_line[x][y];
-
-      bool is_edge = true;
-      for (int d = 0; d < 4; ++d) {
-        int tmp_x = x + dx[d];
-        int tmp_y = y + dy[d];
-
-        if (tmp_x < 0 || tmp_x >= height) continue;
-        if (tmp_y < 0 || tmp_y >= width) continue;
-        int kernel_value = data[kernel_step[kernel_id] + tmp_x * width + tmp_y];
-        if (kernel_value == 0) continue;
-        if (text_line[tmp_x][tmp_y] > 0) continue;
-
-        Point2d point(tmp_x, tmp_y);
-        queue.push(point);
-        text_line[tmp_x][tmp_y] = label;
-        is_edge = false;
-      }
-
-      if (is_edge) {
-        next_queue.push(point);
-      }
-    }
-    swap(queue, next_queue);
-  }
-}
-
-std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
-                                             Tensor internal_kernel_label,
-                                             int min_kernel_area,
-                                             int kernel_num) {
-  kernel_mask = kernel_mask.contiguous();
-  internal_kernel_label = internal_kernel_label.contiguous();
-  assert(kernel_mask.dim() == 3);
-  assert(internal_kernel_label.dim() == 2);
-  assert(kernel_mask.size(1) == internal_kernel_label.size(0));
-  assert(kernel_mask.size(2) == internal_kernel_label.size(1));
-  CHECK_CPU_INPUT(kernel_mask);
-  CHECK_CPU_INPUT(internal_kernel_label);
-  auto ptr_data = kernel_mask.data_ptr<uint8_t>();
-  IntArrayRef data_shape = kernel_mask.sizes();
-
-  auto data_label_map = internal_kernel_label.data_ptr<int32_t>();
-  vector<vector<int>> text_line;
-
-  kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
                min_kernel_area, text_line);
-
-  return text_line;
-}
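kernel_dilate above is PSENet's progressive scale expansion: labels seeded from the smallest kernel are grown outward through successively larger kernel masks with a 4-neighbour BFS, and pixels still on a boundary are deferred to the next scale. A self-contained sketch of a single expansion step, with hypothetical names (expand_once is not an mmcv function):

#include <queue>
#include <utility>
#include <vector>

void expand_once(const std::vector<uint8_t>& kernel,  // next-larger mask, h*w
                 std::vector<int>& label,             // 0 = unlabelled, h*w
                 std::queue<std::pair<int, int>>& frontier, int h, int w) {
  const int dx[4] = {-1, 1, 0, 0};
  const int dy[4] = {0, 0, -1, 1};
  std::queue<std::pair<int, int>> next;
  while (!frontier.empty()) {
    auto [x, y] = frontier.front();
    frontier.pop();
    bool edge = true;
    for (int d = 0; d < 4; ++d) {
      int nx = x + dx[d], ny = y + dy[d];
      if (nx < 0 || nx >= h || ny < 0 || ny >= w) continue;
      // Grow only into pixels set in the larger kernel and not yet labelled.
      if (!kernel[nx * w + ny] || label[nx * w + ny]) continue;
      label[nx * w + ny] = label[x * w + y];
      frontier.push({nx, ny});
      edge = false;
    }
    // Still blocked on all sides: keep it for the next, larger scale.
    if (edge) next.push({x, y});
  }
  std::swap(frontier, next);
}

Calling expand_once once per kernel, from second-smallest to largest, reproduces the outer loop of kernel_dilate.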
diff --git a/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp b/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
deleted file mode 100644
index 1581fdc83..000000000
--- a/mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
+++ /dev/null
@@ -1,43 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "contour_expand_pytorch.h"
-
-using namespace parrots;
-using namespace std;
-
-template <typename T>
-void contour_expand_parrots(T& ctx, const SSElement& attr,
-                            const OperatorBase::in_list_t& ins,
-                            OperatorBase::out_list_t& outs) {
-  int min_kernel_area, kernel_num;
-  SSAttrs(attr)
-      .get<int>("min_kernel_area", min_kernel_area)
-      .get<int>("kernel_num", kernel_num)
-      .done();
-  at::Tensor kernel_mask;
-  at::Tensor internal_kernel_label;
-  kernel_mask = buildATensor(ctx, ins[0]);
-  internal_kernel_label = buildATensor(ctx, ins[1]);
-  auto out = contour_expand(kernel_mask, internal_kernel_label, min_kernel_area,
-                            kernel_num);
-  int n = out.size(), m = 0;
-  for (int i = 0; i < n; ++i)
-    if (m < out[i].size()) m = out[i].size();
-  auto options = torch::TensorOptions().dtype(at::kInt);
-  auto tensor = torch::zeros({n, m}, options);
-  for (int i = 0; i < n; i++)
-    tensor.slice(0, i, i + 1) =
-        torch::from_blob(out[i].data(), {out[i].size()}, options);
-  updateDArray(ctx, tensor, outs[0]);
-}
-
-PARROTS_EXTENSION_REGISTER(contour_expand)
-    .attr("min_kernel_area")
-    .attr("kernel_num")
-    .input(2)
-    .output(1)
-    .apply(contour_expand_parrots<HostContext>)
-    .done();
diff --git a/mmcv/ops/csrc/parrots/contour_expand_pytorch.h b/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
deleted file mode 100644
index 881bbac3c..000000000
--- a/mmcv/ops/csrc/parrots/contour_expand_pytorch.h
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef CONTOUR_EXPAND_PYTORCH_H
-#define CONTOUR_EXPAND_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
-                                             Tensor internal_kernel_label,
-                                             int min_kernel_area,
-                                             int kernel_num);
-
-#endif  // CONTOUR_EXPAND_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/convex_iou.cpp b/mmcv/ops/csrc/parrots/convex_iou.cpp
deleted file mode 100644
index 79f2028b5..000000000
--- a/mmcv/ops/csrc/parrots/convex_iou.cpp
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-// modified from
-// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
-                     Tensor ious) {
-  DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
-}
-
-void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
-  convex_iou_impl(pointsets, polygons, ious);
-}
-
-void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
-                      Tensor output) {
-  DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
-}
-
-void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
-  convex_giou_impl(pointsets, polygons, output);
-}
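convex_iou.cpp above is pure dispatch glue: the generic function forwards to a per-device implementation through DISPATCH_DEVICE_IMPL, and backend files register themselves with REGISTER_DEVICE_IMPL (as cudabind.cpp does later in this diff). A hedged, self-contained sketch of that idea under assumed names (this is not mmcv's pytorch_device_registry.hpp, just the shape of the mechanism):

#include <functional>
#include <map>
#include <stdexcept>
#include <utility>

enum class Device { CPU, CUDA };

// One registry per op: a table from device type to implementation.
template <typename Fn>
struct OpRegistry {
  std::map<Device, Fn> impls;
  void reg(Device d, Fn f) { impls[d] = f; }
  template <typename... Args>
  void dispatch(Device d, Args&&... args) {
    auto it = impls.find(d);
    if (it == impls.end()) throw std::runtime_error("no impl for device");
    it->second(std::forward<Args>(args)...);
  }
};

// Usage: backend code registers an entry; the generic wrapper dispatches on
// the device of its tensor arguments.
void relu_cpu(float* x) { if (*x < 0) *x = 0; }

int main() {
  OpRegistry<void (*)(float*)> relu;
  relu.reg(Device::CPU, relu_cpu);
  float v = -1.f;
  relu.dispatch(Device::CPU, &v);  // v == 0 afterwards
}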
diff --git a/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp b/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
deleted file mode 100644
index bf766542f..000000000
--- a/mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "convex_iou_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void convex_iou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                     const OperatorBase::in_list_t& ins,
-                                     OperatorBase::out_list_t& outs) {
-  auto pointsets = buildATensor(ctx, ins[0]);
-  auto polygons = buildATensor(ctx, ins[1]);
-  auto ious = buildATensor(ctx, outs[0]);
-  convex_iou(pointsets, polygons, ious);
-}
-
-void convex_giou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                      const OperatorBase::in_list_t& ins,
-                                      OperatorBase::out_list_t& outs) {
-  auto pointsets = buildATensor(ctx, ins[0]);
-  auto polygons = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  convex_giou(pointsets, polygons, output);
-}
-
-PARROTS_EXTENSION_REGISTER(convex_iou)
-    .input(2)
-    .output(1)
-    .apply(convex_iou_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(convex_giou)
-    .input(2)
-    .output(1)
-    .apply(convex_giou_forward_cuda_parrots)
-    .done();
-
-#endif
diff --git a/mmcv/ops/csrc/parrots/convex_iou_pytorch.h b/mmcv/ops/csrc/parrots/convex_iou_pytorch.h
deleted file mode 100644
index 4f16a1ce4..000000000
--- a/mmcv/ops/csrc/parrots/convex_iou_pytorch.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef CONVEX_IOU_PYTORCH_H
-#define CONVEX_IOU_PYTORCH_H
-#include <torch/extension.h>
-using namespace at;
-
-void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious);
-
-void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output);
-
-#endif  // CONVEX_IOU_PYTORCH_H
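The deleted registration above describes an op entirely by positional inputs/outputs and named attrs. On the PyTorch side, the same C++ entry points remain exposed through pybind11 instead. A hedged sketch of that binding style, assuming the signature from convex_iou_pytorch.h above (this mirrors the style of mmcv's pybind.cpp, not its exact contents):

#include <torch/extension.h>

// Declared here, defined elsewhere in mmcv's op sources.
void convex_iou(const at::Tensor pointsets, const at::Tensor polygons,
                at::Tensor ious);

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  // Positional ins/outs of the parrots binding become named arguments here.
  m.def("convex_iou", &convex_iou, "convex_iou", py::arg("pointsets"),
        py::arg("polygons"), py::arg("ious"));
}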
diff --git a/mmcv/ops/csrc/parrots/correlation.cpp b/mmcv/ops/csrc/parrots/correlation.cpp
deleted file mode 100644
index f4adba2a0..000000000
--- a/mmcv/ops/csrc/parrots/correlation.cpp
+++ /dev/null
@@ -1,47 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved.
-#include <torch/types.h>
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
-                              int kH, int kW, int patchH, int patchW, int padH,
-                              int padW, int dilationH, int dilationW,
-                              int dilation_patchH, int dilation_patchW, int dH,
-                              int dW) {
-  DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW,
-                       patchH, patchW, padH, padW, dilationH, dilationW,
-                       dilation_patchH, dilation_patchW, dH, dW);
-}
-
-void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
-                               Tensor grad_input1, Tensor grad_input2, int kH,
-                               int kW, int patchH, int patchW, int padH,
-                               int padW, int dilationH, int dilationW,
-                               int dilation_patchH, int dilation_patchW, int dH,
-                               int dW) {
-  DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2,
-                       grad_input1, grad_input2, kH, kW, patchH, patchW, padH,
-                       padW, dilationH, dilationW, dilation_patchH,
-                       dilation_patchW, dH, dW);
-}
-
-void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
-                         int kW, int patchH, int patchW, int padH, int padW,
-                         int dilationH, int dilationW, int dilation_patchH,
-                         int dilation_patchW, int dH, int dW) {
-  correlation_forward_impl(input1, input2, output, kH, kW, patchH, patchW, padH,
-                           padW, dilationH, dilationW, dilation_patchH,
-                           dilation_patchW, dH, dW);
-}
-
-void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
-                          Tensor grad_input1, Tensor grad_input2, int kH,
-                          int kW, int patchH, int patchW, int padH, int padW,
-                          int dilationH, int dilationW, int dilation_patchH,
-                          int dilation_patchW, int dH, int dW) {
-  correlation_backward_impl(grad_output, input1, input2, grad_input1,
-                            grad_input2, kH, kW, patchH, patchW, padH, padW,
-                            dilationH, dilationW, dilation_patchH,
-                            dilation_patchW, dH, dW);
-}
diff --git a/mmcv/ops/csrc/parrots/correlation_parrots.cpp b/mmcv/ops/csrc/parrots/correlation_parrots.cpp
deleted file mode 100644
index b1e287d06..000000000
--- a/mmcv/ops/csrc/parrots/correlation_parrots.cpp
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright (c) OpenMMLab.
All rights reserved -#include -#include -#include - -#include "correlation_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void correlation_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW; - SSAttrs(attr) - .get("kH", kH) - .get("kW", kW) - .get("patchH", patchH) - .get("patchW", patchW) - .get("padH", padH) - .get("padW", padW) - .get("dilationH", dilationH) - .get("dilationW", dilationW) - .get("dilation_patchH", dilation_patchH) - .get("dilation_patchW", dilation_patchW) - .get("dH", dH) - .get("dW", dW) - .done(); - - auto input1 = buildATensor(ctx, ins[0]); - auto input2 = buildATensor(ctx, ins[1]); - - auto output = buildATensor(ctx, outs[0]); - - correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH, - padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW); -} - -void correlation_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW; - SSAttrs(attr) - .get("kH", kH) - .get("kW", kW) - .get("patchH", patchH) - .get("patchW", patchW) - .get("padH", padH) - .get("padW", padW) - .get("dilationH", dilationH) - .get("dilationW", dilationW) - .get("dilation_patchH", dilation_patchH) - .get("dilation_patchW", dilation_patchW) - .get("dH", dH) - .get("dW", dW) - .done(); - - auto grad_output = buildATensor(ctx, ins[0]); - auto input1 = buildATensor(ctx, ins[1]); - auto input2 = buildATensor(ctx, ins[2]); - - auto grad_input1 = buildATensor(ctx, outs[0]); - auto grad_input2 = buildATensor(ctx, outs[1]); - - correlation_backward(grad_output, input1, input2, grad_input1, grad_input2, - kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, - dilation_patchH, dilation_patchW, dH, dW); -} -#endif - -void correlation_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW; - SSAttrs(attr) - .get("kH", kH) - .get("kW", kW) - .get("patchH", patchH) - .get("patchW", patchW) - .get("padH", padH) - .get("padW", padW) - .get("dilationH", dilationH) - .get("dilationW", dilationW) - .get("dilation_patchH", dilation_patchH) - .get("dilation_patchW", dilation_patchW) - .get("dH", dH) - .get("dW", dW) - .done(); - - auto input1 = buildATensor(ctx, ins[0]); - auto input2 = buildATensor(ctx, ins[1]); - - auto output = buildATensor(ctx, outs[0]); - - correlation_forward(input1, input2, output, kH, kW, patchH, patchW, padH, - padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW); -} - -void correlation_backward_cpu_parrots(HostContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW; - SSAttrs(attr) - .get("kH", kH) - .get("kW", kW) - .get("patchH", patchH) - .get("patchW", patchW) - .get("padH", padH) - .get("padW", padW) - .get("dilationH", dilationH) - .get("dilationW", dilationW) - .get("dilation_patchH", dilation_patchH) - .get("dilation_patchW", dilation_patchW) - .get("dH", dH) - .get("dW", dW) - .done(); - - auto 
grad_output = buildATensor(ctx, ins[0]); - auto input1 = buildATensor(ctx, ins[1]); - auto input2 = buildATensor(ctx, ins[2]); - - auto grad_input1 = buildATensor(ctx, outs[0]); - auto grad_input2 = buildATensor(ctx, outs[1]); - - correlation_backward(grad_output, input1, input2, grad_input1, grad_input2, - kH, kW, patchH, patchW, padH, padW, dilationH, dilationW, - dilation_patchH, dilation_patchW, dH, dW); -} - -PARROTS_EXTENSION_REGISTER(correlation_forward) - .attr("kH") - .attr("kW") - .attr("patchH") - .attr("patchW") - .attr("padH") - .attr("padW") - .attr("dilationH") - .attr("dilationW") - .attr("dilation_patchH") - .attr("dilation_patchW") - .attr("dH") - .attr("dW") - .input(2) - .output(1) - .apply(correlation_forward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(correlation_forward_cuda_parrots) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(correlation_backward) - .attr("kH") - .attr("kW") - .attr("patchH") - .attr("patchW") - .attr("padH") - .attr("padW") - .attr("dilationH") - .attr("dilationW") - .attr("dilation_patchH") - .attr("dilation_patchW") - .attr("dH") - .attr("dW") - .input(3) - .output(2) - .apply(correlation_backward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(correlation_backward_cuda_parrots) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/correlation_pytorch.h b/mmcv/ops/csrc/parrots/correlation_pytorch.h deleted file mode 100644 index 806fcaa71..000000000 --- a/mmcv/ops/csrc/parrots/correlation_pytorch.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef CORRELATION_PYTORCH_H -#define CORRELATION_PYTORCH_H -#include -using namespace at; - -void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH, - int kW, int patchH, int patchW, int padH, int padW, - int dilationH, int dilationW, int dilation_patchH, - int dilation_patchW, int dH, int dW); - -void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2, - Tensor grad_input1, Tensor grad_input2, int kH, - int kW, int patchH, int patchW, int padH, int padW, - int dilationH, int dilationW, int dilation_patchH, - int dilation_patchW, int dH, int dW); - -#endif // CORRELATION_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/cudabind.cpp b/mmcv/ops/csrc/parrots/cudabind.cpp deleted file mode 100644 index 4521ddf4a..000000000 --- a/mmcv/ops/csrc/parrots/cudabind.cpp +++ /dev/null @@ -1,1626 +0,0 @@ -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void AssignScoreWithKForwardCUDAKernelLauncher( - int B, int N0, int N1, int M, int K, int O, int aggregate, - const Tensor& points, const Tensor& centers, const Tensor& scores, - const Tensor& knn_idx, Tensor& output); - -void AssignScoreWithKBackwardCUDAKernelLauncher( - int B, int N0, int N1, int M, int K, int O, int aggregate, - const Tensor& grad_out, const Tensor& points, const Tensor& centers, - const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, - Tensor& grad_centers, Tensor& grad_scores); - -void assign_score_withk_forward_cuda(int B, int N0, int N1, int M, int K, int O, - int aggregate, const Tensor& points, - const Tensor& centers, - const Tensor& scores, - const Tensor& knn_idx, Tensor& output) { - AssignScoreWithKForwardCUDAKernelLauncher( - B, N0, N1, M, K, O, aggregate, points, centers, scores, knn_idx, output); -}; - -void assign_score_withk_backward_cuda( - int B, int N0, int N1, int M, int K, int O, int aggregate, - const Tensor& grad_out, const Tensor& points, const Tensor& centers, - const Tensor& scores, const Tensor& knn_idx, 
Tensor& grad_points, - Tensor& grad_centers, Tensor& grad_scores) { - AssignScoreWithKBackwardCUDAKernelLauncher( - B, N0, N1, M, K, O, aggregate, grad_out, points, centers, scores, knn_idx, - grad_points, grad_centers, grad_scores); -}; - -void assign_score_withk_forward_impl(int B, int N0, int N1, int M, int K, int O, - int aggregate, const Tensor& points, - const Tensor& centers, - const Tensor& scores, - const Tensor& knn_idx, Tensor& output); - -void assign_score_withk_backward_impl( - int B, int N0, int N1, int M, int K, int O, int aggregate, - const Tensor& grad_out, const Tensor& points, const Tensor& centers, - const Tensor& scores, const Tensor& knn_idx, Tensor& grad_points, - Tensor& grad_centers, Tensor& grad_scores); - -REGISTER_DEVICE_IMPL(assign_score_withk_forward_impl, CUDA, - assign_score_withk_forward_cuda); -REGISTER_DEVICE_IMPL(assign_score_withk_backward_impl, CUDA, - assign_score_withk_backward_cuda); - -void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius, - float max_radius, int nsample, - const Tensor new_xyz, const Tensor xyz, - Tensor idx); - -void ball_query_forward_cuda(int b, int n, int m, float min_radius, - float max_radius, int nsample, - const Tensor new_xyz, const Tensor xyz, - Tensor idx) { - BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample, - new_xyz, xyz, idx); -}; - -void ball_query_forward_impl(int b, int n, int m, float min_radius, - float max_radius, int nsample, - const Tensor new_xyz, const Tensor xyz, - Tensor idx); -REGISTER_DEVICE_IMPL(ball_query_forward_impl, CUDA, ball_query_forward_cuda); - -void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2, - Tensor ious, const int mode, - const bool aligned, const int offset); - -void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, - const int mode, const bool aligned, const int offset) { - BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset); -} - -void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious, - const int mode, const bool aligned, const int offset); -REGISTER_DEVICE_IMPL(bbox_overlaps_impl, CUDA, bbox_overlaps_cuda); - -void BorderAlignForwardCUDAKernelLauncher(const Tensor& input, - const Tensor& boxes, Tensor output, - Tensor argmax_idx, - const int pool_size); - -void BorderAlignBackwardCUDAKernelLauncher(const Tensor& grad_output, - const Tensor& boxes, - const Tensor& argmax_idx, - Tensor grad_input, - const int pool_size); - -void border_align_forward_cuda(const Tensor& input, const Tensor& boxes, - Tensor output, Tensor argmax_idx, - const int pool_size) { - BorderAlignForwardCUDAKernelLauncher(input, boxes, output, argmax_idx, - pool_size); -} - -void border_align_backward_cuda(const Tensor& grad_output, const Tensor& boxes, - const Tensor& argmax_idx, Tensor grad_input, - const int pool_size) { - BorderAlignBackwardCUDAKernelLauncher(grad_output, boxes, argmax_idx, - grad_input, pool_size); -} - -void border_align_forward_impl(const Tensor& input, const Tensor& boxes, - Tensor output, Tensor argmax_idx, - const int pool_size); - -void border_align_backward_impl(const Tensor& grad_output, const Tensor& boxes, - const Tensor& argmax_idx, Tensor grad_input, - const int pool_size); - -REGISTER_DEVICE_IMPL(border_align_forward_impl, CUDA, - border_align_forward_cuda); -REGISTER_DEVICE_IMPL(border_align_backward_impl, CUDA, - border_align_backward_cuda); - -void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor 
ious, - const int mode_flag, const bool aligned); - -void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious, - const int mode_flag, const bool aligned); -REGISTER_DEVICE_IMPL(box_iou_rotated_impl, CUDA, box_iou_rotated_cuda); - -void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks, - Tensor rfeatures, Tensor routput, - Tensor rmasks, Tensor output, - const int kernel_size, - const int group_size, - const int scale_factor); - -void CARAFEBackwardCUDAKernelLauncher( - const Tensor top_grad, const Tensor rfeatures, const Tensor masks, - Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad, - Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad, - const int kernel_size, const int group_size, const int scale_factor); - -void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures, - Tensor routput, Tensor rmasks, Tensor output, - int kernel_size, int group_size, int scale_factor) { - CARAFEForwardCUDAKernelLauncher(features, masks, rfeatures, routput, rmasks, - output, kernel_size, group_size, - scale_factor); -} - -void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks, - Tensor rtop_grad, Tensor rbottom_grad_hs, - Tensor rbottom_grad, Tensor rmask_grad, - Tensor bottom_grad, Tensor mask_grad, int kernel_size, - int group_size, int scale_factor) { - CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks, rtop_grad, - rbottom_grad_hs, rbottom_grad, rmask_grad, - bottom_grad, mask_grad, kernel_size, - group_size, scale_factor); -} - -void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures, - Tensor routput, Tensor rmasks, Tensor output, - int kernel_size, int group_size, int scale_factor); - -void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks, - Tensor rtop_grad, Tensor rbottom_grad_hs, - Tensor rbottom_grad, Tensor rmask_grad, - Tensor bottom_grad, Tensor mask_grad, int kernel_size, - int group_size, int scale_factor); - -REGISTER_DEVICE_IMPL(carafe_forward_impl, CUDA, carafe_forward_cuda); -REGISTER_DEVICE_IMPL(carafe_backward_impl, CUDA, carafe_backward_cuda); - -void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features, - const Tensor masks, Tensor output, - const int kernel_size, - const int group_size, - const int scale_factor); - -void CARAFENAIVEBackwardCUDAKernelLauncher( - const Tensor top_grad, const Tensor features, const Tensor masks, - Tensor bottom_grad, Tensor mask_grad, const int kernel_size, - const int group_size, const int scale_factor); - -void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output, - int kernel_size, int group_size, - int scale_factor) { - CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output, kernel_size, - group_size, scale_factor); -} - -void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks, - Tensor bottom_grad, Tensor mask_grad, - int kernel_size, int group_size, - int scale_factor) { - CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks, bottom_grad, - mask_grad, kernel_size, group_size, - scale_factor); -} -void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output, - int kernel_size, int group_size, - int scale_factor); - -void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks, - Tensor bottom_grad, Tensor mask_grad, - int kernel_size, int group_size, - int scale_factor); - -REGISTER_DEVICE_IMPL(carafe_naive_forward_impl, CUDA, - carafe_naive_forward_cuda); -REGISTER_DEVICE_IMPL(carafe_naive_backward_impl, 
CUDA, - carafe_naive_backward_cuda); - -void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2, - Tensor output, int kH, int kW, - int patchH, int patchW, int padH, - int padW, int dilationH, - int dilationW, int dilation_patchH, - int dilation_patchW, int dH, int dW); - -void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1, - Tensor input2, Tensor grad_input1, - Tensor grad_input2, int kH, int kW, - int patchH, int patchW, int padH, - int padW, int dilationH, - int dilationW, int dilation_patchH, - int dilation_patchW, int dH, int dW); - -void correlation_forward_cuda(Tensor input1, Tensor input2, Tensor output, - int kH, int kW, int patchH, int patchW, int padH, - int padW, int dilationH, int dilationW, - int dilation_patchH, int dilation_patchW, int dH, - int dW) { - CorrelationForwardCUDAKernelLauncher( - input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH, - dilationW, dilation_patchH, dilation_patchW, dH, dW); -} - -void correlation_backward_cuda(Tensor grad_output, Tensor input1, Tensor input2, - Tensor grad_input1, Tensor grad_input2, int kH, - int kW, int patchH, int patchW, int padH, - int padW, int dilationH, int dilationW, - int dilation_patchH, int dilation_patchW, int dH, - int dW) { - CorrelationBackwardCUDAKernelLauncher( - grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH, - patchW, padH, padW, dilationH, dilationW, dilation_patchH, - dilation_patchW, dH, dW); -} - -void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output, - int kH, int kW, int patchH, int patchW, int padH, - int padW, int dilationH, int dilationW, - int dilation_patchH, int dilation_patchW, int dH, - int dW); - -void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2, - Tensor grad_input1, Tensor grad_input2, int kH, - int kW, int patchH, int patchW, int padH, - int padW, int dilationH, int dilationW, - int dilation_patchH, int dilation_patchW, int dH, - int dW); - -REGISTER_DEVICE_IMPL(correlation_forward_impl, CUDA, correlation_forward_cuda); -REGISTER_DEVICE_IMPL(correlation_backward_impl, CUDA, - correlation_backward_cuda); - -void deformable_im2col_cuda(Tensor data_im, Tensor data_offset, - const int channels, const int height, - const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - Tensor data_col); - -void deformable_col2im_cuda(Tensor data_col, Tensor data_offset, - const int channels, const int height, - const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - Tensor grad_im); - -void deformable_col2im_coord_cuda( - Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, - const int height, const int width, const int ksize_h, const int ksize_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int parallel_imgs, - const int deformable_group, Tensor grad_offset); - -void deformable_im2col_impl(Tensor data_im, Tensor data_offset, - const int channels, const int height, - const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int 
dilation_w, - const int parallel_imgs, const int deformable_group, - Tensor data_col); - -void deformable_col2im_impl(Tensor data_col, Tensor data_offset, - const int channels, const int height, - const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - Tensor grad_im); - -void deformable_col2im_coord_impl( - Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, - const int height, const int width, const int ksize_h, const int ksize_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int parallel_imgs, - const int deformable_group, Tensor grad_offset); - -REGISTER_DEVICE_IMPL(deformable_im2col_impl, CUDA, deformable_im2col_cuda); -REGISTER_DEVICE_IMPL(deformable_col2im_impl, CUDA, deformable_col2im_cuda); -REGISTER_DEVICE_IMPL(deformable_col2im_coord_impl, CUDA, - deformable_col2im_coord_cuda); - -void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, - Tensor offset, Tensor output, - int pooled_height, int pooled_width, - float spatial_scale, - int sampling_ratio, float gamma); - -void DeformRoIPoolBackwardCUDAKernelLauncher( - Tensor grad_output, Tensor input, Tensor rois, Tensor offset, - Tensor grad_input, Tensor grad_offset, int pooled_height, int pooled_width, - float spatial_scale, int sampling_ratio, float gamma); - -void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale, - int sampling_ratio, float gamma) { - DeformRoIPoolForwardCUDAKernelLauncher(input, rois, offset, output, - pooled_height, pooled_width, - spatial_scale, sampling_ratio, gamma); -} - -void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, - Tensor rois, Tensor offset, - Tensor grad_input, Tensor grad_offset, - int pooled_height, int pooled_width, - float spatial_scale, int sampling_ratio, - float gamma) { - DeformRoIPoolBackwardCUDAKernelLauncher( - grad_output, input, rois, offset, grad_input, grad_offset, pooled_height, - pooled_width, spatial_scale, sampling_ratio, gamma); -} - -void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale, - int sampling_ratio, float gamma); - -void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input, - Tensor rois, Tensor offset, - Tensor grad_input, Tensor grad_offset, - int pooled_height, int pooled_width, - float spatial_scale, int sampling_ratio, - float gamma); - -REGISTER_DEVICE_IMPL(deform_roi_pool_forward_impl, CUDA, - deform_roi_pool_forward_cuda); -REGISTER_DEVICE_IMPL(deform_roi_pool_backward_impl, CUDA, - deform_roi_pool_backward_cuda); - -void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, - Tensor weight, Tensor output, - const float gamma, - const float alpha); - -void SigmoidFocalLossBackwardCUDAKernelLauncher(Tensor input, Tensor target, - Tensor weight, - Tensor grad_input, - const float gamma, - const float alpha); - -void SoftmaxFocalLossForwardCUDAKernelLauncher(Tensor softmax, Tensor target, - Tensor weight, Tensor output, - const float gamma, - const float alpha); - -void SoftmaxFocalLossBackwardCUDAKernelLauncher(Tensor softmax, Tensor target, - Tensor weight, Tensor buff, - Tensor grad_input, - const float gamma, - const float 
alpha);
-
-void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
-                                     Tensor output, float gamma, float alpha) {
-  SigmoidFocalLossForwardCUDAKernelLauncher(input, target, weight, output,
-                                            gamma, alpha);
-}
-
-void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target,
-                                      Tensor weight, Tensor grad_input,
-                                      float gamma, float alpha) {
-  SigmoidFocalLossBackwardCUDAKernelLauncher(input, target, weight, grad_input,
-                                             gamma, alpha);
-}
-
-void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight,
-                                     Tensor output, float gamma, float alpha) {
-  SoftmaxFocalLossForwardCUDAKernelLauncher(input, target, weight, output,
-                                            gamma, alpha);
-}
-
-void softmax_focal_loss_backward_cuda(Tensor input, Tensor target,
-                                      Tensor weight, Tensor buff,
-                                      Tensor grad_input, float gamma,
-                                      float alpha) {
-  SoftmaxFocalLossBackwardCUDAKernelLauncher(input, target, weight, buff,
-                                             grad_input, gamma, alpha);
-}
-
-void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
-                                     Tensor output, float gamma, float alpha);
-
-void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target,
-                                      Tensor weight, Tensor grad_input,
-                                      float gamma, float alpha);
-
-void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight,
-                                     Tensor output, float gamma, float alpha);
-
-void softmax_focal_loss_backward_impl(Tensor input, Tensor target,
-                                      Tensor weight, Tensor buff,
-                                      Tensor grad_input, float gamma,
-                                      float alpha);
-
-REGISTER_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, CUDA,
-                     sigmoid_focal_loss_forward_cuda);
-REGISTER_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, CUDA,
-                     sigmoid_focal_loss_backward_cuda);
-REGISTER_DEVICE_IMPL(softmax_focal_loss_forward_impl, CUDA,
-                     softmax_focal_loss_forward_cuda);
-REGISTER_DEVICE_IMPL(softmax_focal_loss_backward_impl, CUDA,
-                     softmax_focal_loss_backward_cuda);
-
-void FurthestPointSamplingForwardCUDAKernelLauncher(int b, int n, int m,
-                                                    const float* dataset,
-                                                    float* temp, int* idxs);
-
-void FurthestPointSamplingWithDistForwardCUDAKernelLauncher(
-    int b, int n, int m, const float* dataset, float* temp, int* idxs);
-
-void furthest_point_sampling_forward_cuda(Tensor points_tensor,
-                                          Tensor temp_tensor, Tensor idx_tensor,
-                                          int b, int n, int m) {
-  const float* dataset = points_tensor.data_ptr<float>();
-  float* temp = temp_tensor.data_ptr<float>();
-  int* idxs = idx_tensor.data_ptr<int>();
-  FurthestPointSamplingForwardCUDAKernelLauncher(b, n, m, dataset, temp, idxs);
-}
-
-void furthest_point_sampling_with_dist_forward_cuda(Tensor points_tensor,
-                                                    Tensor temp_tensor,
-                                                    Tensor idx_tensor, int b,
-                                                    int n, int m) {
-  const float* dataset = points_tensor.data_ptr<float>();
-  float* temp = temp_tensor.data_ptr<float>();
-  int* idxs = idx_tensor.data_ptr<int>();
-  FurthestPointSamplingWithDistForwardCUDAKernelLauncher(b, n, m, dataset, temp,
-                                                         idxs);
-}
-
-void furthest_point_sampling_forward_impl(Tensor points_tensor,
-                                          Tensor temp_tensor, Tensor idx_tensor,
-                                          int b, int n, int m);
-
-void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor,
-                                                    Tensor temp_tensor,
-                                                    Tensor idx_tensor, int b,
-                                                    int n, int m);
-
-REGISTER_DEVICE_IMPL(furthest_point_sampling_forward_impl, CUDA,
-                     furthest_point_sampling_forward_cuda);
-REGISTER_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, CUDA,
-                     furthest_point_sampling_with_dist_forward_cuda);
-
-torch::Tensor fused_bias_leakyrelu_op(const torch::Tensor& input,
-                                      const torch::Tensor& bias,
-                                      const torch::Tensor& refer, int act,
-                                      int grad, float alpha, float scale);
-
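Each entry in cudabind.cpp follows the same three-layer pattern visible in the furthest-point-sampling code above: a raw CUDA kernel launcher, a thin _cuda wrapper that unwraps tensors into typed pointers, and REGISTER_DEVICE_IMPL wiring the wrapper to the generic _impl. A self-contained sketch of the wrapper layer; fps_forward and launch_fps_stub are hypothetical names, with the stub standing in for the real kernel launcher:

#include <torch/torch.h>

// Stub in place of a CUDA launcher: real code launches a kernel here; the
// stub just selects point 0 for every sample so the sketch runs on CPU.
void launch_fps_stub(int b, int n, int m, const float* dataset, float* temp,
                     int* idxs) {
  (void)n; (void)dataset; (void)temp;
  for (int i = 0; i < b * m; ++i) idxs[i] = 0;
}

void fps_forward(const torch::Tensor& points, torch::Tensor temp,
                 torch::Tensor idx, int b, int n, int m) {
  // data_ptr<T>() is only safe on contiguous tensors of the matching dtype,
  // which the CHECK_* macros in pytorch_cpp_helper.hpp normally enforce.
  TORCH_CHECK(points.is_contiguous() && points.dtype() == torch::kFloat);
  TORCH_CHECK(idx.is_contiguous() && idx.dtype() == torch::kInt);
  launch_fps_stub(b, n, m, points.data_ptr<float>(), temp.data_ptr<float>(),
                  idx.data_ptr<int>());
}

int main() {
  auto points = torch::rand({2, 1024, 3});
  auto temp = torch::full({2, 1024}, 1e10);
  auto idx = torch::zeros({2, 16}, torch::kInt);
  fps_forward(points, temp, idx, 2, 1024, 16);
}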
-torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input, - const torch::Tensor& bias, - const torch::Tensor& refer, int act, - int grad, float alpha, float scale); -REGISTER_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, CUDA, - fused_bias_leakyrelu_op); - -void GatherPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, - const Tensor points, - const Tensor idx, Tensor out); - -void GatherPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, - const Tensor grad_out, - const Tensor idx, - Tensor grad_points); - -void gather_points_forward_cuda(int b, int c, int n, int npoints, - const Tensor points, const Tensor idx, - Tensor out) { - GatherPointsForwardCUDAKernelLauncher(b, c, n, npoints, points, idx, out); -}; - -void gather_points_backward_cuda(int b, int c, int n, int npoints, - const Tensor grad_out, const Tensor idx, - Tensor grad_points) { - GatherPointsBackwardCUDAKernelLauncher(b, c, n, npoints, grad_out, idx, - grad_points); -}; - -void gather_points_forward_impl(int b, int c, int n, int npoints, - const Tensor points, const Tensor idx, - Tensor out); - -void gather_points_backward_impl(int b, int c, int n, int npoints, - const Tensor grad_out, const Tensor idx, - Tensor grad_points); - -REGISTER_DEVICE_IMPL(gather_points_forward_impl, CUDA, - gather_points_forward_cuda); -REGISTER_DEVICE_IMPL(gather_points_backward_impl, CUDA, - gather_points_backward_cuda); - -void GroupPointsForwardCUDAKernelLauncher(int b, int c, int n, int npoints, - int nsample, const Tensor points, - const Tensor idx, Tensor out); - -void GroupPointsBackwardCUDAKernelLauncher(int b, int c, int n, int npoints, - int nsample, const Tensor grad_out, - const Tensor idx, - Tensor grad_points); - -void group_points_forward_cuda(int b, int c, int n, int npoints, int nsample, - const Tensor points, const Tensor idx, - Tensor out) { - GroupPointsForwardCUDAKernelLauncher(b, c, n, npoints, nsample, points, idx, - out); -}; - -void group_points_backward_cuda(int b, int c, int n, int npoints, int nsample, - const Tensor grad_out, const Tensor idx, - Tensor grad_points) { - GroupPointsBackwardCUDAKernelLauncher(b, c, n, npoints, nsample, grad_out, - idx, grad_points); -}; - -void group_points_forward_impl(int b, int c, int n, int npoints, int nsample, - const Tensor points, const Tensor idx, - Tensor out); - -void group_points_backward_impl(int b, int c, int n, int npoints, int nsample, - const Tensor grad_out, const Tensor idx, - Tensor grad_points); - -REGISTER_DEVICE_IMPL(group_points_forward_impl, CUDA, - group_points_forward_cuda); -REGISTER_DEVICE_IMPL(group_points_backward_impl, CUDA, - group_points_backward_cuda); - -void KNNForwardCUDAKernelLauncher(int b, int n, int m, int nsample, - const Tensor xyz, const Tensor new_xyz, - Tensor idx, Tensor dist2); - -void knn_forward_cuda(int b, int n, int m, int nsample, const Tensor xyz, - const Tensor new_xyz, Tensor idx, Tensor dist2) { - KNNForwardCUDAKernelLauncher(b, n, m, nsample, xyz, new_xyz, idx, dist2); -} - -void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz, - const Tensor new_xyz, Tensor idx, Tensor dist2); -REGISTER_DEVICE_IMPL(knn_forward_impl, CUDA, knn_forward_cuda); - -void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data, - const Tensor mask_h_idx, - const Tensor mask_w_idx, - Tensor top_data, const int kernel_h, - const int kernel_w, const int pad_h, - const int pad_w); - -void MaskedCol2imForwardCUDAKernelLauncher(const Tensor bottom_data, - const Tensor mask_h_idx, - const 
Tensor mask_w_idx, - Tensor top_data, const int height, - const int width, const int channels); - -void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor col, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w) { - // im: (n, ic, h, w), kernel size (kh, kw) - // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) - MaskedIm2colForwardCUDAKernelLauncher(im, mask_h_idx, mask_w_idx, col, - kernel_h, kernel_w, pad_h, pad_w); -} - -void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor im, int height, - int width, int channels) { - // im: (n, ic, h, w), kernel size (kh, kw) - // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) - MaskedCol2imForwardCUDAKernelLauncher(col, mask_h_idx, mask_w_idx, im, height, - width, channels); -} - -void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor col, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w); - -void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor im, int height, - int width, int channels); - -REGISTER_DEVICE_IMPL(masked_im2col_forward_impl, CUDA, - masked_im2col_forward_cuda); -REGISTER_DEVICE_IMPL(masked_col2im_forward_impl, CUDA, - masked_col2im_forward_cuda); - -void modulated_deformable_im2col_cuda( - const Tensor data_im, const Tensor data_offset, const Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, Tensor data_col); - -void modulated_deformable_col2im_cuda( - const Tensor data_col, const Tensor data_offset, const Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, Tensor grad_im); - -void modulated_deformable_col2im_coord_cuda( - const Tensor data_col, const Tensor data_im, const Tensor data_offset, - const Tensor data_mask, const int batch_size, const int channels, - const int height_im, const int width_im, const int height_col, - const int width_col, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int deformable_group, - Tensor grad_offset, Tensor grad_mask); - -void modulated_deformable_im2col_impl( - const Tensor data_im, const Tensor data_offset, const Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, Tensor data_col); - -void modulated_deformable_col2im_impl( - const Tensor data_col, const Tensor data_offset, const Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, 
const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, Tensor grad_im); - -void modulated_deformable_col2im_coord_impl( - const Tensor data_col, const Tensor data_im, const Tensor data_offset, - const Tensor data_mask, const int batch_size, const int channels, - const int height_im, const int width_im, const int height_col, - const int width_col, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int deformable_group, - Tensor grad_offset, Tensor grad_mask); - -REGISTER_DEVICE_IMPL(modulated_deformable_im2col_impl, CUDA, - modulated_deformable_im2col_cuda); -REGISTER_DEVICE_IMPL(modulated_deformable_col2im_impl, CUDA, - modulated_deformable_col2im_cuda); -REGISTER_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, CUDA, - modulated_deformable_col2im_coord_cuda); - -Tensor ms_deform_attn_cuda_forward(const Tensor& value, - const Tensor& spatial_shapes, - const Tensor& level_start_index, - const Tensor& sampling_loc, - const Tensor& attn_weight, - const int im2col_step); - -void ms_deform_attn_cuda_backward( - const Tensor& value, const Tensor& spatial_shapes, - const Tensor& level_start_index, const Tensor& sampling_loc, - const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value, - Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step); - -Tensor ms_deform_attn_impl_forward(const Tensor& value, - const Tensor& spatial_shapes, - const Tensor& level_start_index, - const Tensor& sampling_loc, - const Tensor& attn_weight, - const int im2col_step); - -void ms_deform_attn_impl_backward( - const Tensor& value, const Tensor& spatial_shapes, - const Tensor& level_start_index, const Tensor& sampling_loc, - const Tensor& attn_weight, const Tensor& grad_output, Tensor& grad_value, - Tensor& grad_sampling_loc, Tensor& grad_attn_weight, const int im2col_step); - -REGISTER_DEVICE_IMPL(ms_deform_attn_impl_forward, CUDA, - ms_deform_attn_cuda_forward); -REGISTER_DEVICE_IMPL(ms_deform_attn_impl_backward, CUDA, - ms_deform_attn_cuda_backward); - -Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, - int offset); - -Tensor nms_cuda(Tensor boxes, Tensor scores, float iou_threshold, int offset) { - return NMSCUDAKernelLauncher(boxes, scores, iou_threshold, offset); -} - -Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset); -REGISTER_DEVICE_IMPL(nms_impl, CUDA, nms_cuda); - -void PointsInBoxesPartForwardCUDAKernelLauncher(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points); - -void PointsInBoxesAllForwardCUDAKernelLauncher(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points); - -void points_in_boxes_part_forward_cuda(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points) { - PointsInBoxesPartForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num, - boxes, pts, box_idx_of_points); -}; - -void points_in_boxes_all_forward_cuda(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points) { - PointsInBoxesAllForwardCUDAKernelLauncher(batch_size, boxes_num, pts_num, - boxes, pts, box_idx_of_points); -}; - -void points_in_boxes_part_forward_impl(int batch_size, int 
boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points); - -void points_in_boxes_all_forward_impl(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points); -REGISTER_DEVICE_IMPL(points_in_boxes_part_forward_impl, CUDA, - points_in_boxes_part_forward_cuda); -REGISTER_DEVICE_IMPL(points_in_boxes_all_forward_impl, CUDA, - points_in_boxes_all_forward_cuda); - -void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input, - Tensor output, const int num_, - const int h_feature, const int w_feature, - const int h_mask, const int w_mask, - const int half_h_mask, - const int half_w_mask); - -void PSAMaskBackwardCUDAKernelLauncher( - const int psa_type, const Tensor grad_output, Tensor grad_input, - const int num_, const int h_feature, const int w_feature, const int h_mask, - const int w_mask, const int half_h_mask, const int half_w_mask); - -void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output, - const int num_, const int h_feature, - const int w_feature, const int h_mask, - const int w_mask, const int half_h_mask, - const int half_w_mask) { - PSAMaskForwardCUDAKernelLauncher(psa_type, input, output, num_, h_feature, - w_feature, h_mask, w_mask, half_h_mask, - half_w_mask); -} - -void psamask_backward_cuda(const int psa_type, const Tensor grad_output, - Tensor grad_input, const int num_, - const int h_feature, const int w_feature, - const int h_mask, const int w_mask, - const int half_h_mask, const int half_w_mask) { - PSAMaskBackwardCUDAKernelLauncher(psa_type, grad_output, grad_input, num_, - h_feature, w_feature, h_mask, w_mask, - half_h_mask, half_w_mask); -} - -void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output, - const int num_, const int h_feature, - const int w_feature, const int h_mask, - const int w_mask, const int half_h_mask, - const int half_w_mask); - -void psamask_backward_impl(const int psa_type, const Tensor grad_output, - Tensor grad_input, const int num_, - const int h_feature, const int w_feature, - const int h_mask, const int w_mask, - const int half_h_mask, const int half_w_mask); -REGISTER_DEVICE_IMPL(psamask_forward_impl, CUDA, psamask_forward_cuda); -REGISTER_DEVICE_IMPL(psamask_backward_impl, CUDA, psamask_backward_cuda); - -void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, - Tensor argmax_y, Tensor argmax_x, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - int pool_mode, bool aligned); - -void ROIAlignBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, - Tensor argmax_y, Tensor argmax_x, - Tensor grad_input, int aligned_height, - int aligned_width, float spatial_scale, - int sampling_ratio, int pool_mode, - bool aligned); - -void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output, - Tensor argmax_y, Tensor argmax_x, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - int pool_mode, bool aligned) { - ROIAlignForwardCUDAKernelLauncher( - input, rois, output, argmax_y, argmax_x, aligned_height, aligned_width, - spatial_scale, sampling_ratio, pool_mode, aligned); -} - -void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y, - Tensor argmax_x, Tensor grad_input, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - int pool_mode, bool aligned) { - ROIAlignBackwardCUDAKernelLauncher( - grad_output, rois, argmax_y, argmax_x, grad_input, 
aligned_height, - aligned_width, spatial_scale, sampling_ratio, pool_mode, aligned); -} - -void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output, - Tensor argmax_y, Tensor argmax_x, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - int pool_mode, bool aligned); - -void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y, - Tensor argmax_x, Tensor grad_input, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - int pool_mode, bool aligned); - -REGISTER_DEVICE_IMPL(roi_align_forward_impl, CUDA, roi_align_forward_cuda); -REGISTER_DEVICE_IMPL(roi_align_backward_impl, CUDA, roi_align_backward_cuda); - -void ROIAlignRotatedForwardCUDAKernelLauncher( - const at::Tensor input, const at::Tensor rois, const float spatial_scale, - const int sampling_ratio, const bool aligned, const bool clockwise, - const int channels, const int height, const int width, const int num_rois, - const int pooled_height, const int pooled_width, at::Tensor output); - -void ROIAlignRotatedBackwardCUDAKernelLauncher( - const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, - const int sampling_ratio, const bool aligned, const bool clockwise, - const int channels, const int height, const int width, const int num_rois, - const int pooled_height, const int pooled_width, at::Tensor bottom_grad); - -void roi_align_rotated_forward_cuda(Tensor input, Tensor rois, Tensor output, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - bool aligned, bool clockwise) { - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - - if (size_rois != 6) { - AT_ERROR("wrong roi size"); - } - - int num_channels = input.size(1); - int data_height = input.size(2); - int data_width = input.size(3); - ROIAlignRotatedForwardCUDAKernelLauncher( - input, rois, spatial_scale, sampling_ratio, aligned, clockwise, - num_channels, data_height, data_width, num_rois, aligned_height, - aligned_width, output); -} - -void roi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois, - Tensor bottom_grad, int aligned_height, - int aligned_width, float spatial_scale, - int sampling_ratio, bool aligned, - bool clockwise) { - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - if (size_rois != 6) { - AT_ERROR("wrong roi size"); - } - - int num_channels = bottom_grad.size(1); - int data_height = bottom_grad.size(2); - int data_width = bottom_grad.size(3); - ROIAlignRotatedBackwardCUDAKernelLauncher( - top_grad, rois, spatial_scale, sampling_ratio, aligned, clockwise, - num_channels, data_height, data_width, num_rois, aligned_height, - aligned_width, bottom_grad); -} - -void roi_align_rotated_forward_impl(Tensor input, Tensor rois, Tensor output, - int aligned_height, int aligned_width, - float spatial_scale, int sampling_ratio, - bool aligned, bool clockwise); - -void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, - Tensor bottom_grad, int aligned_height, - int aligned_width, float spatial_scale, - int sampling_ratio, bool aligned, - bool clockwise); -REGISTER_DEVICE_IMPL(roi_align_rotated_forward_impl, CUDA, - roi_align_rotated_forward_cuda); -REGISTER_DEVICE_IMPL(roi_align_rotated_backward_impl, CUDA, - roi_align_rotated_backward_cuda); - -void RiROIAlignRotatedForwardCUDAKernelLauncher( - const at::Tensor features, const at::Tensor rois, const float spatial_scale, - const int num_samples, const bool clockwise, const int channels, - const 
int height, const int width, const int num_rois, - const int pooled_height, const int pooled_width, const int num_orientations, - at::Tensor output); - -void RiROIAlignRotatedBackwardCUDAKernelLauncher( - const at::Tensor top_grad, const at::Tensor rois, const float spatial_scale, - const int num_samples, const bool clockwise, const int channels, - const int height, const int width, const int num_rois, - const int pooled_height, const int pooled_width, const int num_orientations, - at::Tensor bottom_grad); - -void riroi_align_rotated_forward_cuda(Tensor features, Tensor rois, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale, - int num_samples, int num_orientations, - bool clockwise) { - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - if (size_rois != 6) { - AT_ERROR("wrong roi size"); - } - CHECK_CONTIGUOUS(features); - CHECK_CONTIGUOUS(rois); - int num_channels = features.size(1) / num_orientations; - int data_height = features.size(2); - int data_width = features.size(3); - RiROIAlignRotatedForwardCUDAKernelLauncher( - features, rois, spatial_scale, num_samples, clockwise, num_channels, - data_height, data_width, num_rois, pooled_height, pooled_width, - num_orientations, output); -} - -void riroi_align_rotated_backward_cuda(Tensor top_grad, Tensor rois, - Tensor bottom_grad, int pooled_height, - int pooled_width, float spatial_scale, - int num_samples, int num_orientations, - bool clockwise) { - // Number of ROIs - int num_rois = rois.size(0); - int size_rois = rois.size(1); - if (size_rois != 6) { - AT_ERROR("wrong roi size"); - } - CHECK_CONTIGUOUS(top_grad); - CHECK_CONTIGUOUS(rois); - int num_channels = bottom_grad.size(1) / num_orientations; - int data_height = bottom_grad.size(2); - int data_width = bottom_grad.size(3); - RiROIAlignRotatedBackwardCUDAKernelLauncher( - top_grad, rois, spatial_scale, num_samples, clockwise, num_channels, - data_height, data_width, num_rois, pooled_height, pooled_width, - num_orientations, bottom_grad); -} - -void riroi_align_rotated_forward_impl(Tensor features, Tensor rois, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale, - int num_samples, int num_orientations, - bool clockwise); - -void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois, - Tensor bottom_grad, int pooled_height, - int pooled_width, float spatial_scale, - int num_samples, int num_orientations, - bool clockwise); - -REGISTER_DEVICE_IMPL(riroi_align_rotated_forward_impl, CUDA, - riroi_align_rotated_forward_cuda); -REGISTER_DEVICE_IMPL(riroi_align_rotated_backward_impl, CUDA, - riroi_align_rotated_backward_cuda); - -void RoiawarePool3dForwardCUDAKernelLauncher( - int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x, - int out_y, int out_z, const Tensor rois, const Tensor pts, - const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels, - Tensor pooled_features, int pool_method); - -void RoiawarePool3dBackwardCUDAKernelLauncher( - int boxes_num, int out_x, int out_y, int out_z, int channels, - int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax, - const Tensor grad_out, Tensor grad_in, int pool_method); - -void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels, - int max_pts_each_voxel, int out_x, int out_y, - int out_z, const Tensor rois, - const Tensor pts, const Tensor pts_feature, - Tensor argmax, Tensor pts_idx_of_voxels, - Tensor pooled_features, int pool_method) { - RoiawarePool3dForwardCUDAKernelLauncher( 
- boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z, - rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features, - pool_method); -}; - -void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y, - int out_z, int channels, - int max_pts_each_voxel, - const Tensor pts_idx_of_voxels, - const Tensor argmax, const Tensor grad_out, - Tensor grad_in, int pool_method) { - RoiawarePool3dBackwardCUDAKernelLauncher( - boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel, - pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method); -}; - -void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels, - int max_pts_each_voxel, int out_x, int out_y, - int out_z, const Tensor rois, - const Tensor pts, const Tensor pts_feature, - Tensor argmax, Tensor pts_idx_of_voxels, - Tensor pooled_features, int pool_method); - -void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y, - int out_z, int channels, - int max_pts_each_voxel, - const Tensor pts_idx_of_voxels, - const Tensor argmax, const Tensor grad_out, - Tensor grad_in, int pool_method); - -REGISTER_DEVICE_IMPL(roiaware_pool3d_forward_impl, CUDA, - roiaware_pool3d_forward_cuda); -REGISTER_DEVICE_IMPL(roiaware_pool3d_backward_impl, CUDA, - roiaware_pool3d_backward_cuda); - -void RoIPointPool3dForwardCUDAKernelLauncher( - int batch_size, int pts_num, int boxes_num, int feature_in_len, - int sampled_pts_num, const Tensor xyz, const Tensor boxes3d, - const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag); - -void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num, - int feature_in_len, int sampled_pts_num, - const Tensor xyz, const Tensor boxes3d, - const Tensor pts_feature, - Tensor pooled_features, - Tensor pooled_empty_flag) { - RoIPointPool3dForwardCUDAKernelLauncher( - batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz, - boxes3d, pts_feature, pooled_features, pooled_empty_flag); -}; - -void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num, - int feature_in_len, int sampled_pts_num, - const Tensor xyz, const Tensor boxes3d, - const Tensor pts_feature, - Tensor pooled_features, - Tensor pooled_empty_flag); -REGISTER_DEVICE_IMPL(roipoint_pool3d_forward_impl, CUDA, - roipoint_pool3d_forward_cuda); - -void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, - Tensor argmax, int pooled_height, - int pooled_width, float spatial_scale); - -void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, - Tensor argmax, Tensor grad_input, - int pooled_height, int pooled_width, - float spatial_scale); - -void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output, - Tensor argmax, int pooled_height, int pooled_width, - float spatial_scale) { - ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height, - pooled_width, spatial_scale); -} - -void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax, - Tensor grad_input, int pooled_height, - int pooled_width, float spatial_scale) { - ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input, - pooled_height, pooled_width, spatial_scale); -} - -void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output, - Tensor argmax, int pooled_height, int pooled_width, - float spatial_scale); -void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax, - Tensor grad_input, int pooled_height, - int pooled_width, float spatial_scale); 
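For orientation, this is roughly the computation the roi_pool launchers above run on the GPU: each ROI is projected onto the feature map and every output cell max-pools its bin. The CPU sketch below is illustrative only; the function name and the (batch_idx, x1, y1, x2, y2) ROI layout follow the common RoIPool convention and are not code taken from the mmcv kernels.

#include <algorithm>
#include <cfloat>
#include <cstddef>
#include <vector>

// feat: one image in CHW layout. roi: (batch_idx, x1, y1, x2, y2) in image
// coordinates. out: C x pooled_h x pooled_w, max-pooled per bin.
void roi_pool_forward_cpu_sketch(const std::vector<float>& feat, int channels,
                                 int height, int width, const float roi[5],
                                 float spatial_scale, int pooled_h,
                                 int pooled_w, std::vector<float>& out) {
  // Project the ROI onto the feature map and clamp to a minimum 1x1 size.
  const int x1 = static_cast<int>(roi[1] * spatial_scale);
  const int y1 = static_cast<int>(roi[2] * spatial_scale);
  const int x2 = static_cast<int>(roi[3] * spatial_scale);
  const int y2 = static_cast<int>(roi[4] * spatial_scale);
  const int roi_w = std::max(x2 - x1 + 1, 1);
  const int roi_h = std::max(y2 - y1 + 1, 1);
  out.assign(static_cast<std::size_t>(channels) * pooled_h * pooled_w, 0.f);
  for (int c = 0; c < channels; ++c)
    for (int ph = 0; ph < pooled_h; ++ph)
      for (int pw = 0; pw < pooled_w; ++pw) {
        // Each output cell max-pools its bin of the projected ROI.
        const int hstart = std::max(y1 + ph * roi_h / pooled_h, 0);
        const int hend = std::min(y1 + (ph + 1) * roi_h / pooled_h, height);
        const int wstart = std::max(x1 + pw * roi_w / pooled_w, 0);
        const int wend = std::min(x1 + (pw + 1) * roi_w / pooled_w, width);
        float best = -FLT_MAX;
        bool hit = false;
        for (int h = hstart; h < hend; ++h)
          for (int w = wstart; w < wend; ++w) {
            best = std::max(best, feat[(c * height + h) * width + w]);
            hit = true;
          }
        // Empty bins (fully clipped) are written as zero.
        out[(c * pooled_h + ph) * pooled_w + pw] = hit ? best : 0.f;
      }
}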
-REGISTER_DEVICE_IMPL(roi_pool_forward_impl, CUDA, roi_pool_forward_cuda); -REGISTER_DEVICE_IMPL(roi_pool_backward_impl, CUDA, roi_pool_backward_cuda); - -typedef enum { SUM = 0, MEAN = 1, MAX = 2 } reduce_t; - -std::vector<at::Tensor> DynamicPointToVoxelForwardCUDAKernelLauncher( - const at::Tensor& feats, const at::Tensor& coors, - const reduce_t reduce_type); - -void DynamicPointToVoxelBackwardCUDAKernelLauncher( - at::Tensor& grad_feats, const at::Tensor& grad_reduced_feats, - const at::Tensor& feats, const at::Tensor& reduced_feats, - const at::Tensor& coors_map, const at::Tensor& reduce_count, - const reduce_t reduce_type); - -std::vector<torch::Tensor> dynamic_point_to_voxel_forward_cuda( - const torch::Tensor& feats, const torch::Tensor& coors, - const reduce_t reduce_type) { - return DynamicPointToVoxelForwardCUDAKernelLauncher(feats, coors, - reduce_type); -}; - -void dynamic_point_to_voxel_backward_cuda( - torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats, - const torch::Tensor& feats, const torch::Tensor& reduced_feats, - const torch::Tensor& coors_idx, const torch::Tensor& reduce_count, - const reduce_t reduce_type) { - DynamicPointToVoxelBackwardCUDAKernelLauncher(grad_feats, grad_reduced_feats, - feats, reduced_feats, coors_idx, - reduce_count, reduce_type); -}; - -std::vector<torch::Tensor> dynamic_point_to_voxel_forward_impl( - const torch::Tensor& feats, const torch::Tensor& coors, - const reduce_t reduce_type); - -void dynamic_point_to_voxel_backward_impl( - torch::Tensor& grad_feats, const torch::Tensor& grad_reduced_feats, - const torch::Tensor& feats, const torch::Tensor& reduced_feats, - const torch::Tensor& coors_idx, const torch::Tensor& reduce_count, - const reduce_t reduce_type); - -REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_forward_impl, CUDA, - dynamic_point_to_voxel_forward_cuda); -REGISTER_DEVICE_IMPL(dynamic_point_to_voxel_backward_impl, CUDA, - dynamic_point_to_voxel_backward_cuda); - -void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean); - -void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean, - Tensor var); - -void SyncBNForwardOutputCUDAKernelLauncher( - const Tensor input, const Tensor mean, const Tensor var, - Tensor running_mean, Tensor running_var, const Tensor weight, - const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps, - float momentum, int group_size); - -void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output, - const Tensor norm, - Tensor grad_weight, - Tensor grad_bias); - -void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output, - const Tensor weight, - const Tensor grad_weight, - const Tensor grad_bias, - const Tensor norm, const Tensor std, - Tensor grad_input); - -void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) { - SyncBNForwardMeanCUDAKernelLauncher(input, mean); -} - -void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean, - Tensor var) { - SyncBNForwardVarCUDAKernelLauncher(input, mean, var); -} - -void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean, - const Tensor var, Tensor running_mean, - Tensor running_var, const Tensor weight, - const Tensor bias, Tensor norm, Tensor std, - Tensor output, float eps, float momentum, - int group_size) { - SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean, - running_var, weight, bias, norm, std, - output, eps, momentum, group_size); -} - -void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm, - Tensor grad_weight, Tensor grad_bias) { - 
SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight, - grad_bias); -} - -void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight, - const Tensor grad_weight, - const Tensor grad_bias, const Tensor norm, - const Tensor std, Tensor grad_input) { - SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight, - grad_bias, norm, std, grad_input); -} - -void sync_bn_forward_mean_impl(const Tensor input, Tensor mean); - -void sync_bn_forward_var_impl(const Tensor input, const Tensor mean, - Tensor var); - -void sync_bn_forward_output_impl(const Tensor input, const Tensor mean, - const Tensor var, Tensor running_mean, - Tensor running_var, const Tensor weight, - const Tensor bias, Tensor norm, Tensor std, - Tensor output, float eps, float momentum, - int group_size); - -void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm, - Tensor grad_weight, Tensor grad_bias); - -void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight, - const Tensor grad_weight, - const Tensor grad_bias, const Tensor norm, - const Tensor std, Tensor grad_input); - -REGISTER_DEVICE_IMPL(sync_bn_forward_mean_impl, CUDA, - sync_bn_forward_mean_cuda); -REGISTER_DEVICE_IMPL(sync_bn_forward_var_impl, CUDA, sync_bn_forward_var_cuda); -REGISTER_DEVICE_IMPL(sync_bn_forward_output_impl, CUDA, - sync_bn_forward_output_cuda); -REGISTER_DEVICE_IMPL(sync_bn_backward_param_impl, CUDA, - sync_bn_backward_param_cuda); -REGISTER_DEVICE_IMPL(sync_bn_backward_data_impl, CUDA, - sync_bn_backward_data_cuda); - -void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n, - const Tensor points, - const Tensor idx, - const Tensor weight, Tensor out); - -void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m, - const Tensor grad_out, - const Tensor idx, - const Tensor weight, - Tensor grad_points); - -void three_interpolate_forward_cuda(int b, int c, int m, int n, - const Tensor points, const Tensor idx, - const Tensor weight, Tensor out) { - ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight, - out); -}; - -void three_interpolate_backward_cuda(int b, int c, int n, int m, - const Tensor grad_out, const Tensor idx, - const Tensor weight, Tensor grad_points) { - ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight, - grad_points); -}; - -void three_interpolate_forward_impl(int b, int c, int m, int n, - const Tensor points, const Tensor idx, - const Tensor weight, Tensor out); - -void three_interpolate_backward_impl(int b, int c, int n, int m, - const Tensor grad_out, const Tensor idx, - const Tensor weight, Tensor grad_points); -REGISTER_DEVICE_IMPL(three_interpolate_forward_impl, CUDA, - three_interpolate_forward_cuda); -REGISTER_DEVICE_IMPL(three_interpolate_backward_impl, CUDA, - three_interpolate_backward_cuda); - -void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown, - const Tensor known, Tensor dist2, - Tensor idx); - -void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown, - const Tensor known, Tensor dist2, Tensor idx) { - ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx); -}; - -void three_nn_forward_impl(int b, int n, int m, const Tensor unknown, - const Tensor known, Tensor dist2, Tensor idx); -REGISTER_DEVICE_IMPL(three_nn_forward_impl, CUDA, three_nn_forward_cuda); - -void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift, - Tensor output); - -void TINShiftBackwardCUDAKernelLauncher(Tensor 
grad_output, Tensor shift, - Tensor grad_input); - -void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) { - TINShiftForwardCUDAKernelLauncher(input, shift, output); -} - -void tin_shift_backward_cuda(Tensor grad_output, Tensor shift, - Tensor grad_input) { - TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input); -} - -void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output); -void tin_shift_backward_impl(Tensor grad_output, Tensor shift, - Tensor grad_input); -REGISTER_DEVICE_IMPL(tin_shift_forward_impl, CUDA, tin_shift_forward_cuda); -REGISTER_DEVICE_IMPL(tin_shift_backward_impl, CUDA, tin_shift_backward_cuda); - -torch::Tensor upfirdn2d_op(const torch::Tensor& input, - const torch::Tensor& kernel, int up_x, int up_y, - int down_x, int down_y, int pad_x0, int pad_x1, - int pad_y0, int pad_y1); - -torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input, - const torch::Tensor& kernel, int up_x, int up_y, - int down_x, int down_y, int pad_x0, int pad_x1, - int pad_y0, int pad_y1); -REGISTER_DEVICE_IMPL(upfirdn2d_op_impl, CUDA, upfirdn2d_op); - -int HardVoxelizeForwardCUDAKernelLauncher( - const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, - at::Tensor& num_points_per_voxel, const std::vector<float> voxel_size, - const std::vector<float> coors_range, const int max_points, - const int max_voxels, const int NDim = 3); - -int NondeterministicHardVoxelizeForwardCUDAKernelLauncher( - const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, - at::Tensor& num_points_per_voxel, const std::vector<float> voxel_size, - const std::vector<float> coors_range, const int max_points, - const int max_voxels, const int NDim = 3); - -void DynamicVoxelizeForwardCUDAKernelLauncher( - const at::Tensor& points, at::Tensor& coors, - const std::vector<float> voxel_size, const std::vector<float> coors_range, - const int NDim = 3); - -int hard_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& voxels, - at::Tensor& coors, - at::Tensor& num_points_per_voxel, - const std::vector<float> voxel_size, - const std::vector<float> coors_range, - const int max_points, const int max_voxels, - const int NDim) { - return HardVoxelizeForwardCUDAKernelLauncher( - points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, - max_points, max_voxels, NDim); -}; - -int nondeterministic_hard_voxelize_forward_cuda( - const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, - at::Tensor& num_points_per_voxel, const std::vector<float> voxel_size, - const std::vector<float> coors_range, const int max_points, - const int max_voxels, const int NDim) { - return NondeterministicHardVoxelizeForwardCUDAKernelLauncher( - points, voxels, coors, num_points_per_voxel, voxel_size, coors_range, - max_points, max_voxels, NDim); -}; - -void dynamic_voxelize_forward_cuda(const at::Tensor& points, at::Tensor& coors, - const std::vector<float> voxel_size, - const std::vector<float> coors_range, - const int NDim) { - DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size, - coors_range, NDim); -}; - -int hard_voxelize_forward_impl(const at::Tensor& points, at::Tensor& voxels, - at::Tensor& coors, - at::Tensor& num_points_per_voxel, - const std::vector<float> voxel_size, - const std::vector<float> coors_range, - const int max_points, const int max_voxels, - const int NDim); - -int nondeterministic_hard_voxelize_forward_impl( - const at::Tensor& points, at::Tensor& voxels, at::Tensor& coors, - at::Tensor& num_points_per_voxel, const std::vector<float> voxel_size, - const std::vector<float> coors_range, const int max_points, - const int max_voxels, const int NDim); 
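The voxelization wrappers here all reduce to the same per-point arithmetic: subtract the range origin, divide by the voxel size, and reject points outside the grid. A hedged CPU sketch of the dynamic variant (no max_voxels cap) follows, with an assumed (z, y, x) output ordering; it is illustrative, not the mmcv kernel.

#include <cmath>
#include <vector>

// points: flattened N x 3 (x, y, z). Returns flattened N x 3 voxel indices,
// (z, y, x) per point, with -1 markers for points outside coors_range.
std::vector<int> dynamic_voxelize_sketch(const std::vector<float>& points,
                                         const std::vector<float>& voxel_size,
                                         const std::vector<float>& coors_range) {
  const int num_points = static_cast<int>(points.size() / 3);
  std::vector<int> coors(3 * static_cast<std::size_t>(num_points), -1);
  for (int i = 0; i < num_points; ++i) {
    int c[3];
    bool in_range = true;
    for (int j = 0; j < 3; ++j) {
      // Grid extent along axis j, rounded to the nearest voxel count.
      const int grid = static_cast<int>(
          std::round((coors_range[3 + j] - coors_range[j]) / voxel_size[j]));
      const int v = static_cast<int>(
          std::floor((points[3 * i + j] - coors_range[j]) / voxel_size[j]));
      if (v < 0 || v >= grid) {
        in_range = false;
        break;
      }
      c[j] = v;
    }
    if (!in_range) continue;  // leave the -1 marker for out-of-range points
    coors[3 * i + 0] = c[2];  // z
    coors[3 * i + 1] = c[1];  // y
    coors[3 * i + 2] = c[0];  // x
  }
  return coors;
}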
- -void dynamic_voxelize_forward_impl(const at::Tensor& points, at::Tensor& coors, - const std::vector<float> voxel_size, - const std::vector<float> coors_range, - const int NDim); - -REGISTER_DEVICE_IMPL(hard_voxelize_forward_impl, CUDA, - hard_voxelize_forward_cuda); -REGISTER_DEVICE_IMPL(nondeterministic_hard_voxelize_forward_impl, CUDA, - nondeterministic_hard_voxelize_forward_cuda); -REGISTER_DEVICE_IMPL(dynamic_voxelize_forward_impl, CUDA, - dynamic_voxelize_forward_cuda); - -void RotatedFeatureAlignForwardCUDAKernelLauncher(const Tensor features, - const Tensor best_bboxes, - const float spatial_scale, - const int points, - Tensor output); - -void RotatedFeatureAlignBackwardCUDAKernelLauncher(const Tensor top_grad, - const Tensor best_bboxes, - const float spatial_scale, - const int points, - Tensor bottom_grad); - -void rotated_feature_align_forward_cuda(const Tensor features, - const Tensor best_bboxes, - const float spatial_scale, - const int points, Tensor output) { - RotatedFeatureAlignForwardCUDAKernelLauncher(features, best_bboxes, - spatial_scale, points, output); -}; - -void rotated_feature_align_backward_cuda(const Tensor top_grad, - const Tensor best_bboxes, - const float spatial_scale, - const int points, Tensor bottom_grad) { - RotatedFeatureAlignBackwardCUDAKernelLauncher( - top_grad, best_bboxes, spatial_scale, points, bottom_grad); -}; - -void rotated_feature_align_forward_impl(const Tensor features, - const Tensor best_bboxes, - const float spatial_scale, - const int points, Tensor output); - -void rotated_feature_align_backward_impl(const Tensor top_grad, - const Tensor best_bboxes, - const float spatial_scale, - const int points, Tensor bottom_grad); - -REGISTER_DEVICE_IMPL(rotated_feature_align_forward_impl, CUDA, - rotated_feature_align_forward_cuda); -REGISTER_DEVICE_IMPL(rotated_feature_align_backward_impl, CUDA, - rotated_feature_align_backward_cuda); - -void PointsInPolygonsForwardCUDAKernelLauncher(const at::Tensor points, - const at::Tensor polygons, - const int rows, const int cols, - at::Tensor output); - -void points_in_polygons_forward_cuda(const Tensor points, const Tensor polygons, - Tensor output, const int rows, - const int cols) { - PointsInPolygonsForwardCUDAKernelLauncher(points, polygons, rows, cols, - output); -}; - -void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons, - Tensor output, const int rows, - const int cols); - -REGISTER_DEVICE_IMPL(points_in_polygons_forward_impl, CUDA, - points_in_polygons_forward_cuda); - -void MinAreaPolygonsCUDAKernelLauncher(const Tensor pointsets, Tensor polygons); - -void min_area_polygons_cuda(const Tensor pointsets, Tensor polygons) { - MinAreaPolygonsCUDAKernelLauncher(pointsets, polygons); -} - -void min_area_polygons_impl(const Tensor pointsets, Tensor polygons); - -REGISTER_DEVICE_IMPL(min_area_polygons_impl, CUDA, min_area_polygons_cuda); - -void ActiveRotatedFilterForwardCUDAKernelLauncher(const Tensor input, - const Tensor indices, - Tensor output); - -void ActiveRotatedFilterBackwardCUDAKernelLauncher(const Tensor grad_out, - const Tensor indices, - Tensor grad_in); - -void active_rotated_filter_forward_cuda(const Tensor input, - const Tensor indices, Tensor output) { - ActiveRotatedFilterForwardCUDAKernelLauncher(input, indices, output); -}; - -void active_rotated_filter_backward_cuda(const Tensor grad_out, - const Tensor indices, Tensor grad_in) { - ActiveRotatedFilterBackwardCUDAKernelLauncher(grad_out, indices, grad_in); -}; - -void active_rotated_filter_forward_impl(const Tensor 
input, - const Tensor indices, Tensor output); - -void active_rotated_filter_backward_impl(const Tensor grad_out, - const Tensor indices, Tensor grad_in); - -REGISTER_DEVICE_IMPL(active_rotated_filter_forward_impl, CUDA, - active_rotated_filter_forward_cuda); -REGISTER_DEVICE_IMPL(active_rotated_filter_backward_impl, CUDA, - active_rotated_filter_backward_cuda); - -void ConvexIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons, - Tensor ious); - -void ConvexGIoUCUDAKernelLauncher(const Tensor pointsets, const Tensor polygons, - Tensor output); - -void convex_iou_cuda(const Tensor pointsets, const Tensor polygons, - Tensor ious) { - ConvexIoUCUDAKernelLauncher(pointsets, polygons, ious); -} - -void convex_giou_cuda(const Tensor pointsets, const Tensor polygons, - Tensor output) { - ConvexGIoUCUDAKernelLauncher(pointsets, polygons, output); -} - -void convex_iou_impl(const Tensor pointsets, const Tensor polygons, - Tensor ious); - -void convex_giou_impl(const Tensor pointsets, const Tensor polygons, - Tensor output); - -REGISTER_DEVICE_IMPL(convex_iou_impl, CUDA, convex_iou_cuda); -REGISTER_DEVICE_IMPL(convex_giou_impl, CUDA, convex_giou_cuda); - -Tensor DiffIoURotatedSortVerticesCUDAKernelLauncher(Tensor vertices, - Tensor mask, - Tensor num_valid); - -Tensor diff_iou_rotated_sort_vertices_forward_cuda(Tensor vertices, Tensor mask, - Tensor num_valid) { - return DiffIoURotatedSortVerticesCUDAKernelLauncher(vertices, mask, - num_valid); -} - -Tensor diff_iou_rotated_sort_vertices_forward_impl(Tensor vertices, Tensor mask, - Tensor num_valid); - -REGISTER_DEVICE_IMPL(diff_iou_rotated_sort_vertices_forward_impl, CUDA, - diff_iou_rotated_sort_vertices_forward_cuda); - -void ChamferDistanceForwardCUDAKernelLauncher( - const Tensor xyz1, const Tensor xyz2, const Tensor dist1, - const Tensor dist2, const Tensor idx1, const Tensor idx2); - -void ChamferDistanceBackwardCUDAKernelLauncher( - const Tensor xyz1, const Tensor xyz2, Tensor idx1, Tensor idx2, - Tensor grad_dist1, Tensor grad_dist2, Tensor grad_xyz1, Tensor grad_xyz2); - -void chamfer_distance_forward_cuda(const Tensor xyz1, const Tensor xyz2, - const Tensor dist1, const Tensor dist2, - const Tensor idx1, const Tensor idx2) { - ChamferDistanceForwardCUDAKernelLauncher(xyz1, xyz2, dist1, dist2, idx1, - idx2); -}; - -void chamfer_distance_backward_cuda(const Tensor xyz1, const Tensor xyz2, - Tensor idx1, Tensor idx2, Tensor graddist1, - Tensor graddist2, Tensor gradxyz1, - Tensor gradxyz2) { - ChamferDistanceBackwardCUDAKernelLauncher(xyz1, xyz2, idx1, idx2, graddist1, - graddist2, gradxyz1, gradxyz2); -}; - -void chamfer_distance_forward_impl(const Tensor xyz1, const Tensor xyz2, - const Tensor dist1, const Tensor dist2, - const Tensor idx1, const Tensor idx2); - -void chamfer_distance_backward_impl(const Tensor xyz1, const Tensor xyz2, - Tensor idx1, Tensor idx2, Tensor graddist1, - Tensor graddist2, Tensor gradxyz1, - Tensor gradxyz2); - -REGISTER_DEVICE_IMPL(chamfer_distance_forward_impl, CUDA, - chamfer_distance_forward_cuda); -REGISTER_DEVICE_IMPL(chamfer_distance_backward_impl, CUDA, - chamfer_distance_backward_cuda); - -void PrROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale); - -void PrROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois, - Tensor grad_input, int pooled_height, - int pooled_width, float spatial_scale); - -void PrROIPoolCoorBackwardCUDAKernelLauncher( - Tensor output, Tensor grad_output, Tensor input, 
Tensor rois, - Tensor grad_rois, int pooled_height, int pooled_width, float spatial_scale); - -void prroi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output, - int pooled_height, int pooled_width, - float spatial_scale) { - PrROIPoolForwardCUDAKernelLauncher(input, rois, output, pooled_height, - pooled_width, spatial_scale); -} - -void prroi_pool_backward_cuda(Tensor grad_output, Tensor rois, - Tensor grad_input, int pooled_height, - int pooled_width, float spatial_scale) { - PrROIPoolBackwardCUDAKernelLauncher(grad_output, rois, grad_input, - pooled_height, pooled_width, - spatial_scale); -} - -void prroi_pool_coor_backward_cuda(Tensor output, Tensor grad_output, - Tensor input, Tensor rois, Tensor grad_rois, - int pooled_height, int pooled_width, - float spatial_scale) { - PrROIPoolCoorBackwardCUDAKernelLauncher(output, grad_output, input, rois, - grad_rois, pooled_height, - pooled_width, spatial_scale); -} - -void prroi_pool_forward_impl(Tensor input, Tensor rois, Tensor output, - int pooled_height, int pooled_width, - float spatial_scale); -void prroi_pool_backward_impl(Tensor grad_output, Tensor rois, - Tensor grad_input, int pooled_height, - int pooled_width, float spatial_scale); -void prroi_pool_coor_backward_impl(Tensor output, Tensor grad_output, - Tensor input, Tensor rois, Tensor grad_rois, - int pooled_height, int pooled_width, - float spatial_scale); -REGISTER_DEVICE_IMPL(prroi_pool_forward_impl, CUDA, prroi_pool_forward_cuda); -REGISTER_DEVICE_IMPL(prroi_pool_backward_impl, CUDA, prroi_pool_backward_cuda); -REGISTER_DEVICE_IMPL(prroi_pool_coor_backward_impl, CUDA, - prroi_pool_coor_backward_cuda); diff --git a/mmcv/ops/csrc/parrots/deform_conv.cpp b/mmcv/ops/csrc/parrots/deform_conv.cpp deleted file mode 100644 index 86690b939..000000000 --- a/mmcv/ops/csrc/parrots/deform_conv.cpp +++ /dev/null @@ -1,517 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void deformable_im2col_impl(Tensor data_im, Tensor data_offset, - const int channels, const int height, - const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - Tensor data_col) { - DISPATCH_DEVICE_IMPL(deformable_im2col_impl, data_im, data_offset, channels, - height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, - stride_w, dilation_h, dilation_w, parallel_imgs, - deformable_group, data_col); -} - -void deformable_col2im_impl(Tensor data_col, Tensor data_offset, - const int channels, const int height, - const int width, const int ksize_h, - const int ksize_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, - const int parallel_imgs, const int deformable_group, - Tensor grad_im) { - DISPATCH_DEVICE_IMPL(deformable_col2im_impl, data_col, data_offset, channels, - height, width, ksize_h, ksize_w, pad_h, pad_w, stride_h, - stride_w, dilation_h, dilation_w, parallel_imgs, - deformable_group, grad_im); -} - -void deformable_col2im_coord_impl( - Tensor data_col, Tensor data_im, Tensor data_offset, const int channels, - const int height, const int width, const int ksize_h, const int ksize_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int parallel_imgs, - const int deformable_group, Tensor grad_offset) { - DISPATCH_DEVICE_IMPL(deformable_col2im_coord_impl, data_col, data_im, - data_offset, channels, height, width, ksize_h, ksize_w, - pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w, - parallel_imgs, deformable_group, grad_offset); -} - -void deform_conv_shape_check(at::Tensor input, at::Tensor offset, - at::Tensor *gradOutput, at::Tensor weight, int kH, - int kW, int dH, int dW, int padH, int padW, - int dilationH, int dilationW, int group, - int deformable_group) { - TORCH_CHECK( - weight.ndimension() == 4, - "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, but got: %s", - weight.ndimension()); - - TORCH_CHECK(weight.is_contiguous(), "weight tensor has to be contiguous"); - - TORCH_CHECK(kW > 0 && kH > 0, - "kernel size should be greater than zero, but got kH: %d kW: %d", - kH, kW); - - TORCH_CHECK((weight.size(2) == kH && weight.size(3) == kW), - "kernel size should be consistent with weight, ", - "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", - kH, kW, weight.size(2), weight.size(3)); - - TORCH_CHECK(dW > 0 && dH > 0, - "stride should be greater than zero, but got dH: %d dW: %d", dH, - dW); - - TORCH_CHECK( - dilationW > 0 && dilationH > 0, - "dilation should be greater than 0, but got dilationH: %d dilationW: %d", - dilationH, dilationW); - - int ndim = input.ndimension(); - int dimf = 0; - int dimh = 1; - int dimw = 2; - - if (ndim == 4) { - dimf++; - dimh++; - dimw++; - } - - TORCH_CHECK(ndim == 3 || ndim == 4, - "3D or 4D input tensor expected but got: %s", ndim); - - long nInputPlane = weight.size(1) * group; - long inputHeight = input.size(dimh); - long inputWidth = input.size(dimw); - long nOutputPlane = weight.size(0); - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - - TORCH_CHECK(nInputPlane % 
deformable_group == 0, - "input channels must divide deformable group size"); - - if (outputWidth < 1 || outputHeight < 1) - AT_ERROR( - "Given input size: (%ld x %ld x %ld). " - "Calculated output size: (%ld x %ld x %ld). Output size is too small", - nInputPlane, inputHeight, inputWidth, nOutputPlane, outputHeight, - outputWidth); - - TORCH_CHECK(input.size(1) == nInputPlane, - "invalid number of input planes, expected: %d, but got: %d", - nInputPlane, input.size(1)); - - TORCH_CHECK((inputHeight >= kH && inputWidth >= kW), - "input image is smaller than kernel"); - - TORCH_CHECK( - (offset.size(2) == outputHeight && offset.size(3) == outputWidth), - "invalid spatial size of offset, expected height: %d width: %d, but " - "got height: %d width: %d", - outputHeight, outputWidth, offset.size(2), offset.size(3)); - - TORCH_CHECK((offset.size(1) == deformable_group * 2 * kH * kW), - "invalid number of channels of offset"); - - if (gradOutput != NULL) { - TORCH_CHECK( - gradOutput->size(dimf) == nOutputPlane, - "invalid number of gradOutput planes, expected: %d, but got: %d", - nOutputPlane, gradOutput->size(dimf)); - - TORCH_CHECK( - (gradOutput->size(dimh) == outputHeight && - gradOutput->size(dimw) == outputWidth), - "invalid size of gradOutput, expected height: %d width: %d , but " - "got height: %d width: %d", - outputHeight, outputWidth, gradOutput->size(dimh), - gradOutput->size(dimw)); - } -} - -void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, - Tensor output, Tensor columns, Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step) { - if (input.device().is_cuda()) { -#ifdef MMCV_WITH_CUDA - CHECK_CUDA_INPUT(input); - CHECK_CUDA_INPUT(offset); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(output); - CHECK_CUDA_INPUT(columns); - CHECK_CUDA_INPUT(ones); -#else - AT_ERROR("DeformConv is not compiled with GPU support"); -#endif - } else { - CHECK_CPU_INPUT(input); - CHECK_CPU_INPUT(offset); - CHECK_CPU_INPUT(weight); - CHECK_CPU_INPUT(output); - CHECK_CPU_INPUT(columns); - CHECK_CPU_INPUT(ones); - } - - deform_conv_shape_check(input, offset, NULL, weight, kH, kW, dH, dW, padH, - padW, dilationH, dilationW, group, deformable_group); - at::DeviceGuard guard(input.device()); - - int batch = 1; - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input.unsqueeze_(0); - offset.unsqueeze_(0); - } - - // todo: assert batchSize divisible by im2col_step - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - output = output.view({batchSize / im2col_step, im2col_step, nOutputPlane, - outputHeight, outputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < outputHeight * outputWidth) { - ones = at::ones({outputHeight, outputWidth}, input.options()); - } - - input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - offset = - offset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, 
outputWidth}); - - Tensor output_buffer = at::zeros({batchSize / im2col_step, nOutputPlane, - im2col_step * outputHeight, outputWidth}, - output.options()); - - output_buffer = output_buffer.view( - {output_buffer.size(0), group, output_buffer.size(1) / group, - output_buffer.size(2), output_buffer.size(3)}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight, - inputWidth, kH, kW, padH, padW, dH, dW, dilationH, - dilationW, im2col_step, deformable_group, columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - - for (int g = 0; g < group; g++) { - output_buffer[elt][g] = output_buffer[elt][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output_buffer[elt][g]); - } - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), - weight.size(3), weight.size(4)}); - } - - output_buffer = output_buffer.view( - {output_buffer.size(0), output_buffer.size(1) * output_buffer.size(2), - output_buffer.size(3), output_buffer.size(4)}); - - output_buffer = output_buffer.view({batchSize / im2col_step, nOutputPlane, - im2col_step, outputHeight, outputWidth}); - output_buffer.transpose_(1, 2); - output.copy_(output_buffer); - output = output.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - output = output.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - } -} - -void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, - Tensor gradInput, Tensor gradOffset, - Tensor weight, Tensor columns, int kW, int kH, - int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step) { - if (input.device().is_cuda()) { -#ifdef MMCV_WITH_CUDA - CHECK_CUDA_INPUT(input); - CHECK_CUDA_INPUT(offset); - CHECK_CUDA_INPUT(gradOutput); - CHECK_CUDA_INPUT(gradInput); - CHECK_CUDA_INPUT(gradOffset); - CHECK_CUDA_INPUT(weight); - CHECK_CUDA_INPUT(columns); -#else - AT_ERROR("DeformConv is not compiled with GPU support"); -#endif - } else { - CHECK_CPU_INPUT(input); - CHECK_CPU_INPUT(offset); - CHECK_CPU_INPUT(gradOutput); - CHECK_CPU_INPUT(gradInput); - CHECK_CPU_INPUT(gradOffset); - CHECK_CPU_INPUT(weight); - CHECK_CPU_INPUT(columns); - } - deform_conv_shape_check(input, offset, &gradOutput, weight, kH, kW, dH, dW, - padH, padW, dilationH, dilationW, group, - deformable_group); - - at::DeviceGuard guard(input.device()); - - int batch = 1; - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view({1, input.size(0), input.size(1), input.size(2)}); - offset = offset.view({1, offset.size(0), offset.size(1), offset.size(2)}); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = weight.size(0); - - long outputWidth = - (inputWidth + 2 * padW 
- (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - // change order of grad output - gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, - nOutputPlane, outputHeight, outputWidth}); - gradOutput.transpose_(1, 2); - - gradInput = gradInput.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - gradOffset = gradOffset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, - outputWidth}); - offset = - offset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - // divide into groups - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - gradOutput = gradOutput.view( - {gradOutput.size(0), group, gradOutput.size(1) / group, - gradOutput.size(2), gradOutput.size(3), gradOutput.size(4)}); - - for (int g = 0; g < group; g++) { - columns[g] = columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), - gradOutput[elt][g].flatten(1), 0.0f, 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - gradOutput = gradOutput.view( - {gradOutput.size(0), gradOutput.size(1) * gradOutput.size(2), - gradOutput.size(3), gradOutput.size(4), gradOutput.size(5)}); - - deformable_col2im_coord_impl(columns, input[elt], offset[elt], nInputPlane, - inputHeight, inputWidth, kH, kW, padH, padW, - dH, dW, dilationH, dilationW, im2col_step, - deformable_group, gradOffset[elt]); - - deformable_col2im_impl(columns, offset[elt], nInputPlane, inputHeight, - inputWidth, kH, kW, padH, padW, dH, dW, dilationH, - dilationW, im2col_step, deformable_group, - gradInput[elt]); - - weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), - weight.size(3), weight.size(4)}); - } - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - gradInput = gradInput.view({batchSize, nInputPlane, inputHeight, inputWidth}); - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - gradOffset = gradOffset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - gradInput = gradInput.view({nInputPlane, inputHeight, inputWidth}); - offset = offset.view({offset.size(1), offset.size(2), offset.size(3)}); - gradOffset = - gradOffset.view({offset.size(1), offset.size(2), offset.size(3)}); - } -} - -void deform_conv_backward_parameters(Tensor input, Tensor offset, - Tensor gradOutput, Tensor gradWeight, - Tensor columns, Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, float scale, 
- int im2col_step) { - if (input.device().is_cuda()) { -#ifdef MMCV_WITH_CUDA - CHECK_CUDA_INPUT(input); - CHECK_CUDA_INPUT(offset); - CHECK_CUDA_INPUT(gradOutput); - CHECK_CUDA_INPUT(gradWeight); - CHECK_CUDA_INPUT(columns); - CHECK_CUDA_INPUT(ones); -#else - AT_ERROR("DeformConv is not compiled with GPU support"); -#endif - } else { - CHECK_CPU_INPUT(input); - CHECK_CPU_INPUT(offset); - CHECK_CPU_INPUT(gradOutput); - CHECK_CPU_INPUT(gradWeight); - CHECK_CPU_INPUT(columns); - CHECK_CPU_INPUT(ones); - } - - deform_conv_shape_check(input, offset, &gradOutput, gradWeight, kH, kW, dH, - dW, padH, padW, dilationH, dilationW, group, - deformable_group); - at::DeviceGuard guard(input.device()); - - int batch = 1; - - if (input.ndimension() == 3) { - // Force batch - batch = 0; - input = input.view( - at::IntList({1, input.size(0), input.size(1), input.size(2)})); - gradOutput = gradOutput.view( - {1, gradOutput.size(0), gradOutput.size(1), gradOutput.size(2)}); - } - - long batchSize = input.size(0); - long nInputPlane = input.size(1); - long inputHeight = input.size(2); - long inputWidth = input.size(3); - - long nOutputPlane = gradWeight.size(0); - - long outputWidth = - (inputWidth + 2 * padW - (dilationW * (kW - 1) + 1)) / dW + 1; - long outputHeight = - (inputHeight + 2 * padH - (dilationH * (kH - 1) + 1)) / dH + 1; - - TORCH_CHECK((offset.size(0) == batchSize), "invalid batch size of offset"); - - columns = at::zeros( - {nInputPlane * kW * kH, im2col_step * outputHeight * outputWidth}, - input.options()); - - gradOutput = gradOutput.view({batchSize / im2col_step, im2col_step, - nOutputPlane, outputHeight, outputWidth}); - gradOutput.transpose_(1, 2); - - Tensor gradOutputBuffer = at::zeros_like(gradOutput); - gradOutputBuffer = - gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, im2col_step, - outputHeight, outputWidth}); - gradOutputBuffer = gradOutputBuffer.contiguous(); - gradOutputBuffer.copy_(gradOutput); - gradOutputBuffer = - gradOutputBuffer.view({batchSize / im2col_step, nOutputPlane, - im2col_step * outputHeight, outputWidth}); - - gradOutput.transpose_(1, 2); - gradOutput = - gradOutput.view({batchSize, nOutputPlane, outputHeight, outputWidth}); - - input = input.view({batchSize / im2col_step, im2col_step, nInputPlane, - inputHeight, inputWidth}); - offset = - offset.view({batchSize / im2col_step, im2col_step, - deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - for (int elt = 0; elt < batchSize / im2col_step; elt++) { - deformable_im2col_impl(input[elt], offset[elt], nInputPlane, inputHeight, - inputWidth, kH, kW, padH, padW, dH, dW, dilationH, - dilationW, im2col_step, deformable_group, columns); - - // divide into group - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), group, gradOutputBuffer.size(1) / group, - gradOutputBuffer.size(2), gradOutputBuffer.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - gradWeight = - gradWeight.view({group, gradWeight.size(0) / group, gradWeight.size(1), - gradWeight.size(2), gradWeight.size(3)}); - - for (int g = 0; g < group; g++) { - gradWeight[g] = gradWeight[g] - .flatten(1) - .addmm_(gradOutputBuffer[elt][g].flatten(1), - columns[g].transpose(1, 0), 1.0, scale) - .view_as(gradWeight[g]); - } - gradOutputBuffer = gradOutputBuffer.view( - {gradOutputBuffer.size(0), - gradOutputBuffer.size(1) * gradOutputBuffer.size(2), - gradOutputBuffer.size(3), gradOutputBuffer.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), 
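// [editor's note] The outputWidth / outputHeight expressions above are
// the standard dilated-convolution output-size formula,
//
//   out = (in + 2 * pad - (dilation * (k - 1) + 1)) / stride + 1,
//
// where dilation * (k - 1) + 1 is the effective kernel extent (it
// reduces to k when dilation == 1). For example, in = 56, pad = 1,
// k = 3, dilation = 1, stride = 1 gives (56 + 2 - 3) / 1 + 1 = 56,
// the familiar shape-preserving 3x3 case.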
columns.size(2)}); - gradWeight = gradWeight.view({gradWeight.size(0) * gradWeight.size(1), - gradWeight.size(2), gradWeight.size(3), - gradWeight.size(4)}); - } - - input = input.view({batchSize, nInputPlane, inputHeight, inputWidth}); - offset = offset.view( - {batchSize, deformable_group * 2 * kH * kW, outputHeight, outputWidth}); - - if (batch == 0) { - gradOutput = gradOutput.view({nOutputPlane, outputHeight, outputWidth}); - input = input.view({nInputPlane, inputHeight, inputWidth}); - } -} diff --git a/mmcv/ops/csrc/parrots/deform_conv_parrots.cpp b/mmcv/ops/csrc/parrots/deform_conv_parrots.cpp deleted file mode 100644 index c07a170df..000000000 --- a/mmcv/ops/csrc/parrots/deform_conv_parrots.cpp +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "deform_conv_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void deform_conv_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, - im2col_step; - SSAttrs(attr) - .get("kW", kW) - .get("kH", kH) - .get("dW", dW) - .get("dH", dH) - .get("padW", padW) - .get("padH", padH) - .get("dilationW", dilationW) - .get("dilationH", dilationH) - .get("group", group) - .get("deformable_group", deformable_group) - .get("im2col_step", im2col_step) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& weight = buildATensor(ctx, ins[1]); - const auto& offset = buildATensor(ctx, ins[2]); - - auto output = buildATensor(ctx, outs[0]); - auto columns = buildATensor(ctx, outs[1]); - auto ones = buildATensor(ctx, outs[2]); - - deform_conv_forward(input, weight, offset, output, columns, ones, kW, kH, dW, - dH, padW, padH, dilationW, dilationH, group, - deformable_group, im2col_step); -} - -void deform_conv_backward_input_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, - im2col_step; - SSAttrs(attr) - .get("kW", kW) - .get("kH", kH) - .get("dW", dW) - .get("dH", dH) - .get("padW", padW) - .get("padH", padH) - .get("dilationW", dilationW) - .get("dilationH", dilationH) - .get("group", group) - .get("deformable_group", deformable_group) - .get("im2col_step", im2col_step) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& offset = buildATensor(ctx, ins[1]); - const auto& gradOutput = buildATensor(ctx, ins[2]); - - auto gradInput = buildATensor(ctx, outs[0]); - auto gradOffset = buildATensor(ctx, outs[1]); - auto weight = buildATensor(ctx, outs[2]); - auto columns = buildATensor(ctx, outs[3]); - - deform_conv_backward_input(input, offset, gradOutput, gradInput, gradOffset, - weight, columns, kW, kH, dW, dH, padW, padH, - dilationW, dilationH, group, deformable_group, - im2col_step); -} - -void deform_conv_backward_parameters_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, - im2col_step; - float scale; - SSAttrs(attr) - .get("kW", kW) - .get("kH", kH) - .get("dW", dW) - .get("dH", dH) - .get("padW", padW) - .get("padH", padH) - .get("dilationW", dilationW) - .get("dilationH", dilationH) - .get("group", group) - .get("deformable_group", deformable_group) - 
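// [editor's note] Every parrots wrapper in these deleted files follows
// the same shape: unpack scalar attributes by name through an
// SSAttrs(attr).get(...)...done() chain like the one running through
// this point, wrap the inputs and outputs, and call the shared ATen
// implementation. In the pure-PyTorch build that remains after this
// patch, the same scalars arrive as ordinary bound arguments; a minimal
// pybind11 sketch of such a binding (illustrative, not a verbatim
// excerpt from mmcv's pybind layer):
//
//   #include <torch/extension.h>
//
//   PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
//     m.def("deform_conv_backward_parameters",
//           &deform_conv_backward_parameters,
//           "deform_conv_backward_parameters (CPU/CUDA)");
//   }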
.get("scale", scale) - .get("im2col_step", im2col_step) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& offset = buildATensor(ctx, ins[1]); - const auto& gradOutput = buildATensor(ctx, ins[2]); - - auto gradWeight = buildATensor(ctx, outs[0]); - auto columns = buildATensor(ctx, outs[1]); - auto ones = buildATensor(ctx, outs[2]); - deform_conv_backward_parameters(input, offset, gradOutput, gradWeight, - columns, ones, kW, kH, dW, dH, padW, padH, - dilationW, dilationH, group, deformable_group, - scale, im2col_step); -} -#endif - -void deform_conv_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, - im2col_step; - SSAttrs(attr) - .get("kW", kW) - .get("kH", kH) - .get("dW", dW) - .get("dH", dH) - .get("padW", padW) - .get("padH", padH) - .get("dilationW", dilationW) - .get("dilationH", dilationH) - .get("group", group) - .get("deformable_group", deformable_group) - .get("im2col_step", im2col_step) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& weight = buildATensor(ctx, ins[1]); - const auto& offset = buildATensor(ctx, ins[2]); - - auto output = buildATensor(ctx, outs[0]); - auto columns = buildATensor(ctx, outs[1]); - auto ones = buildATensor(ctx, outs[2]); - - deform_conv_forward(input, weight, offset, output, columns, ones, kW, kH, dW, - dH, padW, padH, dilationW, dilationH, group, - deformable_group, im2col_step); -} - -void deform_conv_backward_input_cpu_parrots(HostContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, - im2col_step; - SSAttrs(attr) - .get("kW", kW) - .get("kH", kH) - .get("dW", dW) - .get("dH", dH) - .get("padW", padW) - .get("padH", padH) - .get("dilationW", dilationW) - .get("dilationH", dilationH) - .get("group", group) - .get("deformable_group", deformable_group) - .get("im2col_step", im2col_step) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& offset = buildATensor(ctx, ins[1]); - const auto& gradOutput = buildATensor(ctx, ins[2]); - - auto gradInput = buildATensor(ctx, outs[0]); - auto gradOffset = buildATensor(ctx, outs[1]); - auto weight = buildATensor(ctx, outs[2]); - auto columns = buildATensor(ctx, outs[3]); - - deform_conv_backward_input(input, offset, gradOutput, gradInput, gradOffset, - weight, columns, kW, kH, dW, dH, padW, padH, - dilationW, dilationH, group, deformable_group, - im2col_step); -} - -void deform_conv_backward_parameters_cpu_parrots( - HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kW, kH, dW, dH, padW, padH, dilationW, dilationH, group, deformable_group, - im2col_step; - float scale; - SSAttrs(attr) - .get("kW", kW) - .get("kH", kH) - .get("dW", dW) - .get("dH", dH) - .get("padW", padW) - .get("padH", padH) - .get("dilationW", dilationW) - .get("dilationH", dilationH) - .get("group", group) - .get("deformable_group", deformable_group) - .get("scale", scale) - .get("im2col_step", im2col_step) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& offset = buildATensor(ctx, ins[1]); - const auto& gradOutput = buildATensor(ctx, ins[2]); - - auto gradWeight = buildATensor(ctx, outs[0]); - auto columns = buildATensor(ctx, outs[1]); - auto ones = buildATensor(ctx, 
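// [editor's note] buildATensor and updateDArray are the parrots <-> ATen
// bridge: each wrapper adapts incoming parrots DArrays to at::Tensor
// before invoking the shared kernel and converts results back on the way
// out. Note how the CPU wrappers here repeat the CUDA wrappers almost
// line for line, differing only in the context type; the retained
// PyTorch extension avoids that duplication by routing each op through a
// single *_impl function and registering device backends behind it,
// roughly (a sketch of the registry idiom from
// pytorch_device_registry.hpp, not a verbatim excerpt):
//
//   // in a CUDA translation unit:
//   REGISTER_DEVICE_IMPL(deform_conv_forward_impl, CUDA,
//                        deform_conv_forward_cuda);
//   // callers then go through DISPATCH_DEVICE_IMPL, which selects the
//   // backend from the device type of the tensors it receives.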
outs[2]); - deform_conv_backward_parameters(input, offset, gradOutput, gradWeight, - columns, ones, kW, kH, dW, dH, padW, padH, - dilationW, dilationH, group, deformable_group, - scale, im2col_step); -} - -PARROTS_EXTENSION_REGISTER(deform_conv_forward) - .attr("kW") - .attr("kH") - .attr("dW") - .attr("dH") - .attr("padW") - .attr("padH") - .attr("dilationW") - .attr("dilationH") - .attr("group") - .attr("deformable_group") - .attr("im2col_step") - .input(3) - .output(3) - .apply(deform_conv_forward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(deform_conv_forward_cuda_parrots) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(deform_conv_backward_input) - .attr("kW") - .attr("kH") - .attr("dW") - .attr("dH") - .attr("padW") - .attr("padH") - .attr("dilationW") - .attr("dilationH") - .attr("group") - .attr("deformable_group") - .attr("im2col_step") - .input(3) - .output(4) - .apply(deform_conv_backward_input_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(deform_conv_backward_input_cuda_parrots) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(deform_conv_backward_parameters) - .attr("kW") - .attr("kH") - .attr("dW") - .attr("dH") - .attr("padW") - .attr("padH") - .attr("dilationW") - .attr("dilationH") - .attr("group") - .attr("deformable_group") - .attr("scale") - .attr("im2col_step") - .input(3) - .output(3) - .apply(deform_conv_backward_parameters_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(deform_conv_backward_parameters_cuda_parrots) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/deform_conv_pytorch.h b/mmcv/ops/csrc/parrots/deform_conv_pytorch.h deleted file mode 100644 index e0d3d40d1..000000000 --- a/mmcv/ops/csrc/parrots/deform_conv_pytorch.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef DEFORM_CONV_PYTORCH_H -#define DEFORM_CONV_PYTORCH_H -#include -using namespace at; - -void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, - Tensor output, Tensor columns, Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step); - -void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, - Tensor gradInput, Tensor gradOffset, - Tensor weight, Tensor columns, int kW, int kH, - int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, int im2col_step); - -void deform_conv_backward_parameters(Tensor input, Tensor offset, - Tensor gradOutput, Tensor gradWeight, - Tensor columns, Tensor ones, int kW, - int kH, int dW, int dH, int padW, int padH, - int dilationW, int dilationH, int group, - int deformable_group, float scale, - int im2col_step); - -#endif // DEFORM_CONV_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/deform_roi_pool.cpp b/mmcv/ops/csrc/parrots/deform_roi_pool.cpp deleted file mode 100644 index 4fb78a96e..000000000 --- a/mmcv/ops/csrc/parrots/deform_roi_pool.cpp +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void deform_roi_pool_forward_impl(Tensor input, Tensor rois, Tensor offset, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale, - int sampling_ratio, float gamma) { - DISPATCH_DEVICE_IMPL(deform_roi_pool_forward_impl, input, rois, offset, - output, pooled_height, pooled_width, spatial_scale, - sampling_ratio, gamma); -} - -void deform_roi_pool_backward_impl(Tensor grad_output, Tensor input, - Tensor rois, Tensor offset, - Tensor grad_input, Tensor grad_offset, - int pooled_height, int pooled_width, - float spatial_scale, int sampling_ratio, - float gamma) { - DISPATCH_DEVICE_IMPL(deform_roi_pool_backward_impl, grad_output, input, rois, - offset, grad_input, grad_offset, pooled_height, - pooled_width, spatial_scale, sampling_ratio, gamma); -} - -void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset, - Tensor output, int pooled_height, int pooled_width, - float spatial_scale, int sampling_ratio, - float gamma) { - deform_roi_pool_forward_impl(input, rois, offset, output, pooled_height, - pooled_width, spatial_scale, sampling_ratio, - gamma); -} - -void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois, - Tensor offset, Tensor grad_input, - Tensor grad_offset, int pooled_height, - int pooled_width, float spatial_scale, - int sampling_ratio, float gamma) { - deform_roi_pool_backward_impl(grad_output, input, rois, offset, grad_input, - grad_offset, pooled_height, pooled_width, - spatial_scale, sampling_ratio, gamma); -} diff --git a/mmcv/ops/csrc/parrots/deform_roi_pool_parrots.cpp b/mmcv/ops/csrc/parrots/deform_roi_pool_parrots.cpp deleted file mode 100644 index fc2701d52..000000000 --- a/mmcv/ops/csrc/parrots/deform_roi_pool_parrots.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "deform_roi_pool_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -/*void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, - * Tensor output, int pooled_height, - * int pooled_width, float spatial_scale, - * int sampling_ratio, float gamma); - */ -void deform_roi_pool_forward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pooled_height; - int pooled_width; - float spatial_scale; - int sampling_ratio; - float gamma; - SSAttrs(attr) - .get("pooled_height", pooled_height) - .get("pooled_width", pooled_width) - .get("spatial_scale", spatial_scale) - .get("sampling_ratio", sampling_ratio) - .get("gamma", gamma) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& rois = buildATensor(ctx, ins[1]); - const auto& offset = buildATensor(ctx, ins[2]); - - auto output = buildATensor(ctx, outs[0]); - deform_roi_pool_forward_cuda(input, rois, offset, output, pooled_height, - pooled_width, spatial_scale, sampling_ratio, - gamma); -} - -/*void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, - * Tensor rois, Tensor offset, - * Tensor grad_input, Tensor grad_offset, - * int pooled_height, int pooled_width, - * float spatial_scale, int sampling_ratio, - * float gamma); - */ -void deform_roi_pool_backward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pooled_height; - int pooled_width; - float spatial_scale; - int sampling_ratio; - float gamma; - - SSAttrs(attr) - .get("pooled_height", pooled_height) - .get("pooled_width", pooled_width) - .get("spatial_scale", spatial_scale) - .get("sampling_ratio", sampling_ratio) - .get("gamma", gamma) - .done(); - - const auto& grad_output = buildATensor(ctx, ins[0]); - const auto& input = buildATensor(ctx, ins[1]); - const auto& rois = buildATensor(ctx, ins[2]); - const auto& offset = buildATensor(ctx, ins[3]); - - auto grad_input = buildATensor(ctx, outs[0]); - auto grad_offset = buildATensor(ctx, outs[1]); - - deform_roi_pool_backward_cuda(grad_output, input, rois, offset, grad_input, - grad_offset, pooled_height, pooled_width, - spatial_scale, sampling_ratio, gamma); -} - -PARROTS_EXTENSION_REGISTER(deform_roi_pool_forward) - .attr("pooled_height") - .attr("pooled_width") - .attr("spatial_scale") - .attr("sampling_ratio") - .attr("gamma") - .input(3) - .output(1) - .apply(deform_roi_pool_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(deform_roi_pool_backward) - .attr("pooled_height") - .attr("pooled_width") - .attr("spatial_scale") - .attr("sampling_ratio") - .attr("gamma") - .input(4) - .output(2) - .apply(deform_roi_pool_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/deform_roi_pool_pytorch.h b/mmcv/ops/csrc/parrots/deform_roi_pool_pytorch.h deleted file mode 100644 index ac0f2c324..000000000 --- a/mmcv/ops/csrc/parrots/deform_roi_pool_pytorch.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef DEFORM_ROI_POOL_PYTORCH_H -#define DEFORM_ROI_POOL_PYTORCH_H -#include -using namespace at; - -void deform_roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor offset, - Tensor output, int pooled_height, - int pooled_width, float spatial_scale, - int sampling_ratio, float gamma); - -void deform_roi_pool_backward_cuda(Tensor grad_output, Tensor input, - Tensor rois, Tensor offset, - Tensor grad_input, Tensor grad_offset, - int pooled_height, int pooled_width, - float spatial_scale, int sampling_ratio, - float gamma); -#endif // DEFORM_ROI_POOL_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/diff_iou_rotated.cpp b/mmcv/ops/csrc/parrots/diff_iou_rotated.cpp deleted file mode 100644 index 2361b7fbe..000000000 --- a/mmcv/ops/csrc/parrots/diff_iou_rotated.cpp +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -Tensor diff_iou_rotated_sort_vertices_forward_impl(Tensor vertices, Tensor mask, - Tensor num_valid) { - return DISPATCH_DEVICE_IMPL(diff_iou_rotated_sort_vertices_forward_impl, - vertices, mask, num_valid); -} - -Tensor diff_iou_rotated_sort_vertices_forward(Tensor vertices, Tensor mask, - Tensor num_valid) { - return diff_iou_rotated_sort_vertices_forward_impl(vertices, mask, num_valid); -} diff --git a/mmcv/ops/csrc/parrots/diff_iou_rotated_parrots.cpp b/mmcv/ops/csrc/parrots/diff_iou_rotated_parrots.cpp deleted file mode 100644 index b4d3e0e05..000000000 --- a/mmcv/ops/csrc/parrots/diff_iou_rotated_parrots.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "diff_iou_rotated_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void diff_iou_rotated_sort_vertices_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - at::Tensor boxes, scores, dets; - auto vertices = buildATensor(ctx, ins[0]); - auto mask = buildATensor(ctx, ins[1]); - auto num_valid = buildATensor(ctx, ins[2]); - auto out = - diff_iou_rotated_sort_vertices_forward_cuda(vertices, mask, num_valid); - updateDArray(ctx, out, outs[0]); -} - -PARROTS_EXTENSION_REGISTER(diff_iou_rotated_sort_vertices_forward) - .input(3) - .output(1) - .apply(diff_iou_rotated_sort_vertices_forward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/diff_iou_rotated_pytorch.h b/mmcv/ops/csrc/parrots/diff_iou_rotated_pytorch.h deleted file mode 100644 index ef911ecc2..000000000 --- a/mmcv/ops/csrc/parrots/diff_iou_rotated_pytorch.h +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef DIFF_IOU_ROTATED_PYTORCH_H -#define DIFF_IOU_ROTATED_PYTORCH_H -#include -using namespace at; - -Tensor diff_iou_rotated_sort_vertices_forward_cuda(Tensor vertices, Tensor mask, - Tensor num_valid); - -#endif // DIFF_IOU_ROTATED_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/focal_loss.cpp b/mmcv/ops/csrc/parrots/focal_loss.cpp deleted file mode 100644 index ed0e21865..000000000 --- a/mmcv/ops/csrc/parrots/focal_loss.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void sigmoid_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, - Tensor output, float gamma, float alpha) { - DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_forward_impl, input, target, weight, - output, gamma, alpha); -} - -void sigmoid_focal_loss_backward_impl(Tensor input, Tensor target, - Tensor weight, Tensor grad_input, - float gamma, float alpha) { - DISPATCH_DEVICE_IMPL(sigmoid_focal_loss_backward_impl, input, target, weight, - grad_input, gamma, alpha); -} - -void softmax_focal_loss_forward_impl(Tensor input, Tensor target, Tensor weight, - Tensor output, float gamma, float alpha) { - DISPATCH_DEVICE_IMPL(softmax_focal_loss_forward_impl, input, target, weight, - output, gamma, alpha); -} - -void softmax_focal_loss_backward_impl(Tensor input, Tensor target, - Tensor weight, Tensor buff, - Tensor grad_input, float gamma, - float alpha) { - DISPATCH_DEVICE_IMPL(softmax_focal_loss_backward_impl, input, target, weight, - buff, grad_input, gamma, alpha); -} - -void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, - Tensor output, float gamma, float alpha) { - sigmoid_focal_loss_forward_impl(input, target, weight, output, gamma, alpha); -} - -void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, - Tensor grad_input, float gamma, float alpha) { - sigmoid_focal_loss_backward_impl(input, target, weight, grad_input, gamma, - alpha); -} - -void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight, - Tensor output, float gamma, float alpha) { - softmax_focal_loss_forward_impl(input, target, weight, output, gamma, alpha); -} - -void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, - Tensor buff, Tensor grad_input, float gamma, - float alpha) { - softmax_focal_loss_backward_impl(input, target, weight, buff, grad_input, - gamma, alpha); -} diff --git a/mmcv/ops/csrc/parrots/focal_loss_parrots.cpp b/mmcv/ops/csrc/parrots/focal_loss_parrots.cpp deleted file mode 100644 index 044e200c4..000000000 --- a/mmcv/ops/csrc/parrots/focal_loss_parrots.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "focal_loss_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void sigmoid_focal_loss_forward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float gamma; - float alpha; - SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); - - // get inputs and outputs - const auto& input = buildATensor(ctx, ins[0]); - const auto& target = buildATensor(ctx, ins[1]); - const auto& weight = buildATensor(ctx, ins[2]); - - auto output = buildATensor(ctx, outs[0]); - - sigmoid_focal_loss_forward_cuda(input, target, weight, output, gamma, alpha); -} - -void sigmoid_focal_loss_backward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float gamma; - float alpha; - SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); - - // get inputs and outputs - const auto& input = buildATensor(ctx, ins[0]); - const auto& target = buildATensor(ctx, ins[1]); - const auto& weight = buildATensor(ctx, ins[2]); - - auto grad_input = buildATensor(ctx, outs[0]); - - sigmoid_focal_loss_backward_cuda(input, target, weight, grad_input, gamma, - alpha); -} - -void softmax_focal_loss_forward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float gamma; - float alpha; - SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); - - // get inputs and outputs - const auto& input = buildATensor(ctx, ins[0]); - const auto& target = buildATensor(ctx, ins[1]); - const auto& weight = buildATensor(ctx, ins[2]); - - auto output = buildATensor(ctx, outs[0]); - softmax_focal_loss_forward_cuda(input, target, weight, output, gamma, alpha); -} - -void softmax_focal_loss_backward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float gamma; - float alpha; - SSAttrs(attr).get("gamma", gamma).get("alpha", alpha).done(); - - // get inputs and outputs - const auto& input = buildATensor(ctx, ins[0]); - const auto& target = buildATensor(ctx, ins[1]); - const auto& weight = buildATensor(ctx, ins[2]); - - auto buff = buildATensor(ctx, outs[0]); - auto grad_input = buildATensor(ctx, outs[1]); - softmax_focal_loss_backward_cuda(input, target, weight, buff, grad_input, - gamma, alpha); -} - -PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_forward) - .attr("gamma") - .attr("alpha") - .input(3) - .output(1) - .apply(sigmoid_focal_loss_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(sigmoid_focal_loss_backward) - .attr("gamma") - .attr("alpha") - .input(3) - .output(1) - .apply(sigmoid_focal_loss_backward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(softmax_focal_loss_forward) - .attr("gamma") - .attr("alpha") - .input(3) - .output(1) - .apply(softmax_focal_loss_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(softmax_focal_loss_backward) - .attr("gamma") - .attr("alpha") - .input(3) - .output(2) - .apply(softmax_focal_loss_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/focal_loss_pytorch.h b/mmcv/ops/csrc/parrots/focal_loss_pytorch.h deleted file mode 100644 index b7a00c8ab..000000000 --- a/mmcv/ops/csrc/parrots/focal_loss_pytorch.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef FOCAL_LOSS_PYTORCH_H -#define FOCAL_LOSS_PYTORCH_H -#include -using namespace at; - -void sigmoid_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, - Tensor output, float gamma, float alpha); - -void sigmoid_focal_loss_backward_cuda(Tensor input, Tensor target, - Tensor weight, Tensor grad_input, - float gamma, float alpha); - -void softmax_focal_loss_forward_cuda(Tensor input, Tensor target, Tensor weight, - Tensor output, float gamma, float alpha); - -void softmax_focal_loss_backward_cuda(Tensor input, Tensor target, - Tensor weight, Tensor buff, - Tensor grad_input, float gamma, - float alpha); -#endif // FOCAL_LOSS_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/furthest_point_sample.cpp b/mmcv/ops/csrc/parrots/furthest_point_sample.cpp deleted file mode 100644 index 9c7098acd..000000000 --- a/mmcv/ops/csrc/parrots/furthest_point_sample.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Modified from -// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/sampling.cpp - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void furthest_point_sampling_forward_impl(Tensor points_tensor, - Tensor temp_tensor, Tensor idx_tensor, - int b, int n, int m) { - DISPATCH_DEVICE_IMPL(furthest_point_sampling_forward_impl, points_tensor, - temp_tensor, idx_tensor, b, n, m); -} - -void furthest_point_sampling_with_dist_forward_impl(Tensor points_tensor, - Tensor temp_tensor, - Tensor idx_tensor, int b, - int n, int m) { - DISPATCH_DEVICE_IMPL(furthest_point_sampling_with_dist_forward_impl, - points_tensor, temp_tensor, idx_tensor, b, n, m); -} - -void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor, - Tensor idx_tensor, int b, int n, int m) { - furthest_point_sampling_forward_impl(points_tensor, temp_tensor, idx_tensor, - b, n, m); -} - -void furthest_point_sampling_with_dist_forward(Tensor points_tensor, - Tensor temp_tensor, - Tensor idx_tensor, int b, int n, - int m) { - furthest_point_sampling_with_dist_forward_impl(points_tensor, temp_tensor, - idx_tensor, b, n, m); -} diff --git a/mmcv/ops/csrc/parrots/furthest_point_sample_parrots.cpp b/mmcv/ops/csrc/parrots/furthest_point_sample_parrots.cpp deleted file mode 100644 index 483bfb243..000000000 --- a/mmcv/ops/csrc/parrots/furthest_point_sample_parrots.cpp +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "furthest_point_sample_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void furthest_point_sample_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, n, m; - SSAttrs(attr).get("b", b).get("n", n).get("m", m).done(); - - auto points_tensor = buildATensor(ctx, ins[0]); - auto temp_tensor = buildATensor(ctx, ins[1]); - - auto idx_tensor = buildATensor(ctx, outs[0]); - - furthest_point_sampling_forward(points_tensor, temp_tensor, idx_tensor, b, n, - m); -} - -void furthest_point_sampling_with_dist_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, n, m; - SSAttrs(attr).get("b", b).get("n", n).get("m", m).done(); - - auto points_tensor = buildATensor(ctx, ins[0]); - auto temp_tensor = buildATensor(ctx, ins[1]); - - auto idx_tensor = buildATensor(ctx, outs[0]); - - furthest_point_sampling_with_dist_forward(points_tensor, temp_tensor, - idx_tensor, b, n, m); -} -PARROTS_EXTENSION_REGISTER(furthest_point_sampling_forward) - .attr("b") - .attr("n") - .attr("m") - .input(2) - .output(1) - .apply(furthest_point_sample_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(furthest_point_sampling_with_dist_forward) - .attr("b") - .attr("n") - .attr("m") - .input(2) - .output(1) - .apply(furthest_point_sampling_with_dist_forward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/furthest_point_sample_pytorch.h b/mmcv/ops/csrc/parrots/furthest_point_sample_pytorch.h deleted file mode 100644 index 0325cd66e..000000000 --- a/mmcv/ops/csrc/parrots/furthest_point_sample_pytorch.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef FURTHEST_POINT_SAMPLE_PYTORCH_H -#define FURTHEST_POINT_SAMPLE_PYTORCH_H -#include -using namespace at; - -void furthest_point_sampling_forward(Tensor points_tensor, Tensor temp_tensor, - Tensor idx_tensor, int b, int n, int m); - -void furthest_point_sampling_with_dist_forward(Tensor points_tensor, - Tensor temp_tensor, - Tensor idx_tensor, int b, int n, - int m); -#endif // FURTHEST_POINT_SAMPLE_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp b/mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp deleted file mode 100644 index 8d411c9d8..000000000 --- a/mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp +++ /dev/null @@ -1,119 +0,0 @@ -// Modified from -// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/fused_bias_act.cpp - -/* -Copyright (c) 2021, NVIDIA Corporation. All rights reserved. - -NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator -Augmentation (ADA) -======================================================================= - -1. Definitions - -"Licensor" means any person or entity that distributes its Work. - -"Software" means the original work of authorship made available under -this License. - -"Work" means the Software and any additions to or derivative works of -the Software that are made available under this License. - -The terms "reproduce," "reproduction," "derivative works," and -"distribution" have the meaning as provided under U.S. copyright law; -provided, however, that for the purposes of this License, derivative -works shall not include works that remain separable from, or merely -link (or bind by name) to the interfaces of, the Work. 
- -Works, including the Software, are "made available" under this License -by including in or with the Work either (a) a copyright notice -referencing the applicability of this License to the Work, or (b) a -copy of this License. - -2. License Grants - - 2.1 Copyright Grant. Subject to the terms and conditions of this - License, each Licensor grants to you a perpetual, worldwide, - non-exclusive, royalty-free, copyright license to reproduce, - prepare derivative works of, publicly display, publicly perform, - sublicense and distribute its Work and any resulting derivative - works in any form. - -3. Limitations - - 3.1 Redistribution. You may reproduce or distribute the Work only - if (a) you do so under this License, (b) you include a complete - copy of this License with your distribution, and (c) you retain - without modification any copyright, patent, trademark, or - attribution notices that are present in the Work. - - 3.2 Derivative Works. You may specify that additional or different - terms apply to the use, reproduction, and distribution of your - derivative works of the Work ("Your Terms") only if (a) Your Terms - provide that the use limitation in Section 3.3 applies to your - derivative works, and (b) you identify the specific derivative - works that are subject to Your Terms. Notwithstanding Your Terms, - this License (including the redistribution requirements in Section - 3.1) will continue to apply to the Work itself. - - 3.3 Use Limitation. The Work and any derivative works thereof only - may be used or intended for use non-commercially. Notwithstanding - the foregoing, NVIDIA and its affiliates may use the Work and any - derivative works commercially. As used herein, "non-commercially" - means for research or evaluation purposes only. - - 3.4 Patent Claims. If you bring or threaten to bring a patent claim - against any Licensor (including any claim, cross-claim or - counterclaim in a lawsuit) to enforce any patents that you allege - are infringed by any Work, then your rights under this License from - such Licensor (including the grant in Section 2.1) will terminate - immediately. - - 3.5 Trademarks. This License does not grant any rights to use any - Licensor’s or its affiliates’ names, logos, or trademarks, except - as necessary to reproduce the notices described in this License. - - 3.6 Termination. If you violate any term of this License, then your - rights under this License (including the grant in Section 2.1) will - terminate immediately. - -4. Disclaimer of Warranty. - -THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR -NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER -THIS LICENSE. - -5. Limitation of Liability. - -EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL -THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE -SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, -INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF -OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK -(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, -LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER -COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF -THE POSSIBILITY OF SUCH DAMAGES. 
- -======================================================================= -*/ - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -torch::Tensor fused_bias_leakyrelu_op_impl(const torch::Tensor& input, - const torch::Tensor& bias, - const torch::Tensor& refer, int act, - int grad, float alpha, float scale) { - return DISPATCH_DEVICE_IMPL(fused_bias_leakyrelu_op_impl, input, bias, refer, - act, grad, alpha, scale); -} - -torch::Tensor fused_bias_leakyrelu(const torch::Tensor& input, - const torch::Tensor& bias, - const torch::Tensor& refer, int act, - int grad, float alpha, float scale) { - return fused_bias_leakyrelu_op_impl(input, bias, refer, act, grad, alpha, - scale); -} diff --git a/mmcv/ops/csrc/parrots/fused_bias_parrots.cpp b/mmcv/ops/csrc/parrots/fused_bias_parrots.cpp deleted file mode 100644 index 47409ad20..000000000 --- a/mmcv/ops/csrc/parrots/fused_bias_parrots.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include - -#include -#include -#include -using namespace at; -using namespace parrots; - -torch::Tensor fused_bias_leakyrelu(const torch::Tensor &input, - const torch::Tensor &bias, - const torch::Tensor &refer, int act, - int grad, float alpha, float scale); - -void fused_bias_leakyrelu_parrots(CudaContext &ctx, const SSElement &attr, - const OperatorBase::in_list_t &ins, - OperatorBase::out_list_t &outs) { - int act, grad; - float alpha, scale; - SSAttrs(attr) - .get("act", act) - .get("grad", grad) - .get("alpha", alpha) - .get("scale", scale) - .done(); - const auto &input = buildATensor(ctx, ins[0]); - const auto &bias = buildATensor(ctx, ins[1]); - const auto &refer = buildATensor(ctx, ins[2]); - auto out = fused_bias_leakyrelu(input, bias, refer, act, grad, alpha, scale); - updateDArray(ctx, out, outs[0]); -} - -PARROTS_EXTENSION_REGISTER(fused_bias_leakyrelu) - .attr("act") - .attr("grad") - .attr("alpha") - .attr("scale") - .input(3) - .output(1) - .apply(fused_bias_leakyrelu_parrots) - .done(); diff --git a/mmcv/ops/csrc/parrots/gather_points.cpp b/mmcv/ops/csrc/parrots/gather_points.cpp deleted file mode 100644 index b8fb02002..000000000 --- a/mmcv/ops/csrc/parrots/gather_points.cpp +++ /dev/null @@ -1,30 +0,0 @@ -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void gather_points_forward_impl(int b, int c, int n, int npoints, - const Tensor points, const Tensor idx, - Tensor out) { - DISPATCH_DEVICE_IMPL(gather_points_forward_impl, b, c, n, npoints, points, - idx, out); -} - -void gather_points_backward_impl(int b, int c, int n, int npoints, - const Tensor grad_out, const Tensor idx, - Tensor grad_points) { - DISPATCH_DEVICE_IMPL(gather_points_backward_impl, b, c, n, npoints, grad_out, - idx, grad_points); -} - -void gather_points_forward(Tensor points_tensor, Tensor idx_tensor, - Tensor out_tensor, int b, int c, int n, - int npoints) { - gather_points_forward_impl(b, c, n, npoints, points_tensor, idx_tensor, - out_tensor); -} - -void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, - Tensor grad_points_tensor, int b, int c, int n, - int npoints) { - gather_points_backward_impl(b, c, n, npoints, grad_out_tensor, idx_tensor, - grad_points_tensor); -} diff --git a/mmcv/ops/csrc/parrots/gather_points_parrots.cpp b/mmcv/ops/csrc/parrots/gather_points_parrots.cpp deleted file mode 100644 index 1d2d9e129..000000000 --- a/mmcv/ops/csrc/parrots/gather_points_parrots.cpp +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "gather_points_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void gather_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, c, n, npoints; - SSAttrs(attr) - .get("b", b) - .get("c", c) - .get("n", n) - .get("npoints", npoints) - .done(); - - auto points_tensor = buildATensor(ctx, ins[0]); - auto idx_tensor = buildATensor(ctx, ins[1]); - - auto out_tensor = buildATensor(ctx, outs[0]); - - gather_points_forward(points_tensor, idx_tensor, out_tensor, b, c, n, - npoints); -} - -void gather_points_backward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, c, n, npoints; - SSAttrs(attr) - .get("b", b) - .get("c", c) - .get("n", n) - .get("npoints", npoints) - .done(); - - auto grad_out_tensor = buildATensor(ctx, ins[0]); - auto idx_tensor = buildATensor(ctx, ins[1]); - - auto grad_points_tensor = buildATensor(ctx, outs[0]); - - gather_points_backward(grad_out_tensor, idx_tensor, grad_points_tensor, b, c, - n, npoints); -} - -PARROTS_EXTENSION_REGISTER(gather_points_forward) - .attr("b") - .attr("c") - .attr("n") - .attr("npoints") - .input(2) - .output(1) - .apply(gather_points_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(gather_points_backward) - .attr("b") - .attr("c") - .attr("n") - .attr("npoints") - .input(2) - .output(1) - .apply(gather_points_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/gather_points_pytorch.h b/mmcv/ops/csrc/parrots/gather_points_pytorch.h deleted file mode 100644 index 1689ae6ad..000000000 --- a/mmcv/ops/csrc/parrots/gather_points_pytorch.h +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef GATHER_POINTS_PYTORCH_H -#define GATHER_POINTS_PYTORCH_H -#include -using namespace at; - -void gather_points_forward(Tensor points_tensor, Tensor idx_tensor, - Tensor out_tensor, int b, int c, int n, int npoints); - -void gather_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, - Tensor grad_points_tensor, int b, int c, int n, - int npoints); -#endif // GATHER_POINTS_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/group_points.cpp b/mmcv/ops/csrc/parrots/group_points.cpp deleted file mode 100644 index cdd190d40..000000000 --- a/mmcv/ops/csrc/parrots/group_points.cpp +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved. 
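// [editor's note] gather_points_forward, whose binding appears just
// above, computes out[b][c][m] = points[b][c][idx[b][m]] for points of
// shape (B, C, N) and idx of shape (B, npoints). Functionally this
// matches a plain ATen gather along the last dimension; a reference
// sketch for equivalence checking (the real op keeps idx as int32 and
// fuses the index expansion, so this is not the implementation):
//
//   at::Tensor gather_points_ref(const at::Tensor& points,
//                                const at::Tensor& idx) {
//     const auto B = points.size(0);
//     const auto C = points.size(1);
//     const auto index =
//         idx.to(at::kLong).unsqueeze(1).expand({B, C, idx.size(1)});
//     return points.gather(/*dim=*/2, index);
//   }
//
// The backward pass is the matching scatter-add of grad_out into
// grad_points at the same indices.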
-// Modified from -// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/group_points.cpp - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void group_points_forward_impl(int b, int c, int n, int npoints, int nsample, - const Tensor points, const Tensor idx, - Tensor out) { - DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample, - points, idx, out); -} - -void group_points_backward_impl(int b, int c, int n, int npoints, int nsample, - const Tensor grad_out, const Tensor idx, - Tensor grad_points) { - DISPATCH_DEVICE_IMPL(group_points_backward_impl, b, c, n, npoints, nsample, - grad_out, idx, grad_points); -} - -void group_points_forward(Tensor points_tensor, Tensor idx_tensor, - Tensor out_tensor, int b, int c, int n, int npoints, - int nsample) { - DISPATCH_DEVICE_IMPL(group_points_forward_impl, b, c, n, npoints, nsample, - points_tensor, idx_tensor, out_tensor); -} - -void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, - Tensor grad_points_tensor, int b, int c, int n, - int npoints, int nsample) { - group_points_backward_impl(b, c, n, npoints, nsample, grad_out_tensor, - idx_tensor, grad_points_tensor); -} diff --git a/mmcv/ops/csrc/parrots/group_points_parrots.cpp b/mmcv/ops/csrc/parrots/group_points_parrots.cpp deleted file mode 100644 index 282c01a8c..000000000 --- a/mmcv/ops/csrc/parrots/group_points_parrots.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "group_points_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void group_points_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, c, n, npoints, nsample; - SSAttrs(attr) - .get("b", b) - .get("c", c) - .get("n", n) - .get("npoints", npoints) - .get("nsample", nsample) - .done(); - auto points_tensor = buildATensor(ctx, ins[0]); - auto idx_tensor = buildATensor(ctx, ins[1]); - - auto out_tensor = buildATensor(ctx, outs[0]); - - group_points_forward(points_tensor, idx_tensor, out_tensor, b, c, n, npoints, - nsample); -} - -void group_points_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, c, n, npoints, nsample; - SSAttrs(attr) - .get("b", b) - .get("c", c) - .get("n", n) - .get("npoints", npoints) - .get("nsample", nsample) - .done(); - auto grad_out_tensor = buildATensor(ctx, ins[0]); - auto idx_tensor = buildATensor(ctx, ins[1]); - - auto grad_points_tensor = buildATensor(ctx, outs[0]); - - group_points_backward(grad_out_tensor, idx_tensor, grad_points_tensor, b, c, - n, npoints, nsample); -} - -PARROTS_EXTENSION_REGISTER(group_points_forward) - .attr("b") - .attr("c") - .attr("n") - .attr("npoints") - .attr("nsample") - .input(2) - .output(1) - .apply(group_points_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(group_points_backward) - .attr("b") - .attr("c") - .attr("n") - .attr("npoints") - .attr("nsample") - .input(2) - .output(1) - .apply(group_points_backward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/group_points_pytorch.h b/mmcv/ops/csrc/parrots/group_points_pytorch.h deleted file mode 100644 index e704ab078..000000000 --- a/mmcv/ops/csrc/parrots/group_points_pytorch.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef GROUP_POINTS_PYTORCH_H -#define GROUP_POINTS_PYTORCH_H -#include -using namespace at; - -void group_points_forward(Tensor points_tensor, Tensor idx_tensor, - Tensor out_tensor, int b, int c, int n, int npoints, - int nsample); - -void group_points_backward(Tensor grad_out_tensor, Tensor idx_tensor, - Tensor grad_points_tensor, int b, int c, int n, - int npoints, int nsample); - -#endif // GROUP_POINTS_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/info.cpp b/mmcv/ops/csrc/parrots/info.cpp deleted file mode 100644 index a4cc41861..000000000 --- a/mmcv/ops/csrc/parrots/info.cpp +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -// modified from -// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp -#include "pytorch_cpp_helper.hpp" - -#ifdef MMCV_WITH_CUDA -#ifdef MMCV_WITH_HIP -#include -int get_hiprt_version() { - int runtimeVersion; - hipRuntimeGetVersion(&runtimeVersion); - return runtimeVersion; -} -#else -#include -int get_cudart_version() { return CUDART_VERSION; } -#endif -#endif - -std::string get_compiling_cuda_version() { -#ifdef MMCV_WITH_CUDA -#ifndef MMCV_WITH_HIP - std::ostringstream oss; - // copied from - // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 - auto printCudaStyleVersion = [&](int v) { - oss << (v / 1000) << "." << (v / 10 % 100); - if (v % 10 != 0) { - oss << "." << (v % 10); - } - }; - printCudaStyleVersion(get_cudart_version()); - return oss.str(); -#else - std::ostringstream oss; - oss << get_hiprt_version(); - return oss.str(); -#endif -#else - return std::string("not available"); -#endif -} - -// similar to -// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/Version.cpp -std::string get_compiler_version() { - std::ostringstream ss; -#if defined(__GNUC__) -#ifndef __clang__ - { ss << "GCC " << __GNUC__ << "." << __GNUC_MINOR__; } -#endif -#endif - -#if defined(__clang_major__) - { - ss << "clang " << __clang_major__ << "." << __clang_minor__ << "." - << __clang_patchlevel__; - } -#endif - -#if defined(_MSC_VER) - { ss << "MSVC " << _MSC_FULL_VER; } -#endif - return ss.str(); -} diff --git a/mmcv/ops/csrc/parrots/iou3d.cpp b/mmcv/ops/csrc/parrots/iou3d.cpp deleted file mode 100644 index a347c0ee9..000000000 --- a/mmcv/ops/csrc/parrots/iou3d.cpp +++ /dev/null @@ -1,66 +0,0 @@ -// Modified from -// https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/iou3d_nms/src/iou3d_nms.cpp - -/* -3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others) -Written by Shaoshuai Shi -All Rights Reserved 2019-2020. 
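[editor's note] THREADS_PER_BLOCK_NMS, defined just after this header,
is sizeof(unsigned long long) * 8 == 64: the rotated-NMS kernel packs
the pairwise "box j suppresses box i" decisions into 64-bit words, one
bit per candidate box, with ceil(N / 64) words per row. A sketch of how
such a bitmask is typically reduced on the host into the final keep
list (illustrative variable names, not an excerpt from this file):

  const int col_blocks = (boxes_num + 63) / 64;
  std::vector<unsigned long long> remv(col_blocks, 0ULL);
  for (int i = 0; i < boxes_num; ++i) {
    const int nblock = i / 64;
    const int inblock = i % 64;
    if (!(remv[nblock] & (1ULL << inblock))) {  // box i survives
      keep_indices.push_back(i);
      const unsigned long long* row = mask_host + i * col_blocks;
      for (int j = nblock; j < col_blocks; ++j) remv[j] |= row[j];
    }
  }

Assuming boxes arrive pre-sorted by descending score, as the
Python-side callers arrange, the first unsuppressed box is always the
best remaining one, which makes this linear scan equivalent to classic
greedy NMS.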
-*/ - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -const int THREADS_PER_BLOCK_NMS = sizeof(unsigned long long) * 8; - -void iou3d_boxes_overlap_bev_forward_impl(const int num_a, const Tensor boxes_a, - const int num_b, const Tensor boxes_b, - Tensor ans_overlap) { - DISPATCH_DEVICE_IMPL(iou3d_boxes_overlap_bev_forward_impl, num_a, boxes_a, - num_b, boxes_b, ans_overlap); -} - -void iou3d_nms3d_forward_impl(const Tensor boxes, Tensor &keep, - Tensor &keep_num, float nms_overlap_thresh) { - DISPATCH_DEVICE_IMPL(iou3d_nms3d_forward_impl, boxes, keep, keep_num, - nms_overlap_thresh); -} - -void iou3d_nms3d_normal_forward_impl(const Tensor boxes, Tensor &keep, - Tensor &keep_num, - float nms_overlap_thresh) { - DISPATCH_DEVICE_IMPL(iou3d_nms3d_normal_forward_impl, boxes, keep, keep_num, - nms_overlap_thresh); -} - -void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, - Tensor ans_overlap) { - // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] - // params boxes_b: (M, 5) - // params ans_overlap: (N, M) - int num_a = boxes_a.size(0); - int num_b = boxes_b.size(0); - - iou3d_boxes_overlap_bev_forward_impl(num_a, boxes_a, num_b, boxes_b, - ans_overlap); -} - -void iou3d_nms3d_forward(Tensor boxes, Tensor keep, Tensor keep_num, - float nms_overlap_thresh) { - // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] - // params keep: (N) - CHECK_CONTIGUOUS(boxes); - CHECK_CONTIGUOUS(keep); - - iou3d_nms3d_forward_impl(boxes, keep, keep_num, nms_overlap_thresh); -} - -void iou3d_nms3d_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, - float nms_overlap_thresh) { - // params boxes: (N, 7) [x, y, z, dx, dy, dz, heading] - // params keep: (N) - - CHECK_CONTIGUOUS(boxes); - CHECK_CONTIGUOUS(keep); - - iou3d_nms3d_normal_forward_impl(boxes, keep, keep_num, nms_overlap_thresh); -} diff --git a/mmcv/ops/csrc/parrots/iou3d_parrots.cpp b/mmcv/ops/csrc/parrots/iou3d_parrots.cpp deleted file mode 100644 index 20e288aea..000000000 --- a/mmcv/ops/csrc/parrots/iou3d_parrots.cpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "iou3d_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void iou3d_boxes_overlap_bev_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto boxes_a = buildATensor(ctx, ins[0]); - auto boxes_b = buildATensor(ctx, ins[1]); - - auto ans_iou = buildATensor(ctx, outs[0]); - - iou3d_boxes_overlap_bev_forward(boxes_a, boxes_b, ans_iou); -} - -void iou3d_nms3d_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float nms_overlap_thresh; - SSAttrs(attr).get("nms_overlap_thresh", nms_overlap_thresh).done(); - - auto boxes = buildATensor(ctx, ins[0]); - - auto keep = buildATensor(ctx, outs[0]); - auto keep_num = buildATensor(ctx, outs[1]); - - iou3d_nms3d_forward(boxes, keep, keep_num, nms_overlap_thresh); -} - -void iou3d_nms3d_normal_forward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float nms_overlap_thresh; - SSAttrs(attr).get("nms_overlap_thresh", nms_overlap_thresh).done(); - - auto boxes = buildATensor(ctx, ins[0]); - - auto keep = buildATensor(ctx, outs[0]); - auto keep_num = buildATensor(ctx, outs[1]); - - iou3d_nms3d_normal_forward(boxes, keep, keep_num, nms_overlap_thresh); -} - -PARROTS_EXTENSION_REGISTER(iou3d_boxes_overlap_bev_forward) - .input(2) - .output(1) - .apply(iou3d_boxes_overlap_bev_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(iou3d_nms3d_forward) - .attr("nms_overlap_thresh") - .input(1) - .output(2) - .apply(iou3d_nms3d_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(iou3d_nms3d_normal_forward) - .attr("nms_overlap_thresh") - .input(1) - .output(2) - .apply(iou3d_nms3d_normal_forward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/iou3d_pytorch.h b/mmcv/ops/csrc/parrots/iou3d_pytorch.h deleted file mode 100644 index 76170edc7..000000000 --- a/mmcv/ops/csrc/parrots/iou3d_pytorch.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef IOU_3D_PYTORCH_H -#define IOU_3D_PYTORCH_H -#include -using namespace at; - -void iou3d_boxes_overlap_bev_forward(Tensor boxes_a, Tensor boxes_b, - Tensor ans_overlap); - -void iou3d_nms3d_forward(Tensor boxes, Tensor keep, Tensor keep_num, - float nms_overlap_thresh); - -void iou3d_nms3d_normal_forward(Tensor boxes, Tensor keep, Tensor keep_num, - float nms_overlap_thresh); - -#endif // IOU_3D_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/knn.cpp b/mmcv/ops/csrc/parrots/knn.cpp deleted file mode 100644 index b4be9428c..000000000 --- a/mmcv/ops/csrc/parrots/knn.cpp +++ /dev/null @@ -1,17 +0,0 @@ -// Modified from -// https://github.com/CVMI-Lab/PAConv/tree/main/scene_seg/lib/pointops/src/knnquery_heap - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void knn_forward_impl(int b, int n, int m, int nsample, const Tensor xyz, - const Tensor new_xyz, Tensor idx, Tensor dist2) { - DISPATCH_DEVICE_IMPL(knn_forward_impl, b, n, m, nsample, xyz, new_xyz, idx, - dist2); -} - -void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, - Tensor dist2_tensor, int b, int n, int m, int nsample) { - knn_forward_impl(b, n, m, nsample, xyz_tensor, new_xyz_tensor, idx_tensor, - dist2_tensor); -} diff --git a/mmcv/ops/csrc/parrots/knn_parrots.cpp b/mmcv/ops/csrc/parrots/knn_parrots.cpp deleted file mode 100644 index 585b84644..000000000 --- a/mmcv/ops/csrc/parrots/knn_parrots.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "knn_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void knn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int b, n, m, nsample; - SSAttrs(attr) - .get("b", b) - .get("n", n) - .get("m", m) - .get("nsample", nsample) - .done(); - - auto xyz_tensor = buildATensor(ctx, ins[0]); - auto new_xyz_tensor = buildATensor(ctx, ins[1]); - - auto idx_tensor = buildATensor(ctx, outs[0]); - auto dist2_tensor = buildATensor(ctx, outs[1]); - - knn_forward(xyz_tensor, new_xyz_tensor, idx_tensor, dist2_tensor, b, n, m, - nsample); -} - -PARROTS_EXTENSION_REGISTER(knn_forward) - .attr("b") - .attr("n") - .attr("m") - .attr("nsample") - .input(2) - .output(2) - .apply(knn_forward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/knn_pytorch.h b/mmcv/ops/csrc/parrots/knn_pytorch.h deleted file mode 100644 index b0875f838..000000000 --- a/mmcv/ops/csrc/parrots/knn_pytorch.h +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef KNN_PYTORCH_H -#define KNN_PYTORCH_H -#include -using namespace at; - -void knn_forward(Tensor xyz_tensor, Tensor new_xyz_tensor, Tensor idx_tensor, - Tensor dist2_tensor, int b, int n, int m, int nsample); -#endif // KNN_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/masked_conv2d.cpp b/mmcv/ops/csrc/parrots/masked_conv2d.cpp deleted file mode 100644 index 590392535..000000000 --- a/mmcv/ops/csrc/parrots/masked_conv2d.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void masked_im2col_forward_impl(const Tensor im, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor col, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w) { - DISPATCH_DEVICE_IMPL(masked_im2col_forward_impl, im, mask_h_idx, mask_w_idx, - col, kernel_h, kernel_w, pad_h, pad_w); -} - -void masked_col2im_forward_impl(const Tensor col, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor im, int height, - int width, int channels) { - DISPATCH_DEVICE_IMPL(masked_col2im_forward_impl, col, mask_h_idx, mask_w_idx, - im, height, width, channels); -} - -void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor col, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w) { - masked_im2col_forward_impl(im, mask_h_idx, mask_w_idx, col, kernel_h, - kernel_w, pad_h, pad_w); -} - -void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor im, int height, - int width, int channels) { - masked_col2im_forward_impl(col, mask_h_idx, mask_w_idx, im, height, width, - channels); -} diff --git a/mmcv/ops/csrc/parrots/masked_conv2d_parrots.cpp b/mmcv/ops/csrc/parrots/masked_conv2d_parrots.cpp deleted file mode 100644 index 39f19740c..000000000 --- a/mmcv/ops/csrc/parrots/masked_conv2d_parrots.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "masked_conv2d_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void masked_im2col_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - // im: (n, ic, h, w), kernel size (kh, kw) - // kernel: (oc, ic * kh * kw), col: (kh * kw * ic, ow * oh) - int kernel_h, kernel_w, pad_h, pad_w; - SSAttrs(attr) - .get("kernel_h", kernel_h) - .get("kernel_w", kernel_w) - .get("pad_h", pad_h) - .get("pad_w", pad_w) - .done(); - - const auto& im = buildATensor(ctx, ins[0]); - const auto& mask_h_idx = buildATensor(ctx, ins[1]); - const auto& mask_w_idx = buildATensor(ctx, ins[2]); - - auto col = buildATensor(ctx, outs[0]); - masked_im2col_forward_cuda(im, mask_h_idx, mask_w_idx, col, kernel_h, - kernel_w, pad_h, pad_w); -} - -void masked_col2im_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - // im: (n, ic, h, w), kernel size (kh, kw) - // kernel: (oc, ic * kh * kh), col: (kh * kw * ic, ow * oh) - int height, width, channels; - SSAttrs(attr) - .get("height", height) - .get("width", width) - .get("channels", channels) - .done(); - - const auto& col = buildATensor(ctx, ins[0]); - const auto& mask_h_idx = buildATensor(ctx, ins[1]); - const auto& mask_w_idx = buildATensor(ctx, ins[2]); - - auto im = buildATensor(ctx, outs[0]); - masked_col2im_forward_cuda(col, mask_h_idx, mask_w_idx, im, height, width, - channels); -} - -PARROTS_EXTENSION_REGISTER(masked_im2col_forward) - .attr("kernel_h") - .attr("kernel_w") - .attr("pad_h") - .attr("pad_w") - .input(3) - .output(1) - .apply(masked_im2col_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(masked_col2im_forward) - .attr("height") - .attr("width") - .attr("channels") - .input(3) - .output(1) - .apply(masked_col2im_forward_cuda_parrots) - .done(); -#endif diff --git a/mmcv/ops/csrc/parrots/masked_conv2d_pytorch.h 
b/mmcv/ops/csrc/parrots/masked_conv2d_pytorch.h deleted file mode 100644 index 36d5643f6..000000000 --- a/mmcv/ops/csrc/parrots/masked_conv2d_pytorch.h +++ /dev/null @@ -1,15 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef MASKED_CONV2D_PYTORCH_H -#define MASKED_CONV2D_PYTORCH_H -#include <torch/extension.h> -using namespace at; - -void masked_im2col_forward_cuda(const Tensor im, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor col, - const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w); - -void masked_col2im_forward_cuda(const Tensor col, const Tensor mask_h_idx, - const Tensor mask_w_idx, Tensor im, int height, - int width, int channels); -#endif // MASKED_CONV2D_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/min_area_polygons.cpp b/mmcv/ops/csrc/parrots/min_area_polygons.cpp deleted file mode 100644 index 8ff996dc8..000000000 --- a/mmcv/ops/csrc/parrots/min_area_polygons.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void min_area_polygons_impl(const Tensor pointsets, Tensor polygons) { - DISPATCH_DEVICE_IMPL(min_area_polygons_impl, pointsets, polygons); -} - -void min_area_polygons(const Tensor pointsets, Tensor polygons) { - min_area_polygons_impl(pointsets, polygons); -} diff --git a/mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp b/mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp deleted file mode 100644 index d9e4ff4b3..000000000 --- a/mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include <parrots/compute/aten.hpp> -#include <parrots/extension.hpp> -#include <parrots/foundation/ssattrs.hpp> - -#include "min_area_polygons_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void min_area_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto pointsets = buildATensor(ctx, ins[0]); - - auto polygons = buildATensor(ctx, outs[0]); - min_area_polygons(pointsets, polygons); -} - -PARROTS_EXTENSION_REGISTER(min_area_polygons) - .input(1) - .output(1) - .apply(min_area_polygons_cuda_parrots) - .done(); - -#endif diff --git a/mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h b/mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h deleted file mode 100644 index 1df276418..000000000 --- a/mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef MIN_AREA_POLYGONS_PYTORCH_H -#define MIN_AREA_POLYGONS_PYTORCH_H -#include <torch/extension.h> -using namespace at; - -void min_area_polygons(const Tensor pointsets, Tensor polygons); - -#endif // MIN_AREA_POLYGONS_PYTORCH_H
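`min_area_polygons.cpp` above is the device-agnostic half of each operator: the public function forwards to an `*_impl` whose body is a single `DISPATCH_DEVICE_IMPL(...)`, a runtime lookup that routes the call to whichever backend registered an implementation for the device of the argument tensors. A minimal sketch of both halves, assuming the `pytorch_device_registry.hpp` macros used throughout these files (`square_forward*` are illustrative names):

```cpp
// Device-agnostic side, as in the deleted wrappers above.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"

void square_forward_impl(const Tensor in, Tensor out) {
  // Resolved at runtime against the registry, keyed by the tensors' device.
  DISPATCH_DEVICE_IMPL(square_forward_impl, in, out);
}

void square_forward(const Tensor in, Tensor out) {
  square_forward_impl(in, out);  // public entry point stays backend-free
}

// In a CUDA translation unit, the backend registers its launcher:
//   void square_forward_cuda(const Tensor in, Tensor out) { /* kernel launch */ }
//   REGISTER_DEVICE_IMPL(square_forward_impl, CUDA, square_forward_cuda);
```

diff --git a/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp b/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp deleted file mode 100644 index 12b538a05..000000000 --- a/mmcv/ops/csrc/parrots/modulated_deform_conv.cpp +++ /dev/null @@ -1,237 +0,0 @@ -// Copyright (c) OpenMMLab.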
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void modulated_deformable_im2col_impl( - const Tensor data_im, const Tensor data_offset, const Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, Tensor data_col) { - DISPATCH_DEVICE_IMPL(modulated_deformable_im2col_impl, data_im, data_offset, - data_mask, batch_size, channels, height_im, width_im, - height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, - stride_h, stride_w, dilation_h, dilation_w, - deformable_group, data_col); -} - -void modulated_deformable_col2im_impl( - const Tensor data_col, const Tensor data_offset, const Tensor data_mask, - const int batch_size, const int channels, const int height_im, - const int width_im, const int height_col, const int width_col, - const int kernel_h, const int kernel_w, const int pad_h, const int pad_w, - const int stride_h, const int stride_w, const int dilation_h, - const int dilation_w, const int deformable_group, Tensor grad_im) { - DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_impl, data_col, data_offset, - data_mask, batch_size, channels, height_im, width_im, - height_col, width_col, kernel_h, kernel_w, pad_h, pad_w, - stride_h, stride_w, dilation_h, dilation_w, - deformable_group, grad_im); -} - -void modulated_deformable_col2im_coord_impl( - const Tensor data_col, const Tensor data_im, const Tensor data_offset, - const Tensor data_mask, const int batch_size, const int channels, - const int height_im, const int width_im, const int height_col, - const int width_col, const int kernel_h, const int kernel_w, - const int pad_h, const int pad_w, const int stride_h, const int stride_w, - const int dilation_h, const int dilation_w, const int deformable_group, - Tensor grad_offset, Tensor grad_mask) { - DISPATCH_DEVICE_IMPL(modulated_deformable_col2im_coord_impl, data_col, - data_im, data_offset, data_mask, batch_size, channels, - height_im, width_im, height_col, width_col, kernel_h, - kernel_w, pad_h, pad_w, stride_h, stride_w, dilation_h, - dilation_w, deformable_group, grad_offset, grad_mask); -} - -void modulated_deform_conv_forward( - Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, - Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, - const int stride_h, const int stride_w, const int pad_h, const int pad_w, - const int dilation_h, const int dilation_w, const int group, - const int deformable_group, const bool with_bias) { - at::DeviceGuard guard(input.device()); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_out = weight.size(0); - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", - kernel_h_, kernel_w, kernel_h_, kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", - channels, channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int 
width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... - ones = at::ones({height_out, width_out}, input.options()); - } - - // resize output - output = output.view({batch, channels_out, height_out, width_out}).zero_(); - // resize temporary columns - columns = - at::zeros({channels * kernel_h * kernel_w, 1 * height_out * width_out}, - input.options()); - - output = output.view({output.size(0), group, output.size(1) / group, - output.size(2), output.size(3)}); - - for (int b = 0; b < batch; b++) { - modulated_deformable_im2col_impl( - input[b], offset[b], mask[b], 1, channels, height, width, height_out, - width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, deformable_group, columns); - - // divide into group - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - - for (int g = 0; g < group; g++) { - output[b][g] = output[b][g] - .flatten(1) - .addmm_(weight[g].flatten(1), columns[g]) - .view_as(output[b][g]); - } - - weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), - weight.size(3), weight.size(4)}); - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - } - - output = output.view({output.size(0), output.size(1) * output.size(2), - output.size(3), output.size(4)}); - - if (with_bias) { - output += bias.view({1, bias.size(0), 1, 1}); - } -} - -void modulated_deform_conv_backward( - Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, - Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, - Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, - int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, - const bool with_bias) { - at::DeviceGuard guard(input.device()); - - const int batch = input.size(0); - const int channels = input.size(1); - const int height = input.size(2); - const int width = input.size(3); - - const int channels_kernel = weight.size(1); - const int kernel_h_ = weight.size(2); - const int kernel_w_ = weight.size(3); - if (kernel_h_ != kernel_h || kernel_w_ != kernel_w) - AT_ERROR("Input shape and kernel shape won't match: (%d x %d vs %d x %d).", - kernel_h_, kernel_w, kernel_h_, kernel_w_); - if (channels != channels_kernel * group) - AT_ERROR("Input shape and kernel channels won't match: (%d vs %d).", - channels, channels_kernel * group); - - const int height_out = - (height + 2 * pad_h - (dilation_h * (kernel_h - 1) + 1)) / stride_h + 1; - const int width_out = - (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1; - - if (ones.ndimension() != 2 || - ones.size(0) * ones.size(1) < height_out * width_out) { - // Resize plane and fill with ones... 
- ones = at::ones({height_out, width_out}, input.options()); - } - - grad_input = grad_input.view({batch, channels, height, width}); - columns = at::zeros({channels * kernel_h * kernel_w, height_out * width_out}, - input.options()); - - grad_output = - grad_output.view({grad_output.size(0), group, grad_output.size(1) / group, - grad_output.size(2), grad_output.size(3)}); - - for (int b = 0; b < batch; b++) { - // divide int group - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - weight = weight.view({group, weight.size(0) / group, weight.size(1), - weight.size(2), weight.size(3)}); - - for (int g = 0; g < group; g++) { - columns[g].addmm_(weight[g].flatten(1).transpose(0, 1), - grad_output[b][g].flatten(1), 0.0f, 1.0f); - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - weight = weight.view({weight.size(0) * weight.size(1), weight.size(2), - weight.size(3), weight.size(4)}); - - // gradient w.r.t. input coordinate data - modulated_deformable_col2im_coord_impl( - columns, input[b], offset[b], mask[b], 1, channels, height, width, - height_out, width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, - stride_w, dilation_h, dilation_w, deformable_group, grad_offset[b], - grad_mask[b]); - // gradient w.r.t. input data - modulated_deformable_col2im_impl( - columns, offset[b], mask[b], 1, channels, height, width, height_out, - width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, deformable_group, grad_input[b]); - - // gradient w.r.t. weight, dWeight should accumulate across the batch and - // group - modulated_deformable_im2col_impl( - input[b], offset[b], mask[b], 1, channels, height, width, height_out, - width_out, kernel_h, kernel_w, pad_h, pad_w, stride_h, stride_w, - dilation_h, dilation_w, deformable_group, columns); - - columns = columns.view({group, columns.size(0) / group, columns.size(1)}); - grad_weight = grad_weight.view({group, grad_weight.size(0) / group, - grad_weight.size(1), grad_weight.size(2), - grad_weight.size(3)}); - if (with_bias) - grad_bias = grad_bias.view({group, grad_bias.size(0) / group}); - - for (int g = 0; g < group; g++) { - grad_weight[g] = - grad_weight[g] - .flatten(1) - .addmm_(grad_output[b][g].flatten(1), columns[g].transpose(0, 1)) - .view_as(grad_weight[g]); - if (with_bias) { - grad_bias[g] = - grad_bias[g] - .view({-1, 1}) - .addmm_(grad_output[b][g].flatten(1), ones.view({-1, 1})) - .view(-1); - } - } - - columns = - columns.view({columns.size(0) * columns.size(1), columns.size(2)}); - grad_weight = grad_weight.view({grad_weight.size(0) * grad_weight.size(1), - grad_weight.size(2), grad_weight.size(3), - grad_weight.size(4)}); - if (with_bias) - grad_bias = grad_bias.view({grad_bias.size(0) * grad_bias.size(1)}); - } - grad_output = grad_output.view({grad_output.size(0) * grad_output.size(1), - grad_output.size(2), grad_output.size(3), - grad_output.size(4)}); -}
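Both passes of `modulated_deform_conv` above follow the classic im2col decomposition: per sample, `modulated_deformable_im2col_impl` gathers bilinear samples at the offset-shifted, mask-weighted locations into a `columns` buffer, and the convolution itself is a grouped GEMM against the flattened weight. The spatial arithmetic is the usual dilated-convolution formula; a self-contained check of the `height_out`/`width_out` expression (the example values are arbitrary):

```cpp
#include <cassert>

// Output extent of a dilated convolution along one axis, matching the
// height_out/width_out expressions in the deleted code above.
int conv_out(int size, int pad, int dilation, int kernel, int stride) {
  return (size + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1;
}

int main() {
  // e.g. a 3x3 kernel with stride 1, pad 1, dilation 1 preserves spatial size
  assert(conv_out(56, /*pad=*/1, /*dilation=*/1, /*kernel=*/3, /*stride=*/1) == 56);
  return 0;
}
```

diff --git a/mmcv/ops/csrc/parrots/modulated_deform_conv_parrots.cpp b/mmcv/ops/csrc/parrots/modulated_deform_conv_parrots.cpp deleted file mode 100644 index 2ef7efff6..000000000 --- a/mmcv/ops/csrc/parrots/modulated_deform_conv_parrots.cpp +++ /dev/null @@ -1,199 +0,0 @@ -// Copyright (c) OpenMMLab.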
All rights reserved -#include -#include -#include - -#include "modulated_deform_conv_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void modulated_deform_conv_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, - dilation_w, group, deformable_group, with_bias; - SSAttrs(attr) - .get("kernel_h", kernel_h) - .get("kernel_w", kernel_w) - .get("stride_h", stride_h) - .get("stride_w", stride_w) - .get("pad_h", pad_h) - .get("pad_w", pad_w) - .get("dilation_h", dilation_h) - .get("dilation_w", dilation_w) - .get("group", group) - .get("deformable_group", deformable_group) - .get("with_bias", with_bias) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& weight = buildATensor(ctx, ins[1]); - const auto& bias = buildATensor(ctx, ins[2]); - const auto& ones = buildATensor(ctx, ins[3]); - const auto& offset = buildATensor(ctx, ins[4]); - const auto& mask = buildATensor(ctx, ins[5]); - - auto output = buildATensor(ctx, outs[0]); - auto columns = buildATensor(ctx, outs[1]); - - modulated_deform_conv_forward(input, weight, bias, ones, offset, mask, output, - columns, kernel_h, kernel_w, stride_h, stride_w, - pad_h, pad_w, dilation_h, dilation_w, group, - deformable_group, with_bias); -} - -void modulated_deform_conv_backward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, - dilation_w, group, deformable_group, with_bias; - SSAttrs(attr) - .get("kernel_h", kernel_h) - .get("kernel_w", kernel_w) - .get("stride_h", stride_h) - .get("stride_w", stride_w) - .get("pad_h", pad_h) - .get("pad_w", pad_w) - .get("dilation_h", dilation_h) - .get("dilation_w", dilation_w) - .get("group", group) - .get("deformable_group", deformable_group) - .get("with_bias", with_bias) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& weight = buildATensor(ctx, ins[1]); - const auto& bias = buildATensor(ctx, ins[2]); - const auto& ones = buildATensor(ctx, ins[3]); - const auto& offset = buildATensor(ctx, ins[4]); - const auto& mask = buildATensor(ctx, ins[5]); - - auto columns = buildATensor(ctx, outs[0]); - auto grad_input = buildATensor(ctx, outs[1]); - auto grad_weight = buildATensor(ctx, outs[2]); - auto grad_bias = buildATensor(ctx, outs[3]); - auto grad_offset = buildATensor(ctx, outs[4]); - auto grad_mask = buildATensor(ctx, outs[5]); - auto grad_output = buildATensor(ctx, outs[6]); - modulated_deform_conv_backward( - input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, - grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, - stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, - deformable_group, with_bias); -} -#endif - -void modulated_deform_conv_forward_cpu_parrots( - HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, - dilation_w, group, deformable_group, with_bias; - SSAttrs(attr) - .get("kernel_h", kernel_h) - .get("kernel_w", kernel_w) - .get("stride_h", stride_h) - .get("stride_w", stride_w) - .get("pad_h", pad_h) - .get("pad_w", pad_w) - .get("dilation_h", dilation_h) - .get("dilation_w", dilation_w) - .get("group", group) - .get("deformable_group", 
deformable_group) - .get("with_bias", with_bias) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& weight = buildATensor(ctx, ins[1]); - const auto& bias = buildATensor(ctx, ins[2]); - const auto& ones = buildATensor(ctx, ins[3]); - const auto& offset = buildATensor(ctx, ins[4]); - const auto& mask = buildATensor(ctx, ins[5]); - - auto output = buildATensor(ctx, outs[0]); - auto columns = buildATensor(ctx, outs[1]); - - modulated_deform_conv_forward(input, weight, bias, ones, offset, mask, output, - columns, kernel_h, kernel_w, stride_h, stride_w, - pad_h, pad_w, dilation_h, dilation_w, group, - deformable_group, with_bias); -} - -void modulated_deform_conv_backward_cpu_parrots( - HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_h, kernel_w, stride_h, stride_w, pad_h, pad_w, dilation_h, - dilation_w, group, deformable_group, with_bias; - SSAttrs(attr) - .get("kernel_h", kernel_h) - .get("kernel_w", kernel_w) - .get("stride_h", stride_h) - .get("stride_w", stride_w) - .get("pad_h", pad_h) - .get("pad_w", pad_w) - .get("dilation_h", dilation_h) - .get("dilation_w", dilation_w) - .get("group", group) - .get("deformable_group", deformable_group) - .get("with_bias", with_bias) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& weight = buildATensor(ctx, ins[1]); - const auto& bias = buildATensor(ctx, ins[2]); - const auto& ones = buildATensor(ctx, ins[3]); - const auto& offset = buildATensor(ctx, ins[4]); - const auto& mask = buildATensor(ctx, ins[5]); - - auto columns = buildATensor(ctx, outs[0]); - auto grad_input = buildATensor(ctx, outs[1]); - auto grad_weight = buildATensor(ctx, outs[2]); - auto grad_bias = buildATensor(ctx, outs[3]); - auto grad_offset = buildATensor(ctx, outs[4]); - auto grad_mask = buildATensor(ctx, outs[5]); - auto grad_output = buildATensor(ctx, outs[6]); - modulated_deform_conv_backward( - input, weight, bias, ones, offset, mask, columns, grad_input, grad_weight, - grad_bias, grad_offset, grad_mask, grad_output, kernel_h, kernel_w, - stride_h, stride_w, pad_h, pad_w, dilation_h, dilation_w, group, - deformable_group, with_bias); -} -PARROTS_EXTENSION_REGISTER(modulated_deform_conv_forward) - .attr("kernel_h") - .attr("kernel_w") - .attr("stride_h") - .attr("stride_w") - .attr("pad_h") - .attr("pad_w") - .attr("dilation_h") - .attr("dilation_w") - .attr("group") - .attr("deformable_group") - .attr("with_bias") - .input(6) - .output(2) - .apply(modulated_deform_conv_forward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(modulated_deform_conv_forward_cuda_parrots) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(modulated_deform_conv_backward) - .attr("kernel_h") - .attr("kernel_w") - .attr("stride_h") - .attr("stride_w") - .attr("pad_h") - .attr("pad_w") - .attr("dilation_h") - .attr("dilation_w") - .attr("group") - .attr("deformable_group") - .attr("with_bias") - .input(6) - .output(7) - .apply(modulated_deform_conv_backward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(modulated_deform_conv_backward_cuda_parrots) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/modulated_deform_conv_pytorch.h b/mmcv/ops/csrc/parrots/modulated_deform_conv_pytorch.h deleted file mode 100644 index 12f686861..000000000 --- a/mmcv/ops/csrc/parrots/modulated_deform_conv_pytorch.h +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef MODULATED_DEFORM_CONV_PYTORCH_H -#define MODULATED_DEFORM_CONV_PYTORCH_H -#include -using namespace at; - -void modulated_deform_conv_forward( - Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, - Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, - const int stride_h, const int stride_w, const int pad_h, const int pad_w, - const int dilation_h, const int dilation_w, const int group, - const int deformable_group, const bool with_bias); - -void modulated_deform_conv_backward( - Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, - Tensor mask, Tensor columns, Tensor grad_input, Tensor grad_weight, - Tensor grad_bias, Tensor grad_offset, Tensor grad_mask, Tensor grad_output, - int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, - int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, - const bool with_bias); -#endif // MODULATED_DEFORM_CONV_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/ms_deform_attn.cpp b/mmcv/ops/csrc/parrots/ms_deform_attn.cpp deleted file mode 100644 index 25c8f6209..000000000 --- a/mmcv/ops/csrc/parrots/ms_deform_attn.cpp +++ /dev/null @@ -1,60 +0,0 @@ -/*! -************************************************************************************************** -* Deformable DETR -* Copyright (c) 2020 SenseTime. All Rights Reserved. -* Licensed under the Apache License, Version 2.0 [see LICENSE for details] -************************************************************************************************** -* Modified from -*https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/tree/pytorch_1.0.0 -************************************************************************************************** -*/ - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -Tensor ms_deform_attn_impl_forward(const Tensor &value, - const Tensor &spatial_shapes, - const Tensor &level_start_index, - const Tensor &sampling_loc, - const Tensor &attn_weight, - const int im2col_step) { - return DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_forward, value, - spatial_shapes, level_start_index, sampling_loc, - attn_weight, im2col_step); -} - -void ms_deform_attn_impl_backward( - const Tensor &value, const Tensor &spatial_shapes, - const Tensor &level_start_index, const Tensor &sampling_loc, - const Tensor &attn_weight, const Tensor &grad_output, Tensor &grad_value, - Tensor &grad_sampling_loc, Tensor &grad_attn_weight, - const int im2col_step) { - DISPATCH_DEVICE_IMPL(ms_deform_attn_impl_backward, value, spatial_shapes, - level_start_index, sampling_loc, attn_weight, - grad_output, grad_value, grad_sampling_loc, - grad_attn_weight, im2col_step); -} - -Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes, - const Tensor &level_start_index, - const Tensor &sampling_loc, - const Tensor &attn_weight, - const int im2col_step) { - at::DeviceGuard guard(value.device()); - return ms_deform_attn_impl_forward(value, spatial_shapes, level_start_index, - sampling_loc, attn_weight, im2col_step); -} - -void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes, - const Tensor &level_start_index, - const Tensor &sampling_loc, - const Tensor &attn_weight, - const Tensor &grad_output, Tensor &grad_value, - Tensor &grad_sampling_loc, - Tensor &grad_attn_weight, const int im2col_step) { - at::DeviceGuard guard(value.device()); - ms_deform_attn_impl_backward(value, spatial_shapes, level_start_index, - sampling_loc, attn_weight, 
grad_output, - grad_value, grad_sampling_loc, grad_attn_weight, - im2col_step); -} diff --git a/mmcv/ops/csrc/parrots/ms_deform_attn_parrots.cpp b/mmcv/ops/csrc/parrots/ms_deform_attn_parrots.cpp deleted file mode 100644 index a3ad786a8..000000000 --- a/mmcv/ops/csrc/parrots/ms_deform_attn_parrots.cpp +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include <torch/extension.h> - -#include <parrots/compute/aten.hpp> -#include <parrots/extension.hpp> -#include <parrots/foundation/ssattrs.hpp> -using namespace at; -using namespace parrots; - -Tensor ms_deform_attn_forward(const Tensor &value, const Tensor &spatial_shapes, - const Tensor &level_start_index, - const Tensor &sampling_loc, - const Tensor &attn_weight, const int im2col_step); - -void ms_deform_attn_backward(const Tensor &value, const Tensor &spatial_shapes, - const Tensor &level_start_index, - const Tensor &sampling_loc, - const Tensor &attn_weight, - const Tensor &grad_output, Tensor &grad_value, - Tensor &grad_sampling_loc, - Tensor &grad_attn_weight, const int im2col_step); - -void ms_deform_attn_forward_parrots(CudaContext &ctx, const SSElement &attr, - const OperatorBase::in_list_t &ins, - OperatorBase::out_list_t &outs) { - int im2col_step; - SSAttrs(attr).get<int>("im2col_step", im2col_step).done(); - const auto &value = buildATensor(ctx, ins[0]); - const auto &spatial_shapes = buildATensor(ctx, ins[1]); - const auto &level_start_index = buildATensor(ctx, ins[2]); - const auto &sampling_loc = buildATensor(ctx, ins[3]); - const auto &attn_weight = buildATensor(ctx, ins[4]); - auto out = ms_deform_attn_forward(value, spatial_shapes, level_start_index, - sampling_loc, attn_weight, im2col_step); - updateDArray(ctx, out, outs[0]); -} - -void ms_deform_attn_backward_parrots(CudaContext &ctx, const SSElement &attr, - const OperatorBase::in_list_t &ins, - OperatorBase::out_list_t &outs) { - int im2col_step; - SSAttrs(attr).get<int>("im2col_step", im2col_step).done(); - const auto &value = buildATensor(ctx, ins[0]); - const auto &spatial_shapes = buildATensor(ctx, ins[1]); - const auto &level_start_index = buildATensor(ctx, ins[2]); - const auto &sampling_loc = buildATensor(ctx, ins[3]); - const auto &attn_weight = buildATensor(ctx, ins[4]); - const auto &grad_output = buildATensor(ctx, ins[5]); - auto grad_value = buildATensor(ctx, outs[0]); - auto grad_sampling_loc = buildATensor(ctx, outs[1]); - auto grad_attn_weight = buildATensor(ctx, outs[2]); - ms_deform_attn_backward(value, spatial_shapes, level_start_index, - sampling_loc, attn_weight, grad_output, grad_value, - grad_sampling_loc, grad_attn_weight, im2col_step); -} - -PARROTS_EXTENSION_REGISTER(ms_deform_attn_forward) - .attr("im2col_step") - .input(5) - .output(1) - .apply(ms_deform_attn_forward_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(ms_deform_attn_backward) - .attr("im2col_step") - .input(6) - .output(3) - .apply(ms_deform_attn_backward_parrots) - .done();
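Unlike most bindings in this diff, which write into preallocated `outs` tensors, `ms_deform_attn_forward` allocates its result inside the ATen operator, so the parrots glue publishes it back with `updateDArray` rather than `buildATensor` on an output slot. A sketch of that return path, with a hypothetical `my_reduce` in place of a real op:

```cpp
// Sketch of the return-by-updateDArray pattern used above; my_reduce is
// illustrative, not an mmcv operator.
void my_reduce_parrots(CudaContext& ctx, const SSElement& attr,
                       const OperatorBase::in_list_t& ins,
                       OperatorBase::out_list_t& outs) {
  const auto& value = buildATensor(ctx, ins[0]);
  at::Tensor out = value.sum(/*dim=*/0);  // the op allocates its own result
  updateDArray(ctx, out, outs[0]);        // hand it back to the parrots output
}
```

diff --git a/mmcv/ops/csrc/parrots/nms.cpp b/mmcv/ops/csrc/parrots/nms.cpp deleted file mode 100644 index 199d8af23..000000000 --- a/mmcv/ops/csrc/parrots/nms.cpp +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright (c) OpenMMLab.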
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset) { - return DISPATCH_DEVICE_IMPL(nms_impl, boxes, scores, iou_threshold, offset); -} - -Tensor softnms_impl(Tensor boxes, Tensor scores, Tensor dets, - float iou_threshold, float sigma, float min_score, - int method, int offset) { - return DISPATCH_DEVICE_IMPL(softnms_impl, boxes, scores, dets, iou_threshold, - sigma, min_score, method, offset); -} - -std::vector > nms_match_impl(Tensor dets, - float iou_threshold) { - return DISPATCH_DEVICE_IMPL(nms_match_impl, dets, iou_threshold); -} - -Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) { - return nms_impl(boxes, scores, iou_threshold, offset); -} - -Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, - float sigma, float min_score, int method, int offset) { - return softnms_impl(boxes, scores, dets, iou_threshold, sigma, min_score, - method, offset); -} - -std::vector > nms_match(Tensor dets, float iou_threshold) { - return nms_match_impl(dets, iou_threshold); -} diff --git a/mmcv/ops/csrc/parrots/nms_parrots.cpp b/mmcv/ops/csrc/parrots/nms_parrots.cpp deleted file mode 100644 index db8b5f16e..000000000 --- a/mmcv/ops/csrc/parrots/nms_parrots.cpp +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "nms_pytorch.h" - -using namespace parrots; - -// Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset); -template -void nms_parrots(T& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float iou_threshold; - int offset; - SSAttrs(attr) - .get("iou_threshold", iou_threshold) - .get("offset", offset) - .done(); - at::Tensor boxes, scores; - boxes = buildATensor(ctx, ins[0]); - scores = buildATensor(ctx, ins[1]); - auto out = nms(boxes, scores, iou_threshold, offset); - updateDArray(ctx, out, outs[0]); -} - -/*Tensor softnms(Tensor boxes, Tensor scores, Tensor dets, float iou_threshold, - * float sigma, float min_score, int method, int offset);*/ -template -void softnms_parrots(T& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float iou_threshold, sigma, min_score; - int method, offset; - SSAttrs(attr) - .get("iou_threshold", iou_threshold) - .get("sigma", sigma) - .get("min_score", min_score) - .get("method", method) - .get("offset", offset) - .done(); - at::Tensor boxes, scores, dets; - boxes = buildATensor(ctx, ins[0]); - scores = buildATensor(ctx, ins[1]); - dets = buildATensor(ctx, ins[2]); - auto out = softnms(boxes, scores, dets, iou_threshold, sigma, min_score, - method, offset); - updateDArray(ctx, out, outs[0]); -} - -// std::vector > nms_match(Tensor dets, float iou_threshold); -template -void nms_match_parrots(T& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float iou_threshold; - SSAttrs(attr).get("iou_threshold", iou_threshold).done(); - at::Tensor dets; - dets = buildATensor(ctx, ins[0]); - auto out = nms_match(dets, iou_threshold); - int n = out.size(), m = 0; - for (int i = 0; i < n; ++i) - if (m < out[i].size()) m = out[i].size(); - auto options = torch::TensorOptions().dtype(at::kInt); - auto tensor = torch::zeros({n, m}, options); - for (int i = 0; i < n; i++) - tensor.slice(0, i, i + 1) = - torch::from_blob(out[i].data(), {out[i].size()}, options); - 
updateDArray(ctx, tensor, outs[0]); -} - -/*Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order, - * const Tensor dets_sorted, const float iou_threshold, - * const int multi_label);*/ -template <typename T> -void nms_rotated_parrots(T& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - float iou_threshold; - int multi_label; - SSAttrs(attr) - .get<float>("iou_threshold", iou_threshold) - .get<int>("multi_label", multi_label) - .done(); - at::Tensor dets, scores, order, dets_sorted; - dets = buildATensor(ctx, ins[0]); - scores = buildATensor(ctx, ins[1]); - order = buildATensor(ctx, ins[2]); - dets_sorted = buildATensor(ctx, ins[3]); - auto out = - nms_rotated(dets, scores, order, dets_sorted, iou_threshold, multi_label); - updateDArray(ctx, out, outs[0]); -} - -PARROTS_EXTENSION_REGISTER(nms) - .attr("iou_threshold") - .attr("offset") - .input(2) - .output(1) - .apply(nms_parrots<HostContext>) -#ifdef MMCV_WITH_CUDA - .apply(nms_parrots<CudaContext>) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(softnms) - .attr("iou_threshold") - .attr("sigma") - .attr("min_score") - .attr("method") - .attr("offset") - .input(3) - .output(1) - .apply(softnms_parrots<HostContext>) -#ifdef MMCV_WITH_CUDA - .apply(softnms_parrots<CudaContext>) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(nms_match) - .attr("iou_threshold") - .input(1) - .output(1) - .apply(nms_match_parrots<HostContext>) -#ifdef MMCV_WITH_CUDA - .apply(nms_match_parrots<CudaContext>) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(nms_rotated) - .attr("multi_label") - .attr("iou_threshold") - .input(4) - .output(1) - .apply(nms_rotated_parrots<HostContext>) -#ifdef MMCV_WITH_CUDA - .apply(nms_rotated_parrots<CudaContext>) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/nms_pytorch.h b/mmcv/ops/csrc/parrots/nms_pytorch.h deleted file mode 100644 index 78c680e57..000000000 --- a/mmcv/ops/csrc/parrots/nms_pytorch.h +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef NMS_PYTORCH_H -#define NMS_PYTORCH_H -#include <torch/extension.h> - -at::Tensor nms(at::Tensor boxes, at::Tensor scores, float iou_threshold, - int offset); - -at::Tensor softnms(at::Tensor boxes, at::Tensor scores, at::Tensor dets, - float iou_threshold, float sigma, float min_score, - int method, int offset); - -std::vector<std::vector<int> > nms_match(at::Tensor dets, float iou_threshold); - -at::Tensor nms_rotated(const at::Tensor dets, const at::Tensor scores, - const at::Tensor order, const at::Tensor dets_sorted, - const float iou_threshold, const int multi_label); -#endif // NMS_PYTORCH_H
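Two details of the NMS bindings above generalize across this diff. First, ops with both CPU and CUDA paths register the same templated glue twice, instantiated once per context type behind `#ifdef MMCV_WITH_CUDA`, so a CPU-only build still yields a working extension. Second, `nms_match` returns ragged `std::vector<std::vector<int>>` groups, which the binding flattens into a zero-padded `[n, m]` int tensor. A standalone sketch of that packing (`pack_groups` is an illustrative name, not an mmcv function):

```cpp
// Sketch: pad ragged int groups into an [n, m] tensor, zero-filled for short
// rows, mirroring the nms_match binding above.
#include <torch/extension.h>

#include <algorithm>
#include <vector>

at::Tensor pack_groups(const std::vector<std::vector<int>>& groups) {
  const int64_t n = groups.size();
  int64_t m = 0;
  for (const auto& g : groups) m = std::max<int64_t>(m, g.size());
  auto out = torch::zeros({n, m}, torch::TensorOptions().dtype(at::kInt));
  auto acc = out.accessor<int, 2>();
  for (int64_t i = 0; i < n; ++i)
    for (size_t j = 0; j < groups[i].size(); ++j)
      acc[i][j] = groups[i][j];  // rows shorter than m keep their zero padding
  return out;
}
```

diff --git a/mmcv/ops/csrc/parrots/nms_rotated.cpp b/mmcv/ops/csrc/parrots/nms_rotated.cpp deleted file mode 100644 index e4ef676a9..000000000 --- a/mmcv/ops/csrc/parrots/nms_rotated.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright (c) Facebook, Inc. and its affiliates.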
All Rights Reserved -// modified from -// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/nms_rotated/nms_rotated.h -#include "pytorch_cpp_helper.hpp" - -Tensor nms_rotated_cpu(const Tensor dets, const Tensor scores, - const float iou_threshold); - -#ifdef MMCV_WITH_CUDA -Tensor nms_rotated_cuda(const Tensor dets, const Tensor scores, - const Tensor order, const Tensor dets_sorted, - const float iou_threshold, const int multi_label); -#endif - -// Interface for Python -// inline is needed to prevent multiple function definitions when this header is -// included by different cpps -Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order, - const Tensor dets_sorted, const float iou_threshold, - const int multi_label) { - assert(dets.device().is_cuda() == scores.device().is_cuda()); - if (dets.device().is_cuda()) { -#ifdef MMCV_WITH_CUDA - return nms_rotated_cuda(dets, scores, order, dets_sorted, iou_threshold, - multi_label); -#else - AT_ERROR("Not compiled with GPU support"); -#endif - } - - return nms_rotated_cpu(dets, scores, iou_threshold); -} diff --git a/mmcv/ops/csrc/parrots/pixel_group.cpp b/mmcv/ops/csrc/parrots/pixel_group.cpp deleted file mode 100644 index 2bf8c8bbf..000000000 --- a/mmcv/ops/csrc/parrots/pixel_group.cpp +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -// It is modified from https://github.com/WenmuZhou/PAN.pytorch - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -std::vector> pixel_group_impl( - Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, - Tensor kernel_contour, int kernel_region_num, float dis_threshold) { - return DISPATCH_DEVICE_IMPL(pixel_group_impl, score, mask, embedding, - kernel_label, kernel_contour, kernel_region_num, - dis_threshold); -} - -std::vector> pixel_group( - Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, - Tensor kernel_contour, int kernel_region_num, float distance_threshold) { - score = score.contiguous(); - mask = mask.contiguous(); - embedding = embedding.contiguous(); - kernel_label = kernel_label.contiguous(); - kernel_contour = kernel_contour.contiguous(); - - return pixel_group_impl(score, mask, embedding, kernel_label, kernel_contour, - kernel_region_num, distance_threshold); -} diff --git a/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp b/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp deleted file mode 100644 index bd863a4e1..000000000 --- a/mmcv/ops/csrc/parrots/pixel_group_parrots.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include <parrots/compute/aten.hpp> -#include <parrots/extension.hpp> -#include <parrots/foundation/ssattrs.hpp> - -#include "pixel_group_pytorch.h" - -using namespace parrots; -using namespace std; - -template <typename T> -void pixel_group_parrots(T& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int kernel_region_num; - float distance_threshold; - SSAttrs(attr) - .get<int>("kernel_region_num", kernel_region_num) - .get<float>("distance_threshold", distance_threshold) - .done(); - at::Tensor score; - at::Tensor mask; - at::Tensor embedding; - at::Tensor kernel_label; - at::Tensor kernel_contour; - score = buildATensor(ctx, ins[0]); - mask = buildATensor(ctx, ins[1]); - embedding = buildATensor(ctx, ins[2]); - kernel_label = buildATensor(ctx, ins[3]); - kernel_contour = buildATensor(ctx, ins[4]); - auto out = pixel_group(score, mask, embedding, kernel_label, kernel_contour, - kernel_region_num, distance_threshold); - int n = out.size(); - std::vector<float> out_tensor; - for (int i = 0; i < n; ++i) out_tensor.push_back(float(out[i].size())); - for (int i = 0; i < n; ++i) - out_tensor.insert(out_tensor.end(), out[i].begin(), out[i].end()); - auto options = torch::TensorOptions().dtype(at::kFloat); - auto tensor = torch::zeros({1, out_tensor.size()}, options); - tensor.slice(0, 0, 1) = - torch::from_blob(out_tensor.data(), {out_tensor.size()}, options); - updateDArray(ctx, tensor, outs[0]); -} - -PARROTS_EXTENSION_REGISTER(pixel_group) - .attr("kernel_region_num") - .attr("distance_threshold") - .input(5) - .output(1) - .apply(pixel_group_parrots<HostContext>) -#ifdef MMCV_WITH_CUDA - .apply(pixel_group_parrots<CudaContext>) -#endif - .done();
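`pixel_group` returns ragged `std::vector<std::vector<float>>` groups, and since a parrots output must be a single tensor, the binding above serializes them into one float row: the n group sizes first, then the groups' elements concatenated in order. Decoding on the consumer side is the mirror image; a sketch under that layout assumption (`decode_groups` is illustrative, and the caller is assumed to know n):

```cpp
// Sketch: unpack the [1, n + total] float row produced by the binding above
// back into ragged groups. decode_groups is a hypothetical helper.
#include <vector>

std::vector<std::vector<float>> decode_groups(const float* data, int n) {
  std::vector<std::vector<float>> groups(n);
  const float* p = data + n;  // payload starts after the n size slots
  for (int i = 0; i < n; ++i) {
    int len = static_cast<int>(data[i]);  // size of group i, stored up front
    groups[i].assign(p, p + len);
    p += len;
  }
  return groups;
}
```

diff --git a/mmcv/ops/csrc/parrots/pixel_group_pytorch.h b/mmcv/ops/csrc/parrots/pixel_group_pytorch.h deleted file mode 100644 index 1686ef3ee..000000000 --- a/mmcv/ops/csrc/parrots/pixel_group_pytorch.h +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) OpenMMLab.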
All rights reserved -#ifndef PIXEL_GROUP_PYTORCH_H -#define PIXEL_GROUP_PYTORCH_H -#include -using namespace at; - -std::vector> pixel_group( - Tensor score, Tensor mask, Tensor embedding, Tensor kernel_label, - Tensor kernel_contour, int kernel_region_num, float distance_threshold); - -#endif // PIXEL_GROUP_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/points_in_boxes.cpp b/mmcv/ops/csrc/parrots/points_in_boxes.cpp deleted file mode 100644 index 540da9403..000000000 --- a/mmcv/ops/csrc/parrots/points_in_boxes.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void points_in_boxes_part_forward_impl(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points) { - DISPATCH_DEVICE_IMPL(points_in_boxes_part_forward_impl, batch_size, boxes_num, - pts_num, boxes, pts, box_idx_of_points); -} - -void points_in_boxes_all_forward_impl(int batch_size, int boxes_num, - int pts_num, const Tensor boxes, - const Tensor pts, - Tensor box_idx_of_points) { - DISPATCH_DEVICE_IMPL(points_in_boxes_all_forward_impl, batch_size, boxes_num, - pts_num, boxes, pts, box_idx_of_points); -} - -void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor, - Tensor box_idx_of_points_tensor) { - // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR - // coordinate, z is the bottom center, each box params pts: (B, npoints, 3) - // [x, y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), - // default -1 - int batch_size = boxes_tensor.size(0); - int boxes_num = boxes_tensor.size(1); - int pts_num = pts_tensor.size(1); - points_in_boxes_part_forward_impl(batch_size, boxes_num, pts_num, - boxes_tensor, pts_tensor, - box_idx_of_points_tensor); -} - -void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor, - Tensor box_idx_of_points_tensor) { - // params boxes: (B, N, 7) [x, y, z, x_size, y_size, z_size, rz] in LiDAR - // coordinate, z is the bottom center. params pts: (B, npoints, 3) [x, y, z] - // in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default -1 - int batch_size = boxes_tensor.size(0); - int boxes_num = boxes_tensor.size(1); - int pts_num = pts_tensor.size(1); - points_in_boxes_all_forward_impl(batch_size, boxes_num, pts_num, boxes_tensor, - pts_tensor, box_idx_of_points_tensor); -} diff --git a/mmcv/ops/csrc/parrots/points_in_boxes_parrots.cpp b/mmcv/ops/csrc/parrots/points_in_boxes_parrots.cpp deleted file mode 100644 index afd2b0eb2..000000000 --- a/mmcv/ops/csrc/parrots/points_in_boxes_parrots.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include -#include -#include - -#include "points_in_boxes_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void points_in_boxes_part_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto boxes_tensor = buildATensor(ctx, ins[0]); - auto pts_tensor = buildATensor(ctx, ins[1]); - - auto box_idx_of_points_tensor = buildATensor(ctx, outs[0]); - - points_in_boxes_part_forward(boxes_tensor, pts_tensor, - box_idx_of_points_tensor); -} - -void points_in_boxes_all_forward_cuda_parrots( - CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto boxes_tensor = buildATensor(ctx, ins[0]); - auto pts_tensor = buildATensor(ctx, ins[1]); - - auto box_idx_of_points_tensor = buildATensor(ctx, outs[0]); - - points_in_boxes_all_forward(boxes_tensor, pts_tensor, - box_idx_of_points_tensor); -} - -PARROTS_EXTENSION_REGISTER(points_in_boxes_part_forward) - .input(2) - .output(1) - .apply(points_in_boxes_part_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(points_in_boxes_all_forward) - .input(2) - .output(1) - .apply(points_in_boxes_all_forward_cuda_parrots) - .done(); -#endif - -void points_in_boxes_forward_cpu_parrots(HostContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto boxes_tensor = buildATensor(ctx, ins[0]); - auto pts_tensor = buildATensor(ctx, ins[1]); - - auto pts_indices_tensor = buildATensor(ctx, outs[0]); - - points_in_boxes_cpu_forward(boxes_tensor, pts_tensor, pts_indices_tensor); -} - -PARROTS_EXTENSION_REGISTER(points_in_boxes_cpu_forward) - .input(2) - .output(1) - .apply(points_in_boxes_forward_cpu_parrots) - .done(); diff --git a/mmcv/ops/csrc/parrots/points_in_boxes_pytorch.h b/mmcv/ops/csrc/parrots/points_in_boxes_pytorch.h deleted file mode 100644 index f3e465e3c..000000000 --- a/mmcv/ops/csrc/parrots/points_in_boxes_pytorch.h +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#ifndef POINTS_IN_BOXES_PYTORCH_H -#define POINTS_IN_BOXES_PYTORCH_H -#include -using namespace at; - -void points_in_boxes_part_forward(Tensor boxes_tensor, Tensor pts_tensor, - Tensor box_idx_of_points_tensor); - -void points_in_boxes_all_forward(Tensor boxes_tensor, Tensor pts_tensor, - Tensor box_idx_of_points_tensor); - -void points_in_boxes_cpu_forward(Tensor boxes_tensor, Tensor pts_tensor, - Tensor pts_indices_tensor); - -#endif // POINTS_IN_BOXES_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/points_in_polygons.cpp b/mmcv/ops/csrc/parrots/points_in_polygons.cpp deleted file mode 100644 index 75a93dcef..000000000 --- a/mmcv/ops/csrc/parrots/points_in_polygons.cpp +++ /dev/null @@ -1,15 +0,0 @@ -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void points_in_polygons_forward_impl(const Tensor points, const Tensor polygons, - Tensor output, const int rows, - const int cols) { - DISPATCH_DEVICE_IMPL(points_in_polygons_forward_impl, points, polygons, - output, rows, cols); -} - -void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output) { - int rows = points.size(0); - int cols = polygons.size(0); - points_in_polygons_forward_impl(points, polygons, output, rows, cols); -} diff --git a/mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp b/mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp deleted file mode 100644 index d52018e64..000000000 --- a/mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include -#include -#include - -#include "points_in_polygons_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void points_in_polygons_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - auto points = buildATensor(ctx, ins[0]); - auto polygons = buildATensor(ctx, ins[1]); - - auto output = buildATensor(ctx, outs[0]); - - points_in_polygons_forward(points, polygons, output); -} - -PARROTS_EXTENSION_REGISTER(points_in_polygons_forward) - .input(2) - .output(1) - .apply(points_in_polygons_cuda_parrots) - .done(); - -#endif diff --git a/mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h b/mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h deleted file mode 100644 index 042678143..000000000 --- a/mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef POINTS_IN_POLYGONS_PYTORCH_H -#define POINTS_IN_POLYGONS_PYTORCH_H -#include -using namespace at; - -void points_in_polygons_forward(Tensor points, Tensor polygons, Tensor output); - -#endif // POINTS_IN_POLYGONS_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/prroi_pool.cpp b/mmcv/ops/csrc/parrots/prroi_pool.cpp deleted file mode 100644 index 00db84a15..000000000 --- a/mmcv/ops/csrc/parrots/prroi_pool.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void prroi_pool_forward_impl(Tensor input, Tensor rois, Tensor output, - int pooled_height, int pooled_width, - float spatial_scale) { - DISPATCH_DEVICE_IMPL(prroi_pool_forward_impl, input, rois, output, - pooled_height, pooled_width, spatial_scale); -} - -void prroi_pool_backward_impl(Tensor grad_output, Tensor rois, - Tensor grad_input, int pooled_height, - int pooled_width, float spatial_scale) { - DISPATCH_DEVICE_IMPL(prroi_pool_backward_impl, grad_output, rois, grad_input, - pooled_height, pooled_width, spatial_scale); -} - -void prroi_pool_coor_backward_impl(Tensor output, Tensor grad_output, - Tensor input, Tensor rois, Tensor grad_rois, - int pooled_height, int pooled_width, - float spatial_scale) { - DISPATCH_DEVICE_IMPL(prroi_pool_coor_backward_impl, output, grad_output, - input, rois, grad_rois, pooled_height, pooled_width, - spatial_scale); -} - -void prroi_pool_forward(Tensor input, Tensor rois, Tensor output, - int pooled_height, int pooled_width, - float spatial_scale) { - prroi_pool_forward_impl(input, rois, output, pooled_height, pooled_width, - spatial_scale); -} - -void prroi_pool_backward(Tensor grad_output, Tensor rois, Tensor grad_input, - int pooled_height, int pooled_width, - float spatial_scale) { - prroi_pool_backward_impl(grad_output, rois, grad_input, pooled_height, - pooled_width, spatial_scale); -} - -void prroi_pool_coor_backward(Tensor output, Tensor grad_output, Tensor input, - Tensor rois, Tensor grad_rois, int pooled_height, - int pooled_width, float spatial_scale) { - prroi_pool_coor_backward_impl(output, grad_output, input, rois, grad_rois, - pooled_height, pooled_width, spatial_scale); -} diff --git a/mmcv/ops/csrc/parrots/prroi_pool_parrots.cpp b/mmcv/ops/csrc/parrots/prroi_pool_parrots.cpp deleted file mode 100644 index 4e8295581..000000000 --- a/mmcv/ops/csrc/parrots/prroi_pool_parrots.cpp +++ /dev/null @@ -1,97 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include <parrots/compute/aten.hpp> -#include <parrots/extension.hpp> -#include <parrots/foundation/ssattrs.hpp> - -#include "prroi_pool_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void prroi_pool_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pooled_height; - int pooled_width; - float spatial_scale; - SSAttrs(attr) - .get<int>("pooled_height", pooled_height) - .get<int>("pooled_width", pooled_width) - .get<float>("spatial_scale", spatial_scale) - .done(); - - const auto& input = buildATensor(ctx, ins[0]); - const auto& rois = buildATensor(ctx, ins[1]); - auto output = buildATensor(ctx, outs[0]); - prroi_pool_forward(input, rois, output, pooled_height, pooled_width, - spatial_scale); -} - -void prroi_pool_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pooled_height; - int pooled_width; - float spatial_scale; - SSAttrs(attr) - .get<int>("pooled_height", pooled_height) - .get<int>("pooled_width", pooled_width) - .get<float>("spatial_scale", spatial_scale) - .done(); - - const auto& grad_output = buildATensor(ctx, ins[0]); - const auto& rois = buildATensor(ctx, ins[1]); - auto grad_input = buildATensor(ctx, outs[0]); - prroi_pool_backward(grad_output, rois, grad_input, pooled_height, - pooled_width, spatial_scale); -} - -void prroi_pool_coor_backward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int pooled_height; - int pooled_width; - float spatial_scale; - SSAttrs(attr) - .get<int>("pooled_height", pooled_height) - .get<int>("pooled_width", pooled_width) - .get<float>("spatial_scale", spatial_scale) - .done(); - - const auto& output = buildATensor(ctx, ins[0]); - const auto& grad_output = buildATensor(ctx, ins[1]); - const auto& input = buildATensor(ctx, ins[2]); - const auto& rois = buildATensor(ctx, ins[3]); - auto grad_rois = buildATensor(ctx, outs[0]); - prroi_pool_coor_backward(output, grad_output, input, rois, grad_rois, - pooled_height, pooled_width, spatial_scale); -} - -PARROTS_EXTENSION_REGISTER(prroi_pool_forward) - .attr("pooled_height") - .attr("pooled_width") - .attr("spatial_scale") - .input(2) - .output(1) - .apply(prroi_pool_forward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(prroi_pool_backward) - .attr("pooled_height") - .attr("pooled_width") - .attr("spatial_scale") - .input(2) - .output(1) - .apply(prroi_pool_backward_cuda_parrots) - .done(); - -PARROTS_EXTENSION_REGISTER(prroi_pool_coor_backward) - .attr("pooled_height") - .attr("pooled_width") - .attr("spatial_scale") - .input(4) - .output(1) - .apply(prroi_pool_coor_backward_cuda_parrots) - .done(); -#endif
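PrRoI Pool is the one op in this set with a third entry point, `prroi_pool_coor_backward`: because precise RoI pooling averages a bilinear interpolant over the continuous box region, the pooled value is differentiable in the RoI coordinates themselves, so the binding takes `output`, `grad_output`, `input`, and `rois` and emits `grad_rois`. For one pooled cell over a box region, the relevant derivative (following the PrRoI Pooling formulation; stated here for intuition, not taken from the deleted kernels) is:

```latex
\bar f = \frac{1}{A}\int_{y_1}^{y_2}\!\int_{x_1}^{x_2} f(x,y)\,dx\,dy,
\quad A = (x_2 - x_1)(y_2 - y_1),
\qquad
\frac{\partial \bar f}{\partial x_2}
  = \frac{1}{A}\int_{y_1}^{y_2} f(x_2, y)\,dy \,-\, \frac{\bar f}{x_2 - x_1}.
```

diff --git a/mmcv/ops/csrc/parrots/prroi_pool_pytorch.h b/mmcv/ops/csrc/parrots/prroi_pool_pytorch.h deleted file mode 100644 index 451b01dd5..000000000 --- a/mmcv/ops/csrc/parrots/prroi_pool_pytorch.h +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) OpenMMLab.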
All rights reserved -#ifndef PRROI_POOL_PYTORCH_H -#define PRROI_POOL_PYTORCH_H -#include -using namespace at; - -void prroi_pool_forward(Tensor input, Tensor rois, Tensor output, - int pooled_height, int pooled_width, - float spatial_scale); - -void prroi_pool_backward(Tensor grad_output, Tensor rois, Tensor grad_input, - int pooled_height, int pooled_width, - float spatial_scale); - -void prroi_pool_coor_backward(Tensor output, Tensor grad_output, Tensor input, - Tensor rois, Tensor grad_rois, int pooled_height, - int pooled_width, float spatial_scale); - -#endif // PRROI_POOL_PYTORCH_H diff --git a/mmcv/ops/csrc/parrots/psamask.cpp b/mmcv/ops/csrc/parrots/psamask.cpp deleted file mode 100644 index 6064c9ba5..000000000 --- a/mmcv/ops/csrc/parrots/psamask.cpp +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -// Modified from -// https://github.com/hszhao/semseg/blob/master/lib/psa/src -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -void psamask_forward_impl(const int psa_type, const Tensor input, Tensor output, - const int num_, const int h_feature, - const int w_feature, const int h_mask, - const int w_mask, const int half_h_mask, - const int half_w_mask) { - DISPATCH_DEVICE_IMPL(psamask_forward_impl, psa_type, input, output, num_, - h_feature, w_feature, h_mask, w_mask, half_h_mask, - half_w_mask); -} - -void psamask_backward_impl(const int psa_type, const Tensor grad_output, - Tensor grad_input, const int num_, - const int h_feature, const int w_feature, - const int h_mask, const int w_mask, - const int half_h_mask, const int half_w_mask) { - DISPATCH_DEVICE_IMPL(psamask_backward_impl, psa_type, grad_output, grad_input, - num_, h_feature, w_feature, h_mask, w_mask, half_h_mask, - half_w_mask); -} - -void psamask_forward(const Tensor input, Tensor output, const int psa_type, - const int num_, const int h_feature, const int w_feature, - const int h_mask, const int w_mask, const int half_h_mask, - const int half_w_mask) { - psamask_forward_impl(psa_type, input, output, num_, h_feature, w_feature, - h_mask, w_mask, half_h_mask, half_w_mask); -} - -void psamask_backward(Tensor grad_output, const Tensor grad_input, - const int psa_type, const int num_, const int h_feature, - const int w_feature, const int h_mask, const int w_mask, - const int half_h_mask, const int half_w_mask) { - psamask_backward_impl(psa_type, grad_output, grad_input, num_, h_feature, - w_feature, h_mask, w_mask, half_h_mask, half_w_mask); -} diff --git a/mmcv/ops/csrc/parrots/psamask_parrots.cpp b/mmcv/ops/csrc/parrots/psamask_parrots.cpp deleted file mode 100644 index f67102d02..000000000 --- a/mmcv/ops/csrc/parrots/psamask_parrots.cpp +++ /dev/null @@ -1,129 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "psamask_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void psamask_forward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
-                                  const OperatorBase::in_list_t &ins,
-                                  OperatorBase::out_list_t &outs) {
-  int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
-      half_w_mask;
-  SSAttrs(attr)
-      .get<int>("psa_type", psa_type)
-      .get<int>("num_", num_)
-      .get<int>("h_feature", h_feature)
-      .get<int>("w_feature", w_feature)
-      .get<int>("h_mask", h_mask)
-      .get<int>("w_mask", w_mask)
-      .get<int>("half_h_mask", half_h_mask)
-      .get<int>("half_w_mask", half_w_mask)
-      .done();
-  const auto &input = buildATensor(ctx, ins[0]);
-  auto output = buildATensor(ctx, outs[0]);
-  psamask_forward_cuda(psa_type, input, output, num_, h_feature, w_feature,
-                       h_mask, w_mask, half_h_mask, half_w_mask);
-}
-
-void psamask_backward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
-                                   const OperatorBase::in_list_t &ins,
-                                   OperatorBase::out_list_t &outs) {
-  int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
-      half_w_mask;
-  SSAttrs(attr)
-      .get<int>("psa_type", psa_type)
-      .get<int>("num_", num_)
-      .get<int>("h_feature", h_feature)
-      .get<int>("w_feature", w_feature)
-      .get<int>("h_mask", h_mask)
-      .get<int>("w_mask", w_mask)
-      .get<int>("half_h_mask", half_h_mask)
-      .get<int>("half_w_mask", half_w_mask)
-      .done();
-
-  const auto &grad_output = buildATensor(ctx, ins[0]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  psamask_backward_cuda(psa_type, grad_output, grad_input, num_, h_feature,
-                        w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
-}
-#endif
-
-void psamask_forward_cpu_parrots(HostContext &ctx, const SSElement &attr,
-                                 const OperatorBase::in_list_t &ins,
-                                 OperatorBase::out_list_t &outs) {
-  int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
-      half_w_mask;
-  SSAttrs(attr)
-      .get<int>("psa_type", psa_type)
-      .get<int>("num_", num_)
-      .get<int>("h_feature", h_feature)
-      .get<int>("w_feature", w_feature)
-      .get<int>("h_mask", h_mask)
-      .get<int>("w_mask", w_mask)
-      .get<int>("half_h_mask", half_h_mask)
-      .get<int>("half_w_mask", half_w_mask)
-      .done();
-  const auto &input = buildATensor(ctx, ins[0]);
-  auto output = buildATensor(ctx, outs[0]);
-  psamask_forward_cpu(psa_type, input, output, num_, h_feature, w_feature,
-                      h_mask, w_mask, half_h_mask, half_w_mask);
-}
-
-void psamask_backward_cpu_parrots(HostContext &ctx, const SSElement &attr,
-                                  const OperatorBase::in_list_t &ins,
-                                  OperatorBase::out_list_t &outs) {
-  int psa_type, num_, h_feature, w_feature, h_mask, w_mask, half_h_mask,
-      half_w_mask;
-  SSAttrs(attr)
-      .get<int>("psa_type", psa_type)
-      .get<int>("num_", num_)
-      .get<int>("h_feature", h_feature)
-      .get<int>("w_feature", w_feature)
-      .get<int>("h_mask", h_mask)
-      .get<int>("w_mask", w_mask)
-      .get<int>("half_h_mask", half_h_mask)
-      .get<int>("half_w_mask", half_w_mask)
-      .done();
-
-  const auto &grad_output = buildATensor(ctx, ins[0]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  psamask_backward_cpu(psa_type, grad_output, grad_input, num_, h_feature,
-                       w_feature, h_mask, w_mask, half_h_mask, half_w_mask);
-}
-
-PARROTS_EXTENSION_REGISTER(psamask_forward)
-    .attr("psa_type")
-    .attr("num_")
-    .attr("h_feature")
-    .attr("w_feature")
-    .attr("h_mask")
-    .attr("w_mask")
-    .attr("half_h_mask")
-    .attr("half_w_mask")
-    .input(1)
-    .output(1)
-    .apply(psamask_forward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(psamask_forward_cuda_parrots)
-#endif
-    .done();
-
-PARROTS_EXTENSION_REGISTER(psamask_backward)
-    .attr("psa_type")
-    .attr("num_")
-    .attr("h_feature")
-    .attr("w_feature")
-    .attr("h_mask")
-    .attr("w_mask")
-    .attr("half_h_mask")
-    .attr("half_w_mask")
-    .input(1)
-    .output(1)
-    .apply(psamask_backward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(psamask_backward_cuda_parrots)
-#endif
-    .done();
diff --git a/mmcv/ops/csrc/parrots/psamask_pytorch.h b/mmcv/ops/csrc/parrots/psamask_pytorch.h
deleted file mode 100644
index c3f0579ef..000000000
--- a/mmcv/ops/csrc/parrots/psamask_pytorch.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef PSAMASK_PYTORCH_H
-#define PSAMASK_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-#ifdef MMCV_WITH_CUDA
-void psamask_forward_cuda(const int psa_type, const Tensor input, Tensor output,
-                          const int num_, const int h_feature,
-                          const int w_feature, const int h_mask,
-                          const int w_mask, const int half_h_mask,
-                          const int half_w_mask);
-
-void psamask_backward_cuda(const int psa_type, const Tensor grad_output,
-                           Tensor grad_input, const int num_,
-                           const int h_feature, const int w_feature,
-                           const int h_mask, const int w_mask,
-                           const int half_h_mask, const int half_w_mask);
-#endif
-void psamask_forward_cpu(const int psa_type, const Tensor input, Tensor output,
-                         const int num_, const int h_feature,
-                         const int w_feature, const int h_mask,
-                         const int w_mask, const int half_h_mask,
-                         const int half_w_mask);
-
-void psamask_backward_cpu(const int psa_type, const Tensor grad_output,
-                          Tensor grad_input, const int num_,
-                          const int h_feature, const int w_feature,
-                          const int h_mask, const int w_mask,
-                          const int half_h_mask, const int half_w_mask);
-#endif // PSAMASK_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/riroi_align_rotated.cpp b/mmcv/ops/csrc/parrots/riroi_align_rotated.cpp
deleted file mode 100644
index 81ffa9fd6..000000000
--- a/mmcv/ops/csrc/parrots/riroi_align_rotated.cpp
+++ /dev/null
@@ -1,42 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void riroi_align_rotated_forward_impl(Tensor features, Tensor rois,
-                                      Tensor output, int pooled_height,
-                                      int pooled_width, float spatial_scale,
-                                      int num_samples, int num_orientations,
-                                      bool clockwise) {
-  DISPATCH_DEVICE_IMPL(riroi_align_rotated_forward_impl, features, rois, output,
-                       pooled_height, pooled_width, spatial_scale, num_samples,
-                       num_orientations, clockwise);
-}
-
-void riroi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
-                                       Tensor bottom_grad, int pooled_height,
-                                       int pooled_width, float spatial_scale,
-                                       int num_samples, int num_orientations,
-                                       bool clockwise) {
-  DISPATCH_DEVICE_IMPL(riroi_align_rotated_backward_impl, top_grad, rois,
-                       bottom_grad, pooled_height, pooled_width, spatial_scale,
-                       num_samples, num_orientations, clockwise);
-}
-
-void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
-                                 int pooled_height, int pooled_width,
-                                 float spatial_scale, int num_samples,
-                                 int num_orientations, bool clockwise) {
-  riroi_align_rotated_forward_impl(features, rois, output, pooled_height,
-                                   pooled_width, spatial_scale, num_samples,
-                                   num_orientations, clockwise);
-}
-
-void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
-                                  Tensor bottom_grad, int pooled_height,
-                                  int pooled_width, float spatial_scale,
-                                  int num_samples, int num_orientations,
-                                  bool clockwise) {
-  riroi_align_rotated_backward_impl(top_grad, rois, bottom_grad, pooled_height,
-                                    pooled_width, spatial_scale, num_samples,
-                                    num_orientations, clockwise);
-}
diff --git a/mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp b/mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp
deleted file mode 100644
index 5eb340ce4..000000000
--- a/mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "riroi_align_rotated_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void riroi_align_rotated_forward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  int sample_num;
-  int num_orientations;
-  bool clockwise;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("num_samples", sample_num)
-      .get<int>("num_orientations", num_orientations)
-      .get<bool>("clockwise", clockwise)
-      .done();
-
-  auto input = buildATensor(ctx, ins[0]);
-  auto rois = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  riroi_align_rotated_forward(input, rois, output, pooled_height, pooled_width,
-                              spatial_scale, sample_num, num_orientations,
-                              clockwise);
-}
-
-void riroi_align_rotated_backward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  int sample_num;
-  int num_orientations;
-  bool clockwise;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("num_samples", sample_num)
-      .get<int>("num_orientations", num_orientations)
-      .get<bool>("clockwise", clockwise)
-      .done();
-
-  auto grad_output = buildATensor(ctx, ins[0]);
-  auto rois = buildATensor(ctx, ins[1]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  riroi_align_rotated_backward(grad_output, rois, grad_input, pooled_height,
-                               pooled_width, spatial_scale, sample_num,
-                               num_orientations, clockwise);
-}
-
-PARROTS_EXTENSION_REGISTER(riroi_align_rotated_forward)
-    .attr("pooled_height")
-    .attr("pooled_width")
-    .attr("spatial_scale")
-    .attr("num_samples")
-    .attr("num_orientations")
-    .attr("clockwise")
-    .input(2)
-    .output(1)
-    .apply(riroi_align_rotated_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(riroi_align_rotated_backward)
-    .attr("pooled_height")
-    .attr("pooled_width")
-    .attr("spatial_scale")
-    .attr("num_samples")
-    .attr("num_orientations")
-    .attr("clockwise")
-    .input(2)
-    .output(1)
-    .apply(riroi_align_rotated_backward_cuda_parrots)
-    .done();
-
-#endif
diff --git a/mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h b/mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h
deleted file mode 100644
index 49a30bffa..000000000
--- a/mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h
+++ /dev/null
@@ -1,18 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef RIROI_ALIGN_ROTATED_PYTORCH_H
-#define RIROI_ALIGN_ROTATED_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void riroi_align_rotated_forward(Tensor features, Tensor rois, Tensor output,
-                                 int pooled_height, int pooled_width,
-                                 float spatial_scale, int num_samples,
-                                 int num_orientations, bool clockwise);
-
-void riroi_align_rotated_backward(Tensor top_grad, Tensor rois,
-                                  Tensor bottom_grad, int pooled_height,
-                                  int pooled_width, float spatial_scale,
-                                  int num_samples, int num_orientations,
-                                  bool clockwise);
-
-#endif // RIROI_ALIGN_ROTATED_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/roi_align.cpp b/mmcv/ops/csrc/parrots/roi_align.cpp
deleted file mode 100644
index 6e7077397..000000000
--- a/mmcv/ops/csrc/parrots/roi_align.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void roi_align_forward_impl(Tensor input, Tensor rois, Tensor output,
-                            Tensor argmax_y, Tensor argmax_x,
-                            int aligned_height, int aligned_width,
-                            float spatial_scale, int sampling_ratio,
-                            int pool_mode, bool aligned) {
-  DISPATCH_DEVICE_IMPL(roi_align_forward_impl, input, rois, output, argmax_y,
-                       argmax_x, aligned_height, aligned_width, spatial_scale,
-                       sampling_ratio, pool_mode, aligned);
-}
-
-void roi_align_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax_y,
-                             Tensor argmax_x, Tensor grad_input,
-                             int aligned_height, int aligned_width,
-                             float spatial_scale, int sampling_ratio,
-                             int pool_mode, bool aligned) {
-  DISPATCH_DEVICE_IMPL(roi_align_backward_impl, grad_output, rois, argmax_y,
-                       argmax_x, grad_input, aligned_height, aligned_width,
-                       spatial_scale, sampling_ratio, pool_mode, aligned);
-}
-
-void roi_align_forward(Tensor input, Tensor rois, Tensor output,
-                       Tensor argmax_y, Tensor argmax_x, int aligned_height,
-                       int aligned_width, float spatial_scale,
-                       int sampling_ratio, int pool_mode, bool aligned) {
-  roi_align_forward_impl(input, rois, output, argmax_y, argmax_x,
-                         aligned_height, aligned_width, spatial_scale,
-                         sampling_ratio, pool_mode, aligned);
-}
-
-void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
-                        Tensor argmax_x, Tensor grad_input, int aligned_height,
-                        int aligned_width, float spatial_scale,
-                        int sampling_ratio, int pool_mode, bool aligned) {
-  roi_align_backward_impl(grad_output, rois, argmax_y, argmax_x, grad_input,
-                          aligned_height, aligned_width, spatial_scale,
-                          sampling_ratio, pool_mode, aligned);
-}
diff --git a/mmcv/ops/csrc/parrots/roi_align_parrots.cpp b/mmcv/ops/csrc/parrots/roi_align_parrots.cpp
deleted file mode 100644
index 60abea092..000000000
--- a/mmcv/ops/csrc/parrots/roi_align_parrots.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "roi_align_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void roi_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                    const OperatorBase::in_list_t& ins,
-                                    OperatorBase::out_list_t& outs) {
-  int aligned_height;
-  int aligned_width;
-  float spatial_scale;
-  int sampling_ratio;
-  int pool_mode;
-  bool aligned;
-  SSAttrs(attr)
-      .get<int>("aligned_height", aligned_height)
-      .get<int>("aligned_width", aligned_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<int>("pool_mode", pool_mode)
-      .get<bool>("aligned", aligned)
-      .done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  auto argmax_y = buildATensor(ctx, outs[1]);
-  auto argmax_x = buildATensor(ctx, outs[2]);
-  roi_align_forward_cuda(input, rois, output, argmax_y, argmax_x,
-                         aligned_height, aligned_width, spatial_scale,
-                         sampling_ratio, pool_mode, aligned);
-}
-
-void roi_align_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                     const OperatorBase::in_list_t& ins,
-                                     OperatorBase::out_list_t& outs) {
-  int aligned_height;
-  int aligned_width;
-  float spatial_scale;
-  int sampling_ratio;
-  int pool_mode;
-  bool aligned;
-  SSAttrs(attr)
-      .get<int>("aligned_height", aligned_height)
-      .get<int>("aligned_width", aligned_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<int>("pool_mode", pool_mode)
-      .get<bool>("aligned", aligned)
-      .done();
-
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  const auto& argmax_y = buildATensor(ctx, ins[2]);
-  const auto& argmax_x = buildATensor(ctx, ins[3]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  roi_align_backward_cuda(grad_output, rois, argmax_y, argmax_x, grad_input,
-                          aligned_height, aligned_width, spatial_scale,
-                          sampling_ratio, pool_mode, aligned);
-}
-#endif
-
-void roi_align_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
-                                   const OperatorBase::in_list_t& ins,
-                                   OperatorBase::out_list_t& outs) {
-  int aligned_height;
-  int aligned_width;
-  float spatial_scale;
-  int sampling_ratio;
-  int pool_mode;
-  bool aligned;
-  SSAttrs(attr)
-      .get<int>("aligned_height", aligned_height)
-      .get<int>("aligned_width", aligned_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<int>("pool_mode", pool_mode)
-      .get<bool>("aligned", aligned)
-      .done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  auto argmax_y = buildATensor(ctx, outs[1]);
-  auto argmax_x = buildATensor(ctx, outs[2]);
-  roi_align_forward_cpu(input, rois, output, argmax_y, argmax_x, aligned_height,
-                        aligned_width, spatial_scale, sampling_ratio, pool_mode,
-                        aligned);
-}
-
-void roi_align_backward_cpu_parrots(HostContext& ctx, const SSElement& attr,
-                                    const OperatorBase::in_list_t& ins,
-                                    OperatorBase::out_list_t& outs) {
-  int aligned_height;
-  int aligned_width;
-  float spatial_scale;
-  int sampling_ratio;
-  int pool_mode;
-  bool aligned;
-  SSAttrs(attr)
-      .get<int>("aligned_height", aligned_height)
-      .get<int>("aligned_width", aligned_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<int>("pool_mode", pool_mode)
-      .get<bool>("aligned", aligned)
-      .done();
-
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  const auto& argmax_y = buildATensor(ctx, ins[2]);
-  const auto& argmax_x = buildATensor(ctx, ins[3]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  roi_align_backward_cpu(grad_output, rois, argmax_y, argmax_x, grad_input,
-                         aligned_height, aligned_width, spatial_scale,
-                         sampling_ratio, pool_mode, aligned);
-}
-
-PARROTS_EXTENSION_REGISTER(roi_align_forward)
-    .attr("aligned_height")
-    .attr("aligned_width")
-    .attr("spatial_scale")
-    .attr("sampling_ratio")
-    .attr("pool_mode")
-    .attr("aligned")
-    .input(2)
-    .output(3)
-    .apply(roi_align_forward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(roi_align_forward_cuda_parrots)
-#endif
-    .done();
-
-PARROTS_EXTENSION_REGISTER(roi_align_backward)
-    .attr("aligned_height")
-    .attr("aligned_width")
-    .attr("spatial_scale")
-    .attr("sampling_ratio")
-    .attr("pool_mode")
-    .attr("aligned")
-    .input(4)
-    .output(1)
-    .apply(roi_align_backward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(roi_align_backward_cuda_parrots)
-#endif
-    .done();
diff --git a/mmcv/ops/csrc/parrots/roi_align_pytorch.h b/mmcv/ops/csrc/parrots/roi_align_pytorch.h
deleted file mode 100644
index 4c6016098..000000000
--- a/mmcv/ops/csrc/parrots/roi_align_pytorch.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ROI_ALIGN_PYTORCH_H
-#define ROI_ALIGN_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-#ifdef MMCV_WITH_CUDA
-void roi_align_forward_cuda(Tensor input, Tensor rois, Tensor output,
-                            Tensor argmax_y, Tensor argmax_x,
-                            int aligned_height, int aligned_width,
-                            float spatial_scale, int sampling_ratio,
-                            int pool_mode, bool aligned);
-
-void roi_align_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax_y,
-                             Tensor argmax_x, Tensor grad_input,
-                             int aligned_height, int aligned_width,
-                             float spatial_scale, int sampling_ratio,
-                             int pool_mode, bool aligned);
-#endif
-
-void roi_align_forward_cpu(Tensor input, Tensor rois, Tensor output,
-                           Tensor argmax_y, Tensor argmax_x, int aligned_height,
-                           int aligned_width, float spatial_scale,
-                           int sampling_ratio, int pool_mode, bool aligned);
-
-void roi_align_backward_cpu(Tensor grad_output, Tensor rois, Tensor argmax_y,
-                            Tensor argmax_x, Tensor grad_input,
-                            int aligned_height, int aligned_width,
-                            float spatial_scale, int sampling_ratio,
-                            int pool_mode, bool aligned);
-
-#endif // ROI_ALIGN_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/roi_align_rotated.cpp b/mmcv/ops/csrc/parrots/roi_align_rotated.cpp
deleted file mode 100644
index 5ef691ada..000000000
--- a/mmcv/ops/csrc/parrots/roi_align_rotated.cpp
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void roi_align_rotated_forward_impl(Tensor features, Tensor rois, Tensor output,
-                                    int aligned_height, int aligned_width,
-                                    float spatial_scale, int sample_ratio,
-                                    bool aligned, bool clockwise) {
-  DISPATCH_DEVICE_IMPL(roi_align_rotated_forward_impl, features, rois, output,
-                       aligned_height, aligned_width, spatial_scale,
-                       sample_ratio, aligned, clockwise);
-}
-
-void roi_align_rotated_backward_impl(Tensor top_grad, Tensor rois,
-                                     Tensor bottom_grad, int aligned_height,
-                                     int aligned_width, float spatial_scale,
-                                     int sample_ratio, bool aligned,
-                                     bool clockwise) {
-  DISPATCH_DEVICE_IMPL(roi_align_rotated_backward_impl, top_grad, rois,
-                       bottom_grad, aligned_height, aligned_width,
-                       spatial_scale, sample_ratio, aligned, clockwise);
-}
-
-void roi_align_rotated_forward(Tensor input, Tensor rois, Tensor output,
-                               int aligned_height, int aligned_width,
-                               float spatial_scale, int sampling_ratio,
-                               bool aligned, bool clockwise) {
-  roi_align_rotated_forward_impl(input, rois, output, aligned_height,
-                                 aligned_width, spatial_scale, sampling_ratio,
-                                 aligned, clockwise);
-}
-
-void roi_align_rotated_backward(Tensor top_grad, Tensor rois,
-                                Tensor bottom_grad, int aligned_height,
-                                int aligned_width, float spatial_scale,
-                                int sampling_ratio, bool aligned,
-                                bool clockwise) {
-  roi_align_rotated_backward_impl(top_grad, rois, bottom_grad, aligned_height,
-                                  aligned_width, spatial_scale, sampling_ratio,
-                                  aligned, clockwise);
-}
diff --git a/mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp b/mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp
deleted file mode 100644
index 9386250a2..000000000
--- a/mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "roi_align_rotated_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void roi_align_rotated_forward_cuda_parrots(CudaContext& ctx,
-                                            const SSElement& attr,
-                                            const OperatorBase::in_list_t& ins,
-                                            OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  int sampling_ratio;
-  bool aligned;
-  bool clockwise;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<bool>("aligned", aligned)
-      .get<bool>("clockwise", clockwise)
-      .done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  roi_align_rotated_forward_cuda(input, rois, output, pooled_height,
-                                 pooled_width, spatial_scale, sampling_ratio,
-                                 aligned, clockwise);
-}
-
-void roi_align_rotated_backward_cuda_parrots(CudaContext& ctx,
-                                             const SSElement& attr,
-                                             const OperatorBase::in_list_t& ins,
-                                             OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  int sampling_ratio;
-  bool aligned;
-  bool clockwise;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<bool>("aligned", aligned)
-      .get<bool>("clockwise", clockwise)
-      .done();
-
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  roi_align_rotated_backward_cuda(grad_output, rois, grad_input, pooled_height,
-                                  pooled_width, spatial_scale, sampling_ratio,
-                                  aligned, clockwise);
-}
-#endif
-
-void roi_align_rotated_forward_cpu_parrots(HostContext& ctx,
-                                           const SSElement& attr,
-                                           const OperatorBase::in_list_t& ins,
-                                           OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  int sampling_ratio;
-  bool aligned;
-  bool clockwise;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<bool>("aligned", aligned)
-      .get<bool>("clockwise", clockwise)
-      .done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  roi_align_rotated_forward_cpu(input, rois, output, pooled_height,
-                                pooled_width, spatial_scale, sampling_ratio,
-                                aligned, clockwise);
-}
-
-void roi_align_rotated_backward_cpu_parrots(HostContext& ctx,
-                                            const SSElement& attr,
-                                            const OperatorBase::in_list_t& ins,
-                                            OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  int sampling_ratio;
-  bool aligned;
-  bool clockwise;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("sampling_ratio", sampling_ratio)
-      .get<bool>("aligned", aligned)
-      .get<bool>("clockwise", clockwise)
-      .done();
-
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  roi_align_rotated_backward_cpu(grad_output, rois, grad_input, pooled_height,
-                                 pooled_width, spatial_scale, sampling_ratio,
-                                 aligned, clockwise);
-}
-
-PARROTS_EXTENSION_REGISTER(roi_align_rotated_forward)
-    .attr("pooled_height")
-    .attr("pooled_width")
-    .attr("spatial_scale")
-    .attr("sampling_ratio")
-    .attr("aligned")
-    .attr("clockwise")
-    .input(2)
-    .output(1)
-    .apply(roi_align_rotated_forward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(roi_align_rotated_forward_cuda_parrots)
-#endif
-    .done();
-
-PARROTS_EXTENSION_REGISTER(roi_align_rotated_backward)
-    .attr("pooled_height")
-    .attr("pooled_width")
-    .attr("spatial_scale")
-    .attr("sampling_ratio")
-    .attr("aligned")
-    .attr("clockwise")
-    .input(2)
-    .output(1)
-    .apply(roi_align_rotated_backward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(roi_align_rotated_backward_cuda_parrots)
-#endif
-    .done();
diff --git a/mmcv/ops/csrc/parrots/roi_align_rotated_pytorch.h b/mmcv/ops/csrc/parrots/roi_align_rotated_pytorch.h
deleted file mode 100644
index 8136b56d1..000000000
--- a/mmcv/ops/csrc/parrots/roi_align_rotated_pytorch.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ROI_ALIGN_ROTATED_PYTORCH_H
-#define ROI_ALIGN_ROTATED_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-#ifdef MMCV_WITH_CUDA
-void roi_align_rotated_forward_cuda(Tensor input, Tensor rois, Tensor output,
-                                    int pooled_height, int pooled_width,
-                                    float spatial_scale, int sampling_ratio,
-                                    bool aligned, bool clockwise);
-
-void roi_align_rotated_backward_cuda(Tensor grad_output, Tensor rois,
-                                     Tensor bottom_grad, int pooled_height,
-                                     int pooled_width, float spatial_scale,
-                                     int sampling_ratio, bool aligned,
-                                     bool clockwise);
-#endif
-
-void roi_align_rotated_forward_cpu(Tensor input, Tensor rois, Tensor output,
-                                   int pooled_height, int pooled_width,
-                                   float spatial_scale, int sampling_ratio,
-                                   bool aligned, bool clockwise);
-
-void roi_align_rotated_backward_cpu(Tensor grad_output, Tensor rois,
-                                    Tensor bottom_grad, int pooled_height,
-                                    int pooled_width, float spatial_scale,
-                                    int sampling_ratio, bool aligned,
-                                    bool clockwise);
-
-#endif // ROI_ALIGN_ROTATED_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/roi_pool.cpp b/mmcv/ops/csrc/parrots/roi_pool.cpp
deleted file mode 100644
index bba90b806..000000000
--- a/mmcv/ops/csrc/parrots/roi_pool.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
-                           Tensor argmax, int pooled_height, int pooled_width,
-                           float spatial_scale) {
-  DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax,
-                       pooled_height, pooled_width, spatial_scale);
-}
-
-void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
-                            Tensor grad_input, int pooled_height,
-                            int pooled_width, float spatial_scale) {
-  DISPATCH_DEVICE_IMPL(roi_pool_backward_impl, grad_output, rois, argmax,
-                       grad_input, pooled_height, pooled_width, spatial_scale);
-}
-
-void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
-                      int pooled_height, int pooled_width,
-                      float spatial_scale) {
-  roi_pool_forward_impl(input, rois, output, argmax, pooled_height,
-                        pooled_width, spatial_scale);
-}
-
-void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
-                       Tensor grad_input, int pooled_height, int pooled_width,
-                       float spatial_scale) {
-  roi_pool_backward_impl(grad_output, rois, argmax, grad_input, pooled_height,
-                         pooled_width, spatial_scale);
-}
diff --git a/mmcv/ops/csrc/parrots/roi_pool_parrots.cpp b/mmcv/ops/csrc/parrots/roi_pool_parrots.cpp
deleted file mode 100644
index 0acde4a41..000000000
--- a/mmcv/ops/csrc/parrots/roi_pool_parrots.cpp
+++ /dev/null
@@ -1,67 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "roi_pool_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void roi_pool_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                   const OperatorBase::in_list_t& ins,
-                                   OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  auto argmax = buildATensor(ctx, outs[1]);
-  roi_pool_forward_cuda(input, rois, output, argmax, pooled_height,
-                        pooled_width, spatial_scale);
-}
-
-void roi_pool_backward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                    const OperatorBase::in_list_t& ins,
-                                    OperatorBase::out_list_t& outs) {
-  int pooled_height;
-  int pooled_width;
-  float spatial_scale;
-  SSAttrs(attr)
-      .get<int>("pooled_height", pooled_height)
-      .get<int>("pooled_width", pooled_width)
-      .get<float>("spatial_scale", spatial_scale)
-      .done();
-
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& rois = buildATensor(ctx, ins[1]);
-  const auto& argmax = buildATensor(ctx, ins[2]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  roi_pool_backward_cuda(grad_output, rois, argmax, grad_input, pooled_height,
-                         pooled_width, spatial_scale);
-}
-
-PARROTS_EXTENSION_REGISTER(roi_pool_forward)
-    .attr("pooled_height")
-    .attr("pooled_width")
-    .attr("spatial_scale")
-    .input(2)
-    .output(2)
-    .apply(roi_pool_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(roi_pool_backward)
-    .attr("pooled_height")
-    .attr("pooled_width")
-    .attr("spatial_scale")
-    .input(3)
-    .output(1)
-    .apply(roi_pool_backward_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/roi_pool_pytorch.h b/mmcv/ops/csrc/parrots/roi_pool_pytorch.h
deleted file mode 100644
index d67a1502f..000000000
--- a/mmcv/ops/csrc/parrots/roi_pool_pytorch.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ROI_POOL_PYTORCH_H
-#define ROI_POOL_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-#ifdef MMCV_WITH_CUDA
-void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
-                           Tensor argmax, int pooled_height, int pooled_width,
-                           float spatial_scale);
-
-void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax,
-                            Tensor grad_input, int pooled_height,
-                            int pooled_width, float spatial_scale);
-#endif
-#endif // ROI_POOL_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/roiaware_pool3d.cpp b/mmcv/ops/csrc/parrots/roiaware_pool3d.cpp
deleted file mode 100644
index 6cf9cf094..000000000
--- a/mmcv/ops/csrc/parrots/roiaware_pool3d.cpp
+++ /dev/null
@@ -1,72 +0,0 @@
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
-                                  int max_pts_each_voxel, int out_x, int out_y,
-                                  int out_z, const Tensor rois,
-                                  const Tensor pts, const Tensor pts_feature,
-                                  Tensor argmax, Tensor pts_idx_of_voxels,
-                                  Tensor pooled_features, int pool_method) {
-  DISPATCH_DEVICE_IMPL(roiaware_pool3d_forward_impl, boxes_num, pts_num,
-                       channels, max_pts_each_voxel, out_x, out_y, out_z, rois,
-                       pts, pts_feature, argmax, pts_idx_of_voxels,
-                       pooled_features, pool_method);
-}
-
-void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
-                                   int out_z, int channels,
-                                   int max_pts_each_voxel,
-                                   const Tensor pts_idx_of_voxels,
-                                   const Tensor argmax, const Tensor grad_out,
-                                   Tensor grad_in, int pool_method) {
-  DISPATCH_DEVICE_IMPL(roiaware_pool3d_backward_impl, boxes_num, out_x, out_y,
-                       out_z, channels, max_pts_each_voxel, pts_idx_of_voxels,
-                       argmax, grad_out, grad_in, pool_method);
-}
-
-void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
-                             Tensor argmax, Tensor pts_idx_of_voxels,
-                             Tensor pooled_features, int pool_method) {
-  // params rois: (N, 7) [x, y, z, x_size, y_size, z_size, ry] in LiDAR
-  // coordinate
-  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
-  // params pts_feature: (npoints, C)
-  // params argmax: (N, out_x, out_y, out_z, C)
-  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
-  // params pooled_features: (N, out_x, out_y, out_z, C)
-  // params pool_method: 0: max_pool 1: avg_pool
-  int boxes_num = rois.size(0);
-  int pts_num = pts.size(0);
-  int channels = pts_feature.size(1);
-  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter
-  int out_x = pts_idx_of_voxels.size(1);
-  int out_y = pts_idx_of_voxels.size(2);
-  int out_z = pts_idx_of_voxels.size(3);
-  assert((out_x < 256) && (out_y < 256) &&
-         (out_z < 256));  // we encode index with 8bit
-
-  roiaware_pool3d_forward_impl(boxes_num, pts_num, channels, max_pts_each_voxel,
-                               out_x, out_y, out_z, rois, pts, pts_feature,
-                               argmax, pts_idx_of_voxels, pooled_features,
-                               pool_method);
-}
-
-void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
-                              Tensor grad_out, Tensor grad_in,
-                              int pool_method) {
-  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
-  // params argmax: (N, out_x, out_y, out_z, C)
-  // params grad_out: (N, out_x, out_y, out_z, C)
-  // params grad_in: (npoints, C), return value
-  // params pool_method: 0: max_pool 1: avg_pool
-  int boxes_num = pts_idx_of_voxels.size(0);
-  int out_x = pts_idx_of_voxels.size(1);
-  int out_y = pts_idx_of_voxels.size(2);
-  int out_z = pts_idx_of_voxels.size(3);
-  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter
-  int channels = grad_out.size(4);
-
-  roiaware_pool3d_backward_impl(boxes_num, out_x, out_y, out_z, channels,
-                                max_pts_each_voxel, pts_idx_of_voxels, argmax,
-                                grad_out, grad_in, pool_method);
-}
diff --git a/mmcv/ops/csrc/parrots/roiaware_pool3d_parrots.cpp b/mmcv/ops/csrc/parrots/roiaware_pool3d_parrots.cpp
deleted file mode 100644
index 771d92004..000000000
--- a/mmcv/ops/csrc/parrots/roiaware_pool3d_parrots.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "roiaware_pool3d_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void roiaware_pool3d_forward_cuda_parrots(CudaContext& ctx,
-                                          const SSElement& attr,
-                                          const OperatorBase::in_list_t& ins,
-                                          OperatorBase::out_list_t& outs) {
-  int pool_method;
-  SSAttrs(attr).get<int>("pool_method", pool_method).done();
-  auto rois = buildATensor(ctx, ins[0]);
-  auto pts = buildATensor(ctx, ins[1]);
-  auto pts_feature = buildATensor(ctx, ins[2]);
-
-  auto argmax = buildATensor(ctx, outs[0]);
-  auto pts_idx_of_voxels = buildATensor(ctx, outs[1]);
-  auto pooled_features = buildATensor(ctx, outs[2]);
-
-  roiaware_pool3d_forward(rois, pts, pts_feature, argmax, pts_idx_of_voxels,
-                          pooled_features, pool_method);
-}
-
-void roiaware_pool3d_backward_cuda_parrots(CudaContext& ctx,
-                                           const SSElement& attr,
-                                           const OperatorBase::in_list_t& ins,
-                                           OperatorBase::out_list_t& outs) {
-  int pool_method;
-  SSAttrs(attr).get<int>("pool_method", pool_method).done();
-  auto pts_idx_of_voxels = buildATensor(ctx, ins[0]);
-  auto argmax = buildATensor(ctx, ins[1]);
-  auto grad_out = buildATensor(ctx, ins[2]);
-
-  auto grad_in = buildATensor(ctx, outs[0]);
-
-  roiaware_pool3d_backward(pts_idx_of_voxels, argmax, grad_out, grad_in,
-                           pool_method);
-}
-
-PARROTS_EXTENSION_REGISTER(roiaware_pool3d_forward)
-    .attr("pool_method")
-    .input(3)
-    .output(3)
-    .apply(roiaware_pool3d_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(roiaware_pool3d_backward)
-    .attr("pool_method")
-    .input(3)
-    .output(1)
-    .apply(roiaware_pool3d_backward_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/roiaware_pool3d_pytorch.h b/mmcv/ops/csrc/parrots/roiaware_pool3d_pytorch.h
deleted file mode 100644
index 0b4b0402a..000000000
--- a/mmcv/ops/csrc/parrots/roiaware_pool3d_pytorch.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ROIAWARE_POOL3D_PYTORCH_H
-#define ROIAWARE_POOL3D_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
-                             Tensor argmax, Tensor pts_idx_of_voxels,
-                             Tensor pooled_features, int pool_method);
-
-void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
-                              Tensor grad_out, Tensor grad_in, int pool_method);
-
-#endif // ROIAWARE_POOL3D_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/roipoint_pool3d.cpp b/mmcv/ops/csrc/parrots/roipoint_pool3d.cpp
deleted file mode 100644
index a10080b7c..000000000
--- a/mmcv/ops/csrc/parrots/roipoint_pool3d.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
-Modified from
-https://github.com/open-mmlab/OpenPCDet/blob/master/pcdet/ops/roipoint_pool3d/src/roipoint_pool3d.cpp
-Point cloud feature pooling
-Written by Shaoshuai Shi
-All Rights Reserved 2018.
-*/
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
-                                  int feature_in_len, int sampled_pts_num,
-                                  const Tensor xyz, const Tensor boxes3d,
-                                  const Tensor pts_feature,
-                                  Tensor pooled_features,
-                                  Tensor pooled_empty_flag) {
-  DISPATCH_DEVICE_IMPL(roipoint_pool3d_forward_impl, batch_size, pts_num,
-                       boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d,
-                       pts_feature, pooled_features, pooled_empty_flag);
-}
-
-void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
-                             Tensor pooled_features, Tensor pooled_empty_flag) {
-  // params xyz: (B, N, 3)
-  // params boxes3d: (B, M, 7)
-  // params pts_feature: (B, N, C)
-  // params pooled_features: (B, M, 512, 3+C)
-  // params pooled_empty_flag: (B, M)
-  int batch_size = xyz.size(0);
-  int pts_num = xyz.size(1);
-  int boxes_num = boxes3d.size(1);
-  int feature_in_len = pts_feature.size(2);
-  int sampled_pts_num = pooled_features.size(2);
-
-  roipoint_pool3d_forward_impl(batch_size, pts_num, boxes_num, feature_in_len,
-                               sampled_pts_num, xyz, boxes3d, pts_feature,
-                               pooled_features, pooled_empty_flag);
-}
diff --git a/mmcv/ops/csrc/parrots/roipoint_pool3d_parrots.cpp b/mmcv/ops/csrc/parrots/roipoint_pool3d_parrots.cpp
deleted file mode 100644
index 17f549849..000000000
--- a/mmcv/ops/csrc/parrots/roipoint_pool3d_parrots.cpp
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "roipoint_pool3d_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void roipoint_pool3d_forward_cuda_parrots(CudaContext& ctx,
-                                          const SSElement& attr,
-                                          const OperatorBase::in_list_t& ins,
-                                          OperatorBase::out_list_t& outs) {
-  auto xyz = buildATensor(ctx, ins[0]);
-  auto boxes3d = buildATensor(ctx, ins[1]);
-  auto pts_feature = buildATensor(ctx, ins[2]);
-
-  auto pooled_features = buildATensor(ctx, outs[0]);
-  auto pooled_empty_flag = buildATensor(ctx, outs[1]);
-
-  roipoint_pool3d_forward(xyz, boxes3d, pts_feature, pooled_features,
-                          pooled_empty_flag);
-}
-
-PARROTS_EXTENSION_REGISTER(roipoint_pool3d_forward)
-    .input(3)
-    .output(2)
-    .apply(roipoint_pool3d_forward_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/roipoint_pool3d_pytorch.h b/mmcv/ops/csrc/parrots/roipoint_pool3d_pytorch.h
deleted file mode 100644
index e5b61b0d9..000000000
--- a/mmcv/ops/csrc/parrots/roipoint_pool3d_pytorch.h
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ROIPOINT_POOL3D_PYTORCH_H
-#define ROIPOINT_POOL3D_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
-                             Tensor pooled_features, Tensor pooled_empty_flag);
-
-#endif // ROIPOINT_POOL3D_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/rotated_feature_align.cpp b/mmcv/ops/csrc/parrots/rotated_feature_align.cpp
deleted file mode 100644
index 71fe0c9a0..000000000
--- a/mmcv/ops/csrc/parrots/rotated_feature_align.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved.
-// Modified from
-// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_cuda.cpp
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void rotated_feature_align_forward_impl(const Tensor features,
-                                        const Tensor best_bboxes,
-                                        const float spatial_scale,
-                                        const int points, Tensor output) {
-  DISPATCH_DEVICE_IMPL(rotated_feature_align_forward_impl, features,
-                       best_bboxes, spatial_scale, points, output);
-}
-
-void rotated_feature_align_backward_impl(const Tensor top_grad,
-                                         const Tensor best_bboxes,
-                                         const float spatial_scale,
-                                         const int points, Tensor bottom_grad) {
-  DISPATCH_DEVICE_IMPL(rotated_feature_align_backward_impl, top_grad,
-                       best_bboxes, spatial_scale, points, bottom_grad);
-}
-
-void rotated_feature_align_forward(const Tensor features,
-                                   const Tensor best_bboxes, Tensor output,
-                                   const float spatial_scale,
-                                   const int points) {
-  rotated_feature_align_forward_impl(features, best_bboxes, spatial_scale,
-                                     points, output);
-}
-
-void rotated_feature_align_backward(const Tensor top_grad,
-                                    const Tensor best_bboxes,
-                                    Tensor bottom_grad,
-                                    const float spatial_scale,
-                                    const int points) {
-  rotated_feature_align_backward_impl(top_grad, best_bboxes, spatial_scale,
-                                      points, bottom_grad);
-}
diff --git a/mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp b/mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp
deleted file mode 100644
index d4efaf1d3..000000000
--- a/mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "rotated_feature_align_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void rotated_feature_align_forward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  float spatial_scale;
-  int points;
-  SSAttrs(attr)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("points", points)
-      .done();
-
-  auto features = buildATensor(ctx, ins[0]);
-  auto best_bboxes = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  rotated_feature_align_forward(features, best_bboxes, output, spatial_scale,
-                                points);
-}
-
-void rotated_feature_align_backward_cuda_parrots(
-    CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  float spatial_scale;
-  int points;
-  SSAttrs(attr)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("points", points)
-      .done();
-
-  auto grad_output = buildATensor(ctx, ins[0]);
-  auto best_bboxes = buildATensor(ctx, ins[1]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  rotated_feature_align_backward(grad_output, best_bboxes, grad_input,
-                                 spatial_scale, points);
-}
-#endif
-
-void rotated_feature_align_forward_cpu_parrots(
-    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  float spatial_scale;
-  int points;
-  SSAttrs(attr)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("points", points)
-      .done();
-
-  auto features = buildATensor(ctx, ins[0]);
-  auto best_bboxes = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  rotated_feature_align_forward(features, best_bboxes, output, spatial_scale,
-                                points);
-}
-
-void rotated_feature_align_backward_cpu_parrots(
-    HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
-    OperatorBase::out_list_t& outs) {
-  float spatial_scale;
-  int points;
-  SSAttrs(attr)
-      .get<float>("spatial_scale", spatial_scale)
-      .get<int>("points", points)
-      .done();
-
-  auto grad_output = buildATensor(ctx, ins[0]);
-  auto best_bboxes = buildATensor(ctx, ins[1]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  rotated_feature_align_backward(grad_output, best_bboxes, grad_input,
-                                 spatial_scale, points);
-}
-
-PARROTS_EXTENSION_REGISTER(rotated_feature_align_forward)
-    .attr("spatial_scale")
-    .attr("points")
-    .input(2)
-    .output(1)
-    .apply(rotated_feature_align_forward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(rotated_feature_align_forward_cuda_parrots)
-#endif
-    .done();
-
-PARROTS_EXTENSION_REGISTER(rotated_feature_align_backward)
-    .attr("spatial_scale")
-    .attr("points")
-    .input(2)
-    .output(1)
-    .apply(rotated_feature_align_backward_cpu_parrots)
-#ifdef MMCV_WITH_CUDA
-    .apply(rotated_feature_align_backward_cuda_parrots)
-#endif
-    .done();
diff --git a/mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h b/mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h
deleted file mode 100644
index 9a695ee5e..000000000
--- a/mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h
+++ /dev/null
@@ -1,17 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef ROTATED_FEATURE_ALIGN_PYTORCH_H
-#define ROTATED_FEATURE_ALIGN_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void rotated_feature_align_forward(const Tensor features,
-                                   const Tensor best_bboxes, Tensor output,
-                                   const float spatial_scale, const int points);
-
-void rotated_feature_align_backward(const Tensor top_grad,
-                                    const Tensor best_bboxes,
-                                    Tensor bottom_grad,
-                                    const float spatial_scale,
-                                    const int points);
-
-#endif // ROTATED_FEATURE_ALIGN_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/sync_bn.cpp b/mmcv/ops/csrc/parrots/sync_bn.cpp
deleted file mode 100644
index fd5a51327..000000000
--- a/mmcv/ops/csrc/parrots/sync_bn.cpp
+++ /dev/null
@@ -1,69 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void sync_bn_forward_mean_impl(const Tensor input, Tensor mean) {
-  DISPATCH_DEVICE_IMPL(sync_bn_forward_mean_impl, input, mean);
-}
-
-void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
-                              Tensor var) {
-  DISPATCH_DEVICE_IMPL(sync_bn_forward_var_impl, input, mean, var);
-}
-
-void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
-                                 const Tensor var, Tensor running_mean,
-                                 Tensor running_var, const Tensor weight,
-                                 const Tensor bias, Tensor norm, Tensor std,
-                                 Tensor output, float eps, float momentum,
-                                 int group_size) {
-  DISPATCH_DEVICE_IMPL(sync_bn_forward_output_impl, input, mean, var,
-                       running_mean, running_var, weight, bias, norm, std,
-                       output, eps, momentum, group_size);
-}
-
-void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
-                                 Tensor grad_weight, Tensor grad_bias) {
-  DISPATCH_DEVICE_IMPL(sync_bn_backward_param_impl, grad_output, norm,
-                       grad_weight, grad_bias);
-}
-
-void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
-                                const Tensor grad_weight,
-                                const Tensor grad_bias, const Tensor norm,
-                                const Tensor std, Tensor grad_input) {
-  DISPATCH_DEVICE_IMPL(sync_bn_backward_data_impl, grad_output, weight,
-                       grad_weight, grad_bias, norm, std, grad_input);
-}
-
-void sync_bn_forward_mean(const Tensor input, Tensor mean) {
-  sync_bn_forward_mean_impl(input, mean);
-}
-
-void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
-  sync_bn_forward_var_impl(input, mean, var);
-}
-
-void sync_bn_forward_output(const Tensor input, const Tensor mean,
-                            const Tensor var, const Tensor weight,
-                            const Tensor bias, Tensor running_mean,
-                            Tensor running_var, Tensor norm, Tensor std,
-                            Tensor output, float eps, float momentum,
-                            int group_size) {
-  sync_bn_forward_output_impl(input, mean, var, running_mean, running_var,
-                              weight, bias, norm, std, output, eps, momentum,
-                              group_size);
-}
-
-void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
-                            Tensor grad_weight, Tensor grad_bias) {
-  sync_bn_backward_param_impl(grad_output, norm, grad_weight, grad_bias);
-}
-
-void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
-                           const Tensor grad_weight, const Tensor grad_bias,
-                           const Tensor norm, const Tensor std,
-                           Tensor grad_input) {
-  sync_bn_backward_data_impl(grad_output, weight, grad_weight, grad_bias, norm,
-                             std, grad_input);
-}
diff --git a/mmcv/ops/csrc/parrots/sync_bn_parrots.cpp b/mmcv/ops/csrc/parrots/sync_bn_parrots.cpp
deleted file mode 100644
index 0b1855abd..000000000
--- a/mmcv/ops/csrc/parrots/sync_bn_parrots.cpp
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "sync_bn_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void sync_bn_forward_mean_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                       const OperatorBase::in_list_t& ins,
-                                       OperatorBase::out_list_t& outs) {
-  const auto& input = buildATensor(ctx, ins[0]);
-  auto mean = buildATensor(ctx, outs[0]);
-  sync_bn_forward_mean_cuda(input, mean);
-}
-
-void sync_bn_forward_var_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                      const OperatorBase::in_list_t& ins,
-                                      OperatorBase::out_list_t& outs) {
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& mean = buildATensor(ctx, ins[1]);
-  auto var = buildATensor(ctx, outs[0]);
-  sync_bn_forward_var_cuda(input, mean, var);
-}
-
-void sync_bn_forward_output_cuda_parrots(CudaContext& ctx,
-                                         const SSElement& attr,
-                                         const OperatorBase::in_list_t& ins,
-                                         OperatorBase::out_list_t& outs) {
-  size_t group_size;
-  float eps, momentum;
-  SSAttrs(attr)
-      .get<float>("eps", eps)
-      .get<float>("momentum", momentum)
-      .get<size_t>("group_size", group_size)
-      .done();
-
-  const auto& input = buildATensor(ctx, ins[0]);
-  const auto& mean = buildATensor(ctx, ins[1]);
-  const auto& var = buildATensor(ctx, ins[2]);
-  const auto& weight = buildATensor(ctx, ins[3]);
-  const auto& bias = buildATensor(ctx, ins[4]);
-  auto running_mean = buildATensor(ctx, outs[0]);
-  auto running_var = buildATensor(ctx, outs[1]);
-  auto norm = buildATensor(ctx, outs[2]);
-  auto std = buildATensor(ctx, outs[3]);
-  auto output = buildATensor(ctx, outs[4]);
-  sync_bn_forward_output_cuda(input, mean, var, running_mean, running_var,
-                              weight, bias, norm, std, output, eps, momentum,
-                              group_size);
-}
-
-void sync_bn_backward_param_cuda_parrots(CudaContext& ctx,
-                                         const SSElement& attr,
-                                         const OperatorBase::in_list_t& ins,
-                                         OperatorBase::out_list_t& outs) {
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& norm = buildATensor(ctx, ins[1]);
-  auto grad_weight = buildATensor(ctx, outs[0]);
-  auto grad_bias = buildATensor(ctx, outs[1]);
-  sync_bn_backward_param_cuda(grad_output, norm, grad_weight, grad_bias);
-}
-
-void sync_bn_backward_data_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                        const OperatorBase::in_list_t& ins,
-                                        OperatorBase::out_list_t& outs) {
-  const auto& grad_output = buildATensor(ctx, ins[0]);
-  const auto& weight = buildATensor(ctx, ins[1]);
-  const auto& grad_weight = buildATensor(ctx, ins[2]);
-  const auto& grad_bias = buildATensor(ctx, ins[3]);
-  const auto& norm = buildATensor(ctx, ins[4]);
-  const auto& std = buildATensor(ctx, ins[5]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  sync_bn_backward_data_cuda(grad_output, weight, grad_weight, grad_bias, norm,
-                             std, grad_input);
-}
-
-PARROTS_EXTENSION_REGISTER(sync_bn_forward_mean)
-    .input(1)
-    .output(1)
-    .apply(sync_bn_forward_mean_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(sync_bn_forward_var)
-    .input(2)
-    .output(1)
-    .apply(sync_bn_forward_var_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(sync_bn_forward_output)
-    .attr("eps")
-    .attr("momentum")
-    .attr("group_size")
-    .input(5)
-    .output(5)
-    .apply(sync_bn_forward_output_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(sync_bn_backward_param)
-    .input(2)
-    .output(2)
-    .apply(sync_bn_backward_param_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(sync_bn_backward_data)
-    .input(6)
-    .output(1)
-    .apply(sync_bn_backward_data_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/sync_bn_pytorch.h b/mmcv/ops/csrc/parrots/sync_bn_pytorch.h
deleted file mode 100644
index 6bd6a7fad..000000000
--- a/mmcv/ops/csrc/parrots/sync_bn_pytorch.h
+++ /dev/null
@@ -1,26 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef SYNC_BN_PYTORCH_H
-#define SYNC_BN_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean);
-
-void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean,
-                              Tensor var);
-
-void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean,
-                                 const Tensor var, Tensor running_mean,
-                                 Tensor running_var, const Tensor weight,
-                                 const Tensor bias, Tensor norm, Tensor std,
-                                 Tensor output, float eps, float momentum,
-                                 int group_size);
-
-void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm,
-                                 Tensor grad_weight, Tensor grad_bias);
-
-void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
-                                const Tensor grad_weight,
-                                const Tensor grad_bias, const Tensor norm,
-                                const Tensor std, Tensor grad_input);
-#endif // SYNC_BN_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/three_interpolate.cpp b/mmcv/ops/csrc/parrots/three_interpolate.cpp
deleted file mode 100644
index 1e0ec71bb..000000000
--- a/mmcv/ops/csrc/parrots/three_interpolate.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-// Modified from
-// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void three_interpolate_forward_impl(int b, int c, int m, int n,
-                                    const Tensor points, const Tensor idx,
-                                    const Tensor weight, Tensor out) {
-  DISPATCH_DEVICE_IMPL(three_interpolate_forward_impl, b, c, m, n, points, idx,
-                       weight, out);
-}
-
-void three_interpolate_backward_impl(int b, int c, int n, int m,
-                                     const Tensor grad_out, const Tensor idx,
-                                     const Tensor weight, Tensor grad_points) {
-  DISPATCH_DEVICE_IMPL(three_interpolate_backward_impl, b, c, n, m, grad_out,
-                       idx, weight, grad_points);
-}
-
-void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
-                               Tensor weight_tensor, Tensor out_tensor, int b,
-                               int c, int m, int n) {
-  three_interpolate_forward_impl(b, c, m, n, points_tensor, idx_tensor,
-                                 weight_tensor, out_tensor);
-}
-
-void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
-                                Tensor weight_tensor, Tensor grad_points_tensor,
-                                int b, int c, int n, int m) {
-  three_interpolate_backward_impl(b, c, n, m, grad_out_tensor, idx_tensor,
-                                  weight_tensor, grad_points_tensor);
-}
diff --git a/mmcv/ops/csrc/parrots/three_interpolate_parrots.cpp b/mmcv/ops/csrc/parrots/three_interpolate_parrots.cpp
deleted file mode 100644
index a71a90fd1..000000000
--- a/mmcv/ops/csrc/parrots/three_interpolate_parrots.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "three_interpolate_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void three_interpolate_forward_cuda_parrots(CudaContext& ctx,
-                                            const SSElement& attr,
-                                            const OperatorBase::in_list_t& ins,
-                                            OperatorBase::out_list_t& outs) {
-  int b, c, m, n;
-  SSAttrs(attr)
-      .get<int>("b", b)
-      .get<int>("c", c)
-      .get<int>("m", m)
-      .get<int>("n", n)
-      .done();
-
-  auto points_tensor = buildATensor(ctx, ins[0]);
-  auto idx_tensor = buildATensor(ctx, ins[1]);
-  auto weight_tensor = buildATensor(ctx, ins[2]);
-
-  auto out_tensor = buildATensor(ctx, outs[0]);
-
-  three_interpolate_forward(points_tensor, idx_tensor, weight_tensor,
-                            out_tensor, b, c, m, n);
-}
-
-void three_interpolate_backward_cuda_parrots(CudaContext& ctx,
-                                             const SSElement& attr,
-                                             const OperatorBase::in_list_t& ins,
-                                             OperatorBase::out_list_t& outs) {
-  int b, c, n, m;
-  SSAttrs(attr)
-      .get<int>("b", b)
-      .get<int>("c", c)
-      .get<int>("n", n)
-      .get<int>("m", m)
-      .done();
-
-  auto grad_out_tensor = buildATensor(ctx, ins[0]);
-  auto idx_tensor = buildATensor(ctx, ins[1]);
-  auto weight_tensor = buildATensor(ctx, ins[2]);
-
-  auto grad_points_tensor = buildATensor(ctx, outs[0]);
-
-  three_interpolate_backward(grad_out_tensor, idx_tensor, weight_tensor,
-                             grad_points_tensor, b, c, n, m);
-}
-
-PARROTS_EXTENSION_REGISTER(three_interpolate_forward)
-    .attr("b")
-    .attr("c")
-    .attr("m")
-    .attr("n")
-    .input(3)
-    .output(1)
-    .apply(three_interpolate_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(three_interpolate_backward)
-    .attr("b")
-    .attr("c")
-    .attr("n")
-    .attr("m")
-    .input(3)
-    .output(1)
-    .apply(three_interpolate_backward_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/three_interpolate_pytorch.h b/mmcv/ops/csrc/parrots/three_interpolate_pytorch.h
deleted file mode 100644
index 464c6d900..000000000
--- a/mmcv/ops/csrc/parrots/three_interpolate_pytorch.h
+++ /dev/null
@@ -1,14 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef THREE_INTERPOLATE_PYTORCH_H
-#define THREE_INTERPOLATE_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
-                               Tensor weight_tensor, Tensor out_tensor, int b,
-                               int c, int m, int n);
-
-void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
-                                Tensor weight_tensor, Tensor grad_points_tensor,
-                                int b, int c, int n, int m);
-#endif // THREE_INTERPOLATE_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/three_nn.cpp b/mmcv/ops/csrc/parrots/three_nn.cpp
deleted file mode 100644
index b629200c0..000000000
--- a/mmcv/ops/csrc/parrots/three_nn.cpp
+++ /dev/null
@@ -1,18 +0,0 @@
-// Modified from
-// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
-
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
-                           const Tensor known, Tensor dist2, Tensor idx) {
-  DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
-                       idx);
-}
-
-void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
-                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
-                      int m) {
-  three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
-                        idx_tensor);
-}
diff --git a/mmcv/ops/csrc/parrots/three_nn_parrots.cpp b/mmcv/ops/csrc/parrots/three_nn_parrots.cpp
deleted file mode 100644
index c28c7d216..000000000
--- a/mmcv/ops/csrc/parrots/three_nn_parrots.cpp
+++ /dev/null
@@ -1,35 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "three_nn_pytorch.h"
-
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void three_nn_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
-                                   const OperatorBase::in_list_t& ins,
-                                   OperatorBase::out_list_t& outs) {
-  int b, n, m;
-  SSAttrs(attr).get<int>("b", b).get<int>("n", n).get<int>("m", m).done();
-
-  auto unknown_tensor = buildATensor(ctx, ins[0]);
-  auto known_tensor = buildATensor(ctx, ins[1]);
-
-  auto dist2_tensor = buildATensor(ctx, outs[0]);
-  auto idx_tensor = buildATensor(ctx, outs[1]);
-
-  three_nn_forward(unknown_tensor, known_tensor, dist2_tensor, idx_tensor, b, n,
-                   m);
-}
-
-PARROTS_EXTENSION_REGISTER(three_nn_forward)
-    .attr("b")
-    .attr("n")
-    .attr("m")
-    .input(2)
-    .output(2)
-    .apply(three_nn_forward_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/three_nn_pytorch.h b/mmcv/ops/csrc/parrots/three_nn_pytorch.h
deleted file mode 100644
index 6574fba09..000000000
--- a/mmcv/ops/csrc/parrots/three_nn_pytorch.h
+++ /dev/null
@@ -1,10 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef THREE_NN_PYTORCH_H
-#define THREE_NN_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
-                      Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
-                      int m);
-#endif // THREE_NN_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/tin_shift.cpp b/mmcv/ops/csrc/parrots/tin_shift.cpp
deleted file mode 100644
index b03f58754..000000000
--- a/mmcv/ops/csrc/parrots/tin_shift.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
-
-void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
-  DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
-}
-
-void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
-                             Tensor grad_input) {
-  DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
-}
-
-void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
-  tin_shift_forward_impl(input, shift, output);
-}
-
-void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
-  tin_shift_backward_impl(grad_output, shift, grad_input);
-}
diff --git a/mmcv/ops/csrc/parrots/tin_shift_parrots.cpp b/mmcv/ops/csrc/parrots/tin_shift_parrots.cpp
deleted file mode 100644
index b0920928e..000000000
--- a/mmcv/ops/csrc/parrots/tin_shift_parrots.cpp
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#include <parrots/compute/aten.hpp>
-#include <parrots/extension.hpp>
-#include <parrots/foundation/ssattrs.hpp>
-
-#include "tin_shift_pytorch.h"
-using namespace parrots;
-
-#ifdef MMCV_WITH_CUDA
-void tin_shift_forward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
-                                    const OperatorBase::in_list_t &ins,
-                                    OperatorBase::out_list_t &outs) {
-  const auto &input = buildATensor(ctx, ins[0]);
-  const auto &shift = buildATensor(ctx, ins[1]);
-  auto output = buildATensor(ctx, outs[0]);
-  tin_shift_forward_cuda(input, shift, output);
-}
-
-void tin_shift_backward_cuda_parrots(CudaContext &ctx, const SSElement &attr,
-                                     const OperatorBase::in_list_t &ins,
-                                     OperatorBase::out_list_t &outs) {
-  const auto &grad_output = buildATensor(ctx, ins[0]);
-  const auto &shift = buildATensor(ctx, ins[1]);
-  auto grad_input = buildATensor(ctx, outs[0]);
-  tin_shift_backward_cuda(grad_output, shift, grad_input);
-}
-
-PARROTS_EXTENSION_REGISTER(tin_shift_forward)
-    .input(2)
-    .output(1)
-    .apply(tin_shift_forward_cuda_parrots)
-    .done();
-
-PARROTS_EXTENSION_REGISTER(tin_shift_backward)
-    .input(2)
-    .output(1)
-    .apply(tin_shift_backward_cuda_parrots)
-    .done();
-#endif
diff --git a/mmcv/ops/csrc/parrots/tin_shift_pytorch.h b/mmcv/ops/csrc/parrots/tin_shift_pytorch.h
deleted file mode 100644
index fe7238376..000000000
--- a/mmcv/ops/csrc/parrots/tin_shift_pytorch.h
+++ /dev/null
@@ -1,11 +0,0 @@
-// Copyright (c) OpenMMLab. All rights reserved
-#ifndef TIN_SHIFT_PYTORCH_H
-#define TIN_SHIFT_PYTORCH_H
-#include <torch/serialize/tensor.h>
-using namespace at;
-
-void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output);
-
-void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
-                             Tensor grad_input);
-#endif // TIN_SHIFT_PYTORCH_H
diff --git a/mmcv/ops/csrc/parrots/upfirdn2d.cpp b/mmcv/ops/csrc/parrots/upfirdn2d.cpp
deleted file mode 100644
index dd325bd78..000000000
--- a/mmcv/ops/csrc/parrots/upfirdn2d.cpp
+++ /dev/null
@@ -1,118 +0,0 @@
-// Modified from
-// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp
-
-/*
-Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
-
-NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
-Augmentation (ADA)
-=======================================================================
-
-1. Definitions
-
-"Licensor" means any person or entity that distributes its Work.
-
-"Software" means the original work of authorship made available under
-this License.
-
-"Work" means the Software and any additions to or derivative works of
-the Software that are made available under this License.
- -The terms "reproduce," "reproduction," "derivative works," and -"distribution" have the meaning as provided under U.S. copyright law; -provided, however, that for the purposes of this License, derivative -works shall not include works that remain separable from, or merely -link (or bind by name) to the interfaces of, the Work. - -Works, including the Software, are "made available" under this License -by including in or with the Work either (a) a copyright notice -referencing the applicability of this License to the Work, or (b) a -copy of this License. - -2. License Grants - - 2.1 Copyright Grant. Subject to the terms and conditions of this - License, each Licensor grants to you a perpetual, worldwide, - non-exclusive, royalty-free, copyright license to reproduce, - prepare derivative works of, publicly display, publicly perform, - sublicense and distribute its Work and any resulting derivative - works in any form. - -3. Limitations - - 3.1 Redistribution. You may reproduce or distribute the Work only - if (a) you do so under this License, (b) you include a complete - copy of this License with your distribution, and (c) you retain - without modification any copyright, patent, trademark, or - attribution notices that are present in the Work. - - 3.2 Derivative Works. You may specify that additional or different - terms apply to the use, reproduction, and distribution of your - derivative works of the Work ("Your Terms") only if (a) Your Terms - provide that the use limitation in Section 3.3 applies to your - derivative works, and (b) you identify the specific derivative - works that are subject to Your Terms. Notwithstanding Your Terms, - this License (including the redistribution requirements in Section - 3.1) will continue to apply to the Work itself. - - 3.3 Use Limitation. The Work and any derivative works thereof only - may be used or intended for use non-commercially. Notwithstanding - the foregoing, NVIDIA and its affiliates may use the Work and any - derivative works commercially. As used herein, "non-commercially" - means for research or evaluation purposes only. - - 3.4 Patent Claims. If you bring or threaten to bring a patent claim - against any Licensor (including any claim, cross-claim or - counterclaim in a lawsuit) to enforce any patents that you allege - are infringed by any Work, then your rights under this License from - such Licensor (including the grant in Section 2.1) will terminate - immediately. - - 3.5 Trademarks. This License does not grant any rights to use any - Licensor’s or its affiliates’ names, logos, or trademarks, except - as necessary to reproduce the notices described in this License. - - 3.6 Termination. If you violate any term of this License, then your - rights under this License (including the grant in Section 2.1) will - terminate immediately. - -4. Disclaimer of Warranty. - -THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR -NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER -THIS LICENSE. - -5. Limitation of Liability. 
- -EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL -THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE -SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, -INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF -OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK -(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, -LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER -COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF -THE POSSIBILITY OF SUCH DAMAGES. - -======================================================================= -*/ - -#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input, - const torch::Tensor& kernel, int up_x, int up_y, - int down_x, int down_y, int pad_x0, int pad_x1, - int pad_y0, int pad_y1) { - return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y, - down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1); -} - -torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel, - int up_x, int up_y, int down_x, int down_y, int pad_x0, - int pad_x1, int pad_y0, int pad_y1) { - return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0, - pad_x1, pad_y0, pad_y1); -} diff --git a/mmcv/ops/csrc/parrots/upfirdn2d_parrots.cpp b/mmcv/ops/csrc/parrots/upfirdn2d_parrots.cpp deleted file mode 100644 index f0c50db5c..000000000 --- a/mmcv/ops/csrc/parrots/upfirdn2d_parrots.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#include <torch/extension.h> - -#include <parrots/compute/aten.hpp> -#include <parrots/extension.hpp> -#include <parrots/foundation/ssattrs.hpp> -using namespace at; -using namespace parrots; - -torch::Tensor upfirdn2d(const Tensor &input, const Tensor &kernel, int up_x, - int up_y, int down_x, int down_y, int pad_x0, - int pad_x1, int pad_y0, int pad_y1); - -void upfirdn2d_parrots(CudaContext &ctx, const SSElement &attr, - const OperatorBase::in_list_t &ins, - OperatorBase::out_list_t &outs) { - int up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1; - const auto &input = buildATensor(ctx, ins[0]); - const auto &kernel = buildATensor(ctx, ins[1]); - SSAttrs(attr) - .get("up_x", up_x) - .get("up_y", up_y) - .get("down_x", down_x) - .get("down_y", down_y) - .get("pad_x0", pad_x0) - .get("pad_x1", pad_x1) - .get("pad_y0", pad_y0) - .get("pad_y1", pad_y1) - .done(); - auto out = upfirdn2d(input, kernel, up_x, up_y, down_x, down_y, pad_x0, - pad_x1, pad_y0, pad_y1); - updateDArray(ctx, out, outs[0]); -} - -PARROTS_EXTENSION_REGISTER(upfirdn2d) - .attr("up_x") - .attr("up_y") - .attr("down_x") - .attr("down_y") - .attr("pad_x0") - .attr("pad_x1") - .attr("pad_y0") - .attr("pad_y1") - .input(2) - .output(1) - .apply(upfirdn2d_parrots) - .done(); diff --git a/mmcv/ops/csrc/parrots/voxelization.cpp b/mmcv/ops/csrc/parrots/voxelization.cpp deleted file mode 100644 index 7946be617..000000000 --- a/mmcv/ops/csrc/parrots/voxelization.cpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved.
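Both the upfirdn2d wrapper above and the voxelization functions that follow funnel into DISPATCH_DEVICE_IMPL, a registry that picks the kernel matching the device of the incoming tensors; only the parrots adapter layer is deleted here, not that mechanism. A toy Python analogue of the registry pattern (hypothetical names, purely illustrative, not mmcv API):

    # Toy sketch of the DISPATCH_DEVICE_IMPL idea: one public entry point,
    # with per-device implementations looked up in a registry at call time.
    _IMPLS = {}

    def register_impl(op, device):
        def decorator(fn):
            _IMPLS[(op, device)] = fn
            return fn
        return decorator

    @register_impl('upfirdn2d', 'cuda')
    def _upfirdn2d_cuda(x, kernel, up, down, pad):
        ...  # the real CUDA kernel would run here

    def upfirdn2d(x, kernel, up, down, pad):
        # Dispatch on the device of the first tensor argument, which is
        # what DISPATCH_DEVICE_IMPL does for the C++ operators.
        return _IMPLS[('upfirdn2d', x.device.type)](x, kernel, up, down, pad)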
-#include "pytorch_cpp_helper.hpp" -#include "pytorch_device_registry.hpp" - -int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels, - at::Tensor &coors, - at::Tensor &num_points_per_voxel, - const std::vector voxel_size, - const std::vector coors_range, - const int max_points, const int max_voxels, - const int NDim = 3) { - return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors, - num_points_per_voxel, voxel_size, coors_range, - max_points, max_voxels, NDim); -} - -int nondeterministic_hard_voxelize_forward_impl( - const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors, - at::Tensor &num_points_per_voxel, const std::vector voxel_size, - const std::vector coors_range, const int max_points, - const int max_voxels, const int NDim = 3) { - return DISPATCH_DEVICE_IMPL(nondeterministic_hard_voxelize_forward_impl, - points, voxels, coors, num_points_per_voxel, - voxel_size, coors_range, max_points, max_voxels, - NDim); -} - -void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors, - const std::vector voxel_size, - const std::vector coors_range, - const int NDim = 3) { - DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size, - coors_range, NDim); -} - -void hard_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &voxels, - at::Tensor &coors, at::Tensor &num_points_per_voxel, - at::Tensor &voxel_num, const int max_points, - const int max_voxels, const int NDim = 3, - const bool deterministic = true) { - int64_t *voxel_num_data = voxel_num.data_ptr(); - std::vector voxel_size_v( - voxel_size.data_ptr(), - voxel_size.data_ptr() + voxel_size.numel()); - std::vector coors_range_v( - coors_range.data_ptr(), - coors_range.data_ptr() + coors_range.numel()); - - if (deterministic) { - *voxel_num_data = hard_voxelize_forward_impl( - points, voxels, coors, num_points_per_voxel, voxel_size_v, - coors_range_v, max_points, max_voxels, NDim); - } else { - *voxel_num_data = nondeterministic_hard_voxelize_forward_impl( - points, voxels, coors, num_points_per_voxel, voxel_size_v, - coors_range_v, max_points, max_voxels, NDim); - } -} - -void dynamic_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &coors, - const int NDim = 3) { - std::vector voxel_size_v( - voxel_size.data_ptr(), - voxel_size.data_ptr() + voxel_size.numel()); - std::vector coors_range_v( - coors_range.data_ptr(), - coors_range.data_ptr() + coors_range.numel()); - dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v, - NDim); -} diff --git a/mmcv/ops/csrc/parrots/voxelization_parrots.cpp b/mmcv/ops/csrc/parrots/voxelization_parrots.cpp deleted file mode 100644 index 90e2a4445..000000000 --- a/mmcv/ops/csrc/parrots/voxelization_parrots.cpp +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright (c) OpenMMLab. 
All rights reserved -#include <parrots/compute/aten.hpp> -#include <parrots/extension.hpp> -#include <parrots/foundation/ssattrs.hpp> - -#include "voxelization_pytorch.h" - -using namespace parrots; - -#ifdef MMCV_WITH_CUDA -void hard_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int max_points, max_voxels, NDim; - bool deterministic; - SSAttrs(attr) - .get("max_points", max_points) - .get("max_voxels", max_voxels) - .get("NDim", NDim) - .get("deterministic", deterministic) - .done(); - const auto& points = buildATensor(ctx, ins[0]); - const auto& voxel_size = buildATensor(ctx, ins[1]); - const auto& coors_range = buildATensor(ctx, ins[2]); - - auto voxels = buildATensor(ctx, outs[0]); - auto coors = buildATensor(ctx, outs[1]); - auto num_points_per_voxel = buildATensor(ctx, outs[2]); - auto voxel_num = buildATensor(ctx, outs[3]); - - hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors, - num_points_per_voxel, voxel_num, max_points, max_voxels, - NDim, deterministic); -} - -void dynamic_voxelize_forward_cuda_parrots(CudaContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int NDim; - SSAttrs(attr).get("NDim", NDim).done(); - const auto& points = buildATensor(ctx, ins[0]); - const auto& voxel_size = buildATensor(ctx, ins[1]); - const auto& coors_range = buildATensor(ctx, ins[2]); - - auto coors = buildATensor(ctx, outs[0]); - - dynamic_voxelize_forward(points, voxel_size, coors_range, coors, NDim); -} -#endif - -void hard_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int max_points, max_voxels, NDim; - bool deterministic; - SSAttrs(attr) - .get("max_points", max_points) - .get("max_voxels", max_voxels) - .get("NDim", NDim) - .get("deterministic", deterministic) - .done(); - const auto& points = buildATensor(ctx, ins[0]); - const auto& voxel_size = buildATensor(ctx, ins[1]); - const auto& coors_range = buildATensor(ctx, ins[2]); - - auto voxels = buildATensor(ctx, outs[0]); - auto coors = buildATensor(ctx, outs[1]); - auto num_points_per_voxel = buildATensor(ctx, outs[2]); - auto voxel_num = buildATensor(ctx, outs[3]); - - hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors, - num_points_per_voxel, voxel_num, max_points, max_voxels, - NDim, deterministic); -} - -void dynamic_voxelize_forward_cpu_parrots(HostContext& ctx, - const SSElement& attr, - const OperatorBase::in_list_t& ins, - OperatorBase::out_list_t& outs) { - int NDim; - SSAttrs(attr).get("NDim", NDim).done(); - const auto& points = buildATensor(ctx, ins[0]); - const auto& voxel_size = buildATensor(ctx, ins[1]); - const auto& coors_range = buildATensor(ctx, ins[2]); - - auto coors = buildATensor(ctx, outs[0]); - - dynamic_voxelize_forward(points, voxel_size, coors_range, coors, NDim); -} - -PARROTS_EXTENSION_REGISTER(hard_voxelize_forward) - .attr("max_points") - .attr("max_voxels") - .attr("NDim") - .attr("deterministic") - .input(3) - .output(4) - .apply(hard_voxelize_forward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(hard_voxelize_forward_cuda_parrots) -#endif - .done(); - -PARROTS_EXTENSION_REGISTER(dynamic_voxelize_forward) - .attr("NDim") - .input(3) - .output(1) - .apply(dynamic_voxelize_forward_cpu_parrots) -#ifdef MMCV_WITH_CUDA - .apply(dynamic_voxelize_forward_cuda_parrots) -#endif - .done(); diff --git a/mmcv/ops/csrc/parrots/voxelization_pytorch.h b/mmcv/ops/csrc/parrots/voxelization_pytorch.h deleted file 
mode 100644 index 0019d5191..000000000 --- a/mmcv/ops/csrc/parrots/voxelization_pytorch.h +++ /dev/null @@ -1,20 +0,0 @@ -// Copyright (c) OpenMMLab. All rights reserved -#ifndef VOXELIZATION_PYTORCH_H -#define VOXELIZATION_PYTORCH_H -#include <torch/extension.h> -using namespace at; - -void hard_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &voxels, - at::Tensor &coors, at::Tensor &num_points_per_voxel, - at::Tensor &voxel_num, const int max_points, - const int max_voxels, const int NDim = 3, - const bool deterministic = true); - -void dynamic_voxelize_forward(const at::Tensor &points, - const at::Tensor &voxel_size, - const at::Tensor &coors_range, at::Tensor &coors, - const int NDim = 3); - -#endif // VOXELIZATION_PYTORCH_H diff --git a/mmcv/ops/diff_iou_rotated.py b/mmcv/ops/diff_iou_rotated.py index ddcf4b4fc..edb875884 100644 --- a/mmcv/ops/diff_iou_rotated.py +++ b/mmcv/ops/diff_iou_rotated.py @@ -20,8 +20,7 @@ class SortVertices(Function): def forward(ctx, vertices, mask, num_valid): idx = ext_module.diff_iou_rotated_sort_vertices_forward( vertices, mask, num_valid) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) + ctx.mark_non_differentiable(idx) return idx @staticmethod diff --git a/mmcv/ops/furthest_point_sample.py b/mmcv/ops/furthest_point_sample.py index 22b1a3048..35bef7221 100644 --- a/mmcv/ops/furthest_point_sample.py +++ b/mmcv/ops/furthest_point_sample.py @@ -38,8 +38,7 @@ class FurthestPointSampling(Function): n=N, m=num_points, ) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(output) + ctx.mark_non_differentiable(output) return output @staticmethod @@ -71,8 +70,7 @@ class FurthestPointSamplingWithDist(Function): ext_module.furthest_point_sampling_with_dist_forward( points_dist, temp, output, b=B, n=N, m=num_points) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(output) + ctx.mark_non_differentiable(output) return output @staticmethod diff --git a/mmcv/ops/gather_points.py b/mmcv/ops/gather_points.py index 895bfab64..b4c948cd3 100644 --- a/mmcv/ops/gather_points.py +++ b/mmcv/ops/gather_points.py @@ -34,8 +34,7 @@ class GatherPoints(Function): features, indices, output, b=B, c=C, n=N, npoints=npoint) ctx.for_backwards = (indices, C, N) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(indices) + ctx.mark_non_differentiable(indices) return output @staticmethod diff --git a/mmcv/ops/info.py b/mmcv/ops/info.py index b24b981f8..1fed42a4d 100644 --- a/mmcv/ops/info.py +++ b/mmcv/ops/info.py @@ -1,21 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved.
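The recurring Python-side edit above (diff_iou_rotated, furthest_point_sample, gather_points, and earlier ball_query) just drops the parrots guard around ctx.mark_non_differentiable; the call itself is stock torch.autograd API. A minimal self-contained sketch of what it does in a custom Function:

    import torch
    from torch.autograd import Function

    class ArgMaxIdx(Function):
        """Toy op whose integer output must never receive gradients."""

        @staticmethod
        def forward(ctx, x):
            idx = x.argmax(dim=-1)
            # Mark the index output as non-differentiable, exactly as the
            # sampling ops above now do unconditionally.
            ctx.mark_non_differentiable(idx)
            return idx

        @staticmethod
        def backward(ctx, grad_idx):
            # Never invoked for the marked output.
            return None

    x = torch.randn(2, 5, requires_grad=True)
    assert not ArgMaxIdx.apply(x).requires_grad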
-import torch +from ..utils import ext_loader -if torch.__version__ == 'parrots': - import parrots +ext_module = ext_loader.load_ext( + '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) - def get_compiler_version(): - return 'GCC ' + parrots.version.compiler - def get_compiling_cuda_version(): - return parrots.version.cuda -else: - from ..utils import ext_loader - ext_module = ext_loader.load_ext( - '_ext', ['get_compiler_version', 'get_compiling_cuda_version']) +def get_compiler_version(): + return ext_module.get_compiler_version() - def get_compiler_version(): - return ext_module.get_compiler_version() - def get_compiling_cuda_version(): - return ext_module.get_compiling_cuda_version() +def get_compiling_cuda_version(): + return ext_module.get_compiling_cuda_version() diff --git a/mmcv/ops/knn.py b/mmcv/ops/knn.py index 48ce92f92..a0e1704db 100644 --- a/mmcv/ops/knn.py +++ b/mmcv/ops/knn.py @@ -68,8 +68,7 @@ class KNN(Function): xyz, center_xyz, idx, dist2, b=B, n=N, m=npoint, nsample=k) # idx shape to [B, k, npoint] idx = idx.transpose(2, 1).contiguous() - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) + ctx.mark_non_differentiable(idx) return idx @staticmethod diff --git a/mmcv/ops/nms.py b/mmcv/ops/nms.py index feab4f3ca..8c74c3197 100644 --- a/mmcv/ops/nms.py +++ b/mmcv/ops/nms.py @@ -186,22 +186,10 @@ def soft_nms(boxes: array_like_type, method_dict = {'naive': 0, 'linear': 1, 'gaussian': 2} assert method in method_dict.keys() - if torch.__version__ == 'parrots': - dets = boxes.new_empty((boxes.size(0), 5), device='cpu') - indata_list = [boxes.cpu(), scores.cpu(), dets.cpu()] - indata_dict = { - 'iou_threshold': float(iou_threshold), - 'sigma': float(sigma), - 'min_score': min_score, - 'method': method_dict[method], - 'offset': int(offset) - } - inds = ext_module.softnms(*indata_list, **indata_dict) - else: - dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(), - float(iou_threshold), float(sigma), - float(min_score), method_dict[method], - int(offset)) + dets, inds = SoftNMSop.apply(boxes.cpu(), scores.cpu(), + float(iou_threshold), float(sigma), + float(min_score), method_dict[method], + int(offset)) dets = dets[:inds.size(0)] @@ -362,8 +350,6 @@ def nms_match(dets: array_like_type, indata_list = [dets_t] indata_dict = {'iou_threshold': float(iou_threshold)} matched = ext_module.nms_match(*indata_list, **indata_dict) - if torch.__version__ == 'parrots': - matched = matched.tolist() # type: ignore if isinstance(dets, Tensor): return [dets.new_tensor(m, dtype=torch.long) for m in matched] @@ -430,19 +416,9 @@ def nms_rotated(dets: Tensor, _, order = scores.sort(0, descending=True) dets_sorted = dets_wl.index_select(0, order) - if torch.__version__ == 'parrots': - keep_inds = ext_module.nms_rotated( - dets_wl, - scores, - order, - dets_sorted, - input_labels, - iou_threshold=iou_threshold, - multi_label=multi_label) - else: - keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, - input_labels, iou_threshold, - multi_label) + keep_inds = ext_module.nms_rotated(dets_wl, scores, order, dets_sorted, + input_labels, iou_threshold, + multi_label) dets = torch.cat((dets[keep_inds], scores[keep_inds].reshape(-1, 1)), dim=1) return dets, keep_inds diff --git a/mmcv/ops/pixel_group.py b/mmcv/ops/pixel_group.py index cf73e326d..2ac121ff4 100644 --- a/mmcv/ops/pixel_group.py +++ b/mmcv/ops/pixel_group.py @@ -59,28 +59,8 @@ def pixel_group( if isinstance(kernel_contour, np.ndarray): kernel_contour = torch.from_numpy(kernel_contour) - if 
torch.__version__ == 'parrots': - label = ext_module.pixel_group( - score, - mask, - embedding, - kernel_label, - kernel_contour, - kernel_region_num=kernel_region_num, - distance_threshold=distance_threshold) - label = label.tolist() - label = label[0] - list_index = kernel_region_num - pixel_assignment = [] - for x in range(kernel_region_num): - pixel_assignment.append( - np.array( - label[list_index:list_index + int(label[x])], - dtype=np.float)) - list_index = list_index + int(label[x]) - else: - pixel_assignment = ext_module.pixel_group(score, mask, embedding, - kernel_label, kernel_contour, - kernel_region_num, - distance_threshold) + pixel_assignment = ext_module.pixel_group(score, mask, embedding, + kernel_label, kernel_contour, + kernel_region_num, + distance_threshold) return pixel_assignment diff --git a/mmcv/ops/prroi_pool.py b/mmcv/ops/prroi_pool.py index 8c263e307..d3326a75d 100644 --- a/mmcv/ops/prroi_pool.py +++ b/mmcv/ops/prroi_pool.py @@ -3,7 +3,6 @@ from typing import Tuple, Union import torch import torch.nn as nn -from mmengine.utils.dl_utils import TORCH_VERSION from torch.autograd import Function from torch.autograd.function import once_differentiable from torch.nn.modules.utils import _pair @@ -71,7 +70,7 @@ class PrRoIPoolFunction(Function): grad_input = grad_output.new_zeros(*features.shape) grad_coor = grad_output.new_zeros(*rois.shape) - if features.requires_grad or TORCH_VERSION == 'parrots': + if features.requires_grad: grad_output = grad_output.contiguous() ext_module.prroi_pool_backward( grad_output, @@ -80,7 +79,7 @@ class PrRoIPoolFunction(Function): pooled_height=ctx.params[0], pooled_width=ctx.params[1], spatial_scale=ctx.params[2]) - if rois.requires_grad or TORCH_VERSION == 'parrots': + if rois.requires_grad: grad_output = grad_output.contiguous() ext_module.prroi_pool_coor_backward( output, diff --git a/mmcv/ops/saconv.py b/mmcv/ops/saconv.py index f93288407..26bb8aece 100644 --- a/mmcv/ops/saconv.py +++ b/mmcv/ops/saconv.py @@ -111,8 +111,7 @@ class SAConv2d(ConvAWS2d): out_s = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) else: - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + if digit_version(TORCH_VERSION) < digit_version('1.5.0'): out_s = super().conv2d_forward(x, weight) elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): # bias is a required argument of _conv_forward in torch 1.8.0 @@ -129,8 +128,7 @@ class SAConv2d(ConvAWS2d): out_l = deform_conv2d(x, offset, weight, self.stride, self.padding, self.dilation, self.groups, 1) else: - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.5.0')): + if digit_version(TORCH_VERSION) < digit_version('1.5.0'): out_l = super().conv2d_forward(x, weight) elif digit_version(TORCH_VERSION) >= digit_version('1.8.0'): # bias is a required argument of _conv_forward in torch 1.8.0 diff --git a/mmcv/ops/three_nn.py b/mmcv/ops/three_nn.py index d41b9789c..5192de893 100644 --- a/mmcv/ops/three_nn.py +++ b/mmcv/ops/three_nn.py @@ -38,8 +38,7 @@ class ThreeNN(Function): idx = target.new_empty(B, N, 3, dtype=torch.int32) ext_module.three_nn_forward(target, source, dist2, idx, b=B, n=N, m=m) - if torch.__version__ != 'parrots': - ctx.mark_non_differentiable(idx) + ctx.mark_non_differentiable(idx) return torch.sqrt(dist2), idx diff --git a/mmcv/utils/__init__.py b/mmcv/utils/__init__.py index 53ebb9453..2bb6a145f 100644 --- a/mmcv/utils/__init__.py +++ b/mmcv/utils/__init__.py @@ -2,9 +2,8 
@@ from .device_type import (IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_MPS_AVAILABLE, IS_NPU_AVAILABLE) from .env import collect_env -from .parrots_jit import jit, skip_no_elena __all__ = [ 'IS_MLU_AVAILABLE', 'IS_MPS_AVAILABLE', 'IS_CUDA_AVAILABLE', - 'IS_NPU_AVAILABLE', 'collect_env', 'jit', 'skip_no_elena' + 'IS_NPU_AVAILABLE', 'collect_env' ] diff --git a/mmcv/utils/ext_loader.py b/mmcv/utils/ext_loader.py index a31e107df..0ff299cd3 100644 --- a/mmcv/utils/ext_loader.py +++ b/mmcv/utils/ext_loader.py @@ -1,70 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. import importlib -import os import pkgutil -import warnings -from collections import namedtuple -import torch -if torch.__version__ != 'parrots': - - def load_ext(name, funcs): - ext = importlib.import_module('mmcv.' + name) - for fun in funcs: - assert hasattr(ext, fun), f'{fun} miss in module {name}' - return ext -else: - from parrots import extension - from parrots.base import ParrotsException - - has_return_value_ops = [ - 'nms', - 'softnms', - 'nms_match', - 'nms_rotated', - 'top_pool_forward', - 'top_pool_backward', - 'bottom_pool_forward', - 'bottom_pool_backward', - 'left_pool_forward', - 'left_pool_backward', - 'right_pool_forward', - 'right_pool_backward', - 'fused_bias_leakyrelu', - 'upfirdn2d', - 'ms_deform_attn_forward', - 'pixel_group', - 'contour_expand', - 'diff_iou_rotated_sort_vertices_forward', - ] - - def get_fake_func(name, e): - - def fake_func(*args, **kwargs): - warnings.warn(f'{name} is not supported in parrots now') - raise e - - return fake_func - - def load_ext(name, funcs): - ExtModule = namedtuple('ExtModule', funcs) - ext_list = [] - lib_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) - for fun in funcs: - try: - ext_fun = extension.load(fun, name, lib_dir=lib_root) - except ParrotsException as e: - if 'No element registered' not in e.message: - warnings.warn(e.message) - ext_fun = get_fake_func(fun, e) - ext_list.append(ext_fun) - else: - if fun in has_return_value_ops: - ext_list.append(ext_fun.op) - else: - ext_list.append(ext_fun.op_) - return ExtModule(*ext_list) +def load_ext(name, funcs): + ext = importlib.import_module('mmcv.' + name) + for fun in funcs: + assert hasattr(ext, fun), f'{fun} miss in module {name}' + return ext def check_ops_exist() -> bool: diff --git a/mmcv/utils/parrots_jit.py b/mmcv/utils/parrots_jit.py deleted file mode 100644 index 0e3a58c24..000000000 --- a/mmcv/utils/parrots_jit.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. 
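With the parrots branch gone, load_ext above reduces to "import mmcv._ext and verify the requested symbols exist". A usage sketch mirroring the new info.py (assumes an mmcv build with the ops extension compiled):

    from mmcv.utils import ext_loader

    # load_ext imports mmcv._ext and asserts every requested function exists.
    ext_module = ext_loader.load_ext(
        '_ext', ['get_compiler_version', 'get_compiling_cuda_version'])
    print(ext_module.get_compiler_version())
    print(ext_module.get_compiling_cuda_version())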
-import os - -from mmengine.utils.dl_utils.parrots_wrapper import TORCH_VERSION - -parrots_jit_option = os.getenv('PARROTS_JIT_OPTION') - -if TORCH_VERSION == 'parrots' and parrots_jit_option == 'ON': - from parrots.jit import pat as jit -else: - - def jit(func=None, - check_input=None, - full_shape=True, - derivate=False, - coderize=False, - optimize=False): - - def wrapper(func): - - def wrapper_inner(*args, **kargs): - return func(*args, **kargs) - - return wrapper_inner - - if func is None: - return wrapper - else: - return func - - -if TORCH_VERSION == 'parrots': - from parrots.utils.tester import skip_no_elena -else: - - def skip_no_elena(func): - - def wrapper(*args, **kargs): - return func(*args, **kargs) - - return wrapper diff --git a/setup.py b/setup.py index 6040117e6..bb72e2436 100644 --- a/setup.py +++ b/setup.py @@ -8,10 +8,7 @@ from setuptools import find_packages, setup EXT_TYPE = '' try: import torch - if torch.__version__ == 'parrots': - from parrots.utils.build_extension import BuildExtension - EXT_TYPE = 'parrots' - elif (hasattr(torch, 'is_mlu_available') and torch.is_mlu_available()) or \ + if (hasattr(torch, 'is_mlu_available') and torch.is_mlu_available()) or \ os.getenv('FORCE_MLU', '0') == '1': from torch_mlu.utils.cpp_extension import BuildExtension EXT_TYPE = 'pytorch' @@ -140,261 +137,222 @@ def get_extensions(): if os.getenv('MMCV_WITH_OPS', '1') == '0': return extensions - if EXT_TYPE == 'parrots': - ext_name = 'mmcv._ext' - from parrots.utils.build_extension import Extension + ext_name = 'mmcv._ext' + from torch.utils.cpp_extension import CppExtension, CUDAExtension - # new parrots op impl do not use MMCV_USE_PARROTS - # define_macros = [('MMCV_USE_PARROTS', None)] - define_macros = [] - include_dirs = [] - op_files = glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') +\ - glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') +\ - glob.glob('./mmcv/ops/csrc/parrots/*.cpp') + # prevent ninja from using too many resources + try: + import psutil + num_cpu = len(psutil.Process().cpu_affinity()) + cpu_use = max(4, num_cpu - 1) + except (ModuleNotFoundError, AttributeError): + cpu_use = 4 + + os.environ.setdefault('MAX_JOBS', str(cpu_use)) + define_macros = [] + + # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a + # required key passed to PyTorch. Even if there is no flag passed + # to cxx, users also need to pass an empty list to PyTorch. + # Since PyTorch1.8.0, it has a default value so users do not need + # to pass an empty list anymore. + # More details at https://github.com/pytorch/pytorch/pull/45956 + extra_compile_args = {'cxx': []} + + if platform.system() != 'Windows': + extra_compile_args['cxx'] = ['-std=c++14'] + else: + # TODO: In Windows, C++17 is chosen to compile extensions in + # PyTorch2.0 , but a compile error will be reported. + # As a temporary solution, force the use of C++14. 
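The ninja throttling shown above is easy to miss but matters for packagers: without a MAX_JOBS cap, ninja spawns one compile job per core and can exhaust memory on large builds. The same logic as a standalone snippet, runnable on its own:

    import os

    try:
        import psutil
        # cpu_affinity() raises AttributeError on platforms without it
        # (e.g. macOS), which falls through to the conservative default.
        num_cpu = len(psutil.Process().cpu_affinity())
        cpu_use = max(4, num_cpu - 1)
    except (ModuleNotFoundError, AttributeError):
        cpu_use = 4

    # setdefault keeps any MAX_JOBS the user exported explicitly.
    os.environ.setdefault('MAX_JOBS', str(cpu_use))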
+ if parse_version(torch.__version__) >= parse_version('2.0.0'): + extra_compile_args['cxx'] = ['/std:c++14'] + + include_dirs = [] + + extra_objects = [] + is_rocm_pytorch = False + try: + from torch.utils.cpp_extension import ROCM_HOME + is_rocm_pytorch = True if ((torch.version.hip is not None) and + (ROCM_HOME is not None)) else False + except ImportError: + pass + + if is_rocm_pytorch or torch.cuda.is_available() or os.getenv( + 'FORCE_CUDA', '0') == '1': + if is_rocm_pytorch: + define_macros += [('MMCV_WITH_HIP', None)] + define_macros += [('MMCV_WITH_CUDA', None)] + cuda_args = os.getenv('MMCV_CUDA_ARGS') + extra_compile_args['nvcc'] = [cuda_args] if cuda_args else [] + op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cpp') + extension = CUDAExtension + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/pytorch')) include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda')) - op_files.remove('./mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.cu') - op_files.remove('./mmcv/ops/csrc/pytorch/cpu/bbox_overlaps_cpu.cpp') - op_files.remove('./mmcv/ops/csrc/pytorch/cuda/bias_act_cuda.cu') - cuda_args = os.getenv('MMCV_CUDA_ARGS') - extra_compile_args = { - 'nvcc': [cuda_args, '-std=c++14'] if cuda_args else ['-std=c++14'], - 'cxx': ['-std=c++14'], - } - if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1': - define_macros += [('MMCV_WITH_CUDA', None)] - extra_compile_args['nvcc'] += [ - '-D__CUDA_NO_HALF_OPERATORS__', - '-D__CUDA_NO_HALF_CONVERSIONS__', - '-D__CUDA_NO_HALF2_OPERATORS__', - ] - ext_ops = Extension( - name=ext_name, - sources=op_files, - include_dirs=include_dirs, - define_macros=define_macros, - extra_compile_args=extra_compile_args, - cuda=True, - pytorch=True) - extensions.append(ext_ops) - elif EXT_TYPE == 'pytorch': - ext_name = 'mmcv._ext' - from torch.utils.cpp_extension import CppExtension, CUDAExtension + elif (hasattr(torch, 'is_mlu_available') and + torch.is_mlu_available()) or \ + os.getenv('FORCE_MLU', '0') == '1': + from torch_mlu.utils.cpp_extension import MLUExtension - # prevent ninja from using too many resources - try: - import psutil - num_cpu = len(psutil.Process().cpu_affinity()) - cpu_use = max(4, num_cpu - 1) - except (ModuleNotFoundError, AttributeError): - cpu_use = 4 + def get_mluops_version(file_path): + with open(file_path) as f: + for line in f: + if re.search('MLUOP_MAJOR', line): + major = line.strip().split(' ')[2] + if re.search('MLUOP_MINOR', line): + minor = line.strip().split(' ')[2] + if re.search('MLUOP_PATCHLEVEL', line): + patchlevel = line.strip().split(' ')[2] + mluops_version = f'v{major}.{minor}.{patchlevel}' + return mluops_version - os.environ.setdefault('MAX_JOBS', str(cpu_use)) - define_macros = [] - - # Before PyTorch1.8.0, when compiling CUDA code, `cxx` is a - # required key passed to PyTorch. Even if there is no flag passed - # to cxx, users also need to pass an empty list to PyTorch. - # Since PyTorch1.8.0, it has a default value so users do not need - # to pass an empty list anymore. 
- # More details at https://github.com/pytorch/pytorch/pull/45956 - extra_compile_args = {'cxx': []} - - if platform.system() != 'Windows': - extra_compile_args['cxx'] = ['-std=c++14'] + mmcv_mluops_version = get_mluops_version( + './mmcv/ops/csrc/pytorch/mlu/mlu_common_helper.h') + mlu_ops_path = os.getenv('MMCV_MLU_OPS_PATH') + if mlu_ops_path: + exists_mluops_version = get_mluops_version(mlu_ops_path + + '/bangc-ops/mlu_op.h') + if exists_mluops_version != mmcv_mluops_version: + print('the version of mlu-ops provided is %s,' + ' while %s is needed.' % + (exists_mluops_version, mmcv_mluops_version)) + exit() + try: + if os.path.exists('mlu-ops'): + if os.path.islink('mlu-ops'): + os.remove('mlu-ops') + os.symlink(mlu_ops_path, 'mlu-ops') + elif os.path.abspath('mlu-ops') != mlu_ops_path: + os.symlink(mlu_ops_path, 'mlu-ops') + else: + os.symlink(mlu_ops_path, 'mlu-ops') + except Exception: + raise FileExistsError( + 'mlu-ops already exists, please move it out,' + 'or rename or remove it.') else: - # TODO: In Windows, C++17 is chosen to compile extensions in - # PyTorch2.0 , but a compile error will be reported. - # As a temporary solution, force the use of C++14. - if parse_version(torch.__version__) >= parse_version('2.0.0'): - extra_compile_args['cxx'] = ['/std:c++14'] + if not os.path.exists('mlu-ops'): + import requests + mluops_url = 'https://github.com/Cambricon/mlu-ops/' + \ + 'archive/refs/tags/' + mmcv_mluops_version + '.zip' + req = requests.get(mluops_url) + with open('./mlu-ops.zip', 'wb') as f: + try: + f.write(req.content) + except Exception: + raise ImportError('failed to download mlu-ops') - include_dirs = [] - - extra_objects = [] - is_rocm_pytorch = False - try: - from torch.utils.cpp_extension import ROCM_HOME - is_rocm_pytorch = True if ((torch.version.hip is not None) and - (ROCM_HOME is not None)) else False - except ImportError: - pass - - if is_rocm_pytorch or torch.cuda.is_available() or os.getenv( - 'FORCE_CUDA', '0') == '1': - if is_rocm_pytorch: - define_macros += [('MMCV_WITH_HIP', None)] - define_macros += [('MMCV_WITH_CUDA', None)] - cuda_args = os.getenv('MMCV_CUDA_ARGS') - extra_compile_args['nvcc'] = [cuda_args] if cuda_args else [] - op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cu') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cuda/*.cpp') - extension = CUDAExtension - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/pytorch')) - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/cuda')) - elif (hasattr(torch, 'is_mlu_available') and - torch.is_mlu_available()) or \ - os.getenv('FORCE_MLU', '0') == '1': - from torch_mlu.utils.cpp_extension import MLUExtension - - def get_mluops_version(file_path): - with open(file_path) as f: - for line in f: - if re.search('MLUOP_MAJOR', line): - major = line.strip().split(' ')[2] - if re.search('MLUOP_MINOR', line): - minor = line.strip().split(' ')[2] - if re.search('MLUOP_PATCHLEVEL', line): - patchlevel = line.strip().split(' ')[2] - mluops_version = f'v{major}.{minor}.{patchlevel}' - return mluops_version - - mmcv_mluops_version = get_mluops_version( - './mmcv/ops/csrc/pytorch/mlu/mlu_common_helper.h') - mlu_ops_path = os.getenv('MMCV_MLU_OPS_PATH') - if mlu_ops_path: + from zipfile import BadZipFile, ZipFile + with ZipFile('./mlu-ops.zip', 'r') as archive: + try: + archive.extractall() + dir_name = 
archive.namelist()[0].split('/')[0] + os.rename(dir_name, 'mlu-ops') + except BadZipFile: + print('invalid mlu-ops.zip file') + else: exists_mluops_version = get_mluops_version( - mlu_ops_path + '/bangc-ops/mlu_op.h') + './mlu-ops/bangc-ops/mlu_op.h') if exists_mluops_version != mmcv_mluops_version: - print('the version of mlu-ops provided is %s,' + print('the version of provided mlu-ops is %s,' ' while %s is needed.' % (exists_mluops_version, mmcv_mluops_version)) exit() - try: - if os.path.exists('mlu-ops'): - if os.path.islink('mlu-ops'): - os.remove('mlu-ops') - os.symlink(mlu_ops_path, 'mlu-ops') - elif os.path.abspath('mlu-ops') != mlu_ops_path: - os.symlink(mlu_ops_path, 'mlu-ops') - else: - os.symlink(mlu_ops_path, 'mlu-ops') - except Exception: - raise FileExistsError( - 'mlu-ops already exists, please move it out,' - 'or rename or remove it.') - else: - if not os.path.exists('mlu-ops'): - import requests - mluops_url = 'https://github.com/Cambricon/mlu-ops/' + \ - 'archive/refs/tags/' + mmcv_mluops_version + '.zip' - req = requests.get(mluops_url) - with open('./mlu-ops.zip', 'wb') as f: - try: - f.write(req.content) - except Exception: - raise ImportError('failed to download mlu-ops') - from zipfile import BadZipFile, ZipFile - with ZipFile('./mlu-ops.zip', 'r') as archive: - try: - archive.extractall() - dir_name = archive.namelist()[0].split('/')[0] - os.rename(dir_name, 'mlu-ops') - except BadZipFile: - print('invalid mlu-ops.zip file') - else: - exists_mluops_version = get_mluops_version( - './mlu-ops/bangc-ops/mlu_op.h') - if exists_mluops_version != mmcv_mluops_version: - print('the version of provided mlu-ops is %s,' - ' while %s is needed.' % - (exists_mluops_version, mmcv_mluops_version)) - exit() + define_macros += [('MMCV_WITH_MLU', None)] + mlu_args = os.getenv('MMCV_MLU_ARGS', '-DNDEBUG ') + mluops_includes = [] + mluops_includes.append('-I' + os.path.abspath('./mlu-ops/bangc-ops')) + mluops_includes.append('-I' + + os.path.abspath('./mlu-ops/bangc-ops/kernels')) + extra_compile_args['cncc'] = [mlu_args] + \ + mluops_includes if mlu_args else mluops_includes + extra_compile_args['cxx'] += ['-fno-gnu-unique'] + op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/mlu/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu') + \ + glob.glob( + './mlu-ops/bangc-ops/core/**/*.cpp', recursive=True) + \ + glob.glob( + './mlu-ops/bangc-ops/kernels/**/*.cpp', recursive=True) + \ + glob.glob( + './mlu-ops/bangc-ops/kernels/**/*.mlu', recursive=True) + extra_objects = glob.glob( + './mlu-ops/bangc-ops/kernels/kernel_wrapper/*.o') + extension = MLUExtension + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mlu')) + include_dirs.append(os.path.abspath('./mlu-ops/bangc-ops')) + elif (hasattr(torch.backends, 'mps') + and torch.backends.mps.is_available()) or os.getenv( + 'FORCE_MPS', '0') == '1': + # objc compiler support + from distutils.unixccompiler import UnixCCompiler + if '.mm' not in UnixCCompiler.src_extensions: + UnixCCompiler.src_extensions.append('.mm') + UnixCCompiler.language_map['.mm'] = 'objc' - define_macros += [('MMCV_WITH_MLU', None)] - mlu_args = os.getenv('MMCV_MLU_ARGS', '-DNDEBUG ') - mluops_includes = [] - mluops_includes.append('-I' + - os.path.abspath('./mlu-ops/bangc-ops')) - mluops_includes.append( - '-I' + os.path.abspath('./mlu-ops/bangc-ops/kernels')) - extra_compile_args['cncc'] 
= [mlu_args] + \ - mluops_includes if mlu_args else mluops_includes - extra_compile_args['cxx'] += ['-fno-gnu-unique'] - op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/mlu/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/common/mlu/*.mlu') + \ - glob.glob( - './mlu-ops/bangc-ops/core/**/*.cpp', recursive=True) + \ - glob.glob( - './mlu-ops/bangc-ops/kernels/**/*.cpp', recursive=True) + \ - glob.glob( - './mlu-ops/bangc-ops/kernels/**/*.mlu', recursive=True) - extra_objects = glob.glob( - './mlu-ops/bangc-ops/kernels/kernel_wrapper/*.o') - extension = MLUExtension - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mlu')) - include_dirs.append(os.path.abspath('./mlu-ops/bangc-ops')) - elif (hasattr(torch.backends, 'mps') - and torch.backends.mps.is_available()) or os.getenv( - 'FORCE_MPS', '0') == '1': - # objc compiler support - from distutils.unixccompiler import UnixCCompiler - if '.mm' not in UnixCCompiler.src_extensions: - UnixCCompiler.src_extensions.append('.mm') - UnixCCompiler.language_map['.mm'] = 'objc' + define_macros += [('MMCV_WITH_MPS', None)] + extra_compile_args = {} + extra_compile_args['cxx'] = ['-Wall', '-std=c++17'] + extra_compile_args['cxx'] += [ + '-framework', 'Metal', '-framework', 'Foundation' + ] + extra_compile_args['cxx'] += ['-ObjC++'] + # src + op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/common/mps/*.mm') + \ + glob.glob('./mmcv/ops/csrc/pytorch/mps/*.mm') + extension = CppExtension + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mps')) + elif (os.getenv('FORCE_NPU', '0') == '1'): + print(f'Compiling {ext_name} only with CPU and NPU') + try: + from torch_npu.utils.cpp_extension import NpuExtension + define_macros += [('MMCV_WITH_NPU', None)] + extension = NpuExtension + except Exception: + raise ImportError('can not find any torch_npu') + # src + op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/common/npu/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/npu/*.cpp') + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/npu')) + else: + print(f'Compiling {ext_name} only with CPU') + op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ + glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + extension = CppExtension + include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) - define_macros += [('MMCV_WITH_MPS', None)] - extra_compile_args = {} - extra_compile_args['cxx'] = ['-Wall', '-std=c++17'] - extra_compile_args['cxx'] += [ - '-framework', 'Metal', '-framework', 'Foundation' - ] - extra_compile_args['cxx'] += ['-ObjC++'] - # src - op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/common/mps/*.mm') + \ - glob.glob('./mmcv/ops/csrc/pytorch/mps/*.mm') - extension = CppExtension - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/mps')) - elif (os.getenv('FORCE_NPU', '0') == '1'): - print(f'Compiling {ext_name} only with CPU and NPU') - try: - from torch_npu.utils.cpp_extension import NpuExtension 
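Condensed, the de-indented get_extensions() probes backends in a fixed order, each overridable with a FORCE_* environment variable for cross-compilation. A schematic sketch of that selection (simplified; ROCm is folded into the CUDA case):

    import os
    import torch

    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':
        backend = 'cuda'  # also covers ROCm via torch.version.hip
    elif (hasattr(torch, 'is_mlu_available') and torch.is_mlu_available()) \
            or os.getenv('FORCE_MLU', '0') == '1':
        backend = 'mlu'
    elif (hasattr(torch.backends, 'mps')
          and torch.backends.mps.is_available()) \
            or os.getenv('FORCE_MPS', '0') == '1':
        backend = 'mps'
    elif os.getenv('FORCE_NPU', '0') == '1':
        backend = 'npu'
    else:
        backend = 'cpu'
    print(f'building mmcv._ext for {backend}')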
- define_macros += [('MMCV_WITH_NPU', None)] - extension = NpuExtension - except Exception: - raise ImportError('can not find any torch_npu') - # src - op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/common/npu/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/npu/*.cpp') - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common/npu')) - else: - print(f'Compiling {ext_name} only with CPU') - op_files = glob.glob('./mmcv/ops/csrc/pytorch/*.cpp') + \ - glob.glob('./mmcv/ops/csrc/pytorch/cpu/*.cpp') - extension = CppExtension - include_dirs.append(os.path.abspath('./mmcv/ops/csrc/common')) + # Since the PR (https://github.com/open-mmlab/mmcv/pull/1463) uses + # c++14 features, the argument ['std=c++14'] must be added here. + # However, in the windows environment, some standard libraries + # will depend on c++17 or higher. In fact, for the windows + # environment, the compiler will choose the appropriate compiler + # to compile those cpp files, so there is no need to add the + # argument + if 'nvcc' in extra_compile_args and platform.system() != 'Windows': + extra_compile_args['nvcc'] += ['-std=c++14'] - # Since the PR (https://github.com/open-mmlab/mmcv/pull/1463) uses - # c++14 features, the argument ['std=c++14'] must be added here. - # However, in the windows environment, some standard libraries - # will depend on c++17 or higher. In fact, for the windows - # environment, the compiler will choose the appropriate compiler - # to compile those cpp files, so there is no need to add the - # argument - if 'nvcc' in extra_compile_args and platform.system() != 'Windows': - extra_compile_args['nvcc'] += ['-std=c++14'] - - ext_ops = extension( - name=ext_name, - sources=op_files, - include_dirs=include_dirs, - define_macros=define_macros, - extra_objects=extra_objects, - extra_compile_args=extra_compile_args) - extensions.append(ext_ops) + ext_ops = extension( + name=ext_name, + sources=op_files, + include_dirs=include_dirs, + define_macros=define_macros, + extra_objects=extra_objects, + extra_compile_args=extra_compile_args) + extensions.append(ext_ops) return extensions diff --git a/tests/test_cnn/test_build_layers.py b/tests/test_cnn/test_build_layers.py index c8903ac40..67264309b 100644 --- a/tests/test_cnn/test_build_layers.py +++ b/tests/test_cnn/test_build_layers.py @@ -6,7 +6,7 @@ import pytest import torch import torch.nn as nn from mmengine.registry import MODELS -from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm +from torch.nn.modules.batchnorm import _BatchNorm from mmcv.cnn.bricks import (build_activation_layer, build_conv_layer, build_norm_layer, build_padding_layer, diff --git a/tests/test_cnn/test_conv_module.py b/tests/test_cnn/test_conv_module.py index d31167a74..a6fa48754 100644 --- a/tests/test_cnn/test_conv_module.py +++ b/tests/test_cnn/test_conv_module.py @@ -142,8 +142,7 @@ def test_conv_module(): # HSwish conv = ConvModule(3, 8, 3, padding=1, act_cfg=dict(type='HSwish')) - if (TORCH_VERSION == 'parrots' - or digit_version(TORCH_VERSION) < digit_version('1.7')): + if digit_version(TORCH_VERSION) < digit_version('1.7'): assert isinstance(conv.activate, HSwish) else: assert isinstance(conv.activate, nn.Hardswish) diff --git a/tests/test_cnn/test_non_local.py b/tests/test_cnn/test_non_local.py index 25d788339..bffdd55e6 100644 --- a/tests/test_cnn/test_non_local.py +++ b/tests/test_cnn/test_non_local.py @@ 
-25,29 +25,18 @@ def test_nonlocal3d(): # NonLocal3d with 'embedded_gaussian' mode imgs = torch.randn(2, 3, 10, 20, 20) nonlocal_3d = NonLocal3d(3) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - # NonLocal is only implemented on gpu in parrots - imgs = imgs.cuda() - nonlocal_3d.cuda() out = nonlocal_3d(imgs) assert out.shape == imgs.shape # NonLocal3d with 'dot_product' mode nonlocal_3d = NonLocal3d(3, mode='dot_product') assert nonlocal_3d.mode == 'dot_product' - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_3d.cuda() out = nonlocal_3d(imgs) assert out.shape == imgs.shape # NonLocal3d with 'concatenation' mode nonlocal_3d = NonLocal3d(3, mode='concatenation') assert nonlocal_3d.mode == 'concatenation' - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_3d.cuda() out = nonlocal_3d(imgs) assert out.shape == imgs.shape @@ -55,9 +44,6 @@ def test_nonlocal3d(): nonlocal_3d = NonLocal3d(3, mode='gaussian') assert not hasattr(nonlocal_3d, 'phi') assert nonlocal_3d.mode == 'gaussian' - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_3d.cuda() out = nonlocal_3d(imgs) assert out.shape == imgs.shape @@ -67,9 +53,6 @@ def test_nonlocal3d(): assert isinstance(nonlocal_3d.g[1], nn.MaxPool3d) assert nonlocal_3d.g[1].kernel_size == (1, 2, 2) assert isinstance(nonlocal_3d.phi, nn.MaxPool3d) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_3d.cuda() out = nonlocal_3d(imgs) assert out.shape == imgs.shape @@ -79,9 +62,6 @@ def test_nonlocal3d(): assert isinstance(m, nn.Sequential) and len(m) == 2 assert isinstance(m[1], nn.MaxPool3d) assert m[1].kernel_size == (1, 2, 2) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_3d.cuda() out = nonlocal_3d(imgs) assert out.shape == imgs.shape @@ -90,30 +70,18 @@ def test_nonlocal2d(): # NonLocal2d with 'embedded_gaussian' mode imgs = torch.randn(2, 3, 20, 20) nonlocal_2d = NonLocal2d(3) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_2d.cuda() out = nonlocal_2d(imgs) assert out.shape == imgs.shape # NonLocal2d with 'dot_product' mode imgs = torch.randn(2, 3, 20, 20) nonlocal_2d = NonLocal2d(3, mode='dot_product') - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_2d.cuda() out = nonlocal_2d(imgs) assert out.shape == imgs.shape # NonLocal2d with 'concatenation' mode imgs = torch.randn(2, 3, 20, 20) nonlocal_2d = NonLocal2d(3, mode='concatenation') - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_2d.cuda() out = nonlocal_2d(imgs) assert out.shape == imgs.shape @@ -121,10 +89,6 @@ def test_nonlocal2d(): imgs = torch.randn(2, 3, 20, 20) nonlocal_2d = NonLocal2d(3, mode='gaussian') assert not hasattr(nonlocal_2d, 'phi') - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_2d.cuda() out = nonlocal_2d(imgs) assert out.shape == imgs.shape @@ -134,9 +98,6 @@ def test_nonlocal2d(): assert isinstance(nonlocal_2d.g[1], nn.MaxPool2d) assert nonlocal_2d.g[1].kernel_size == (2, 2) assert isinstance(nonlocal_2d.phi, nn.MaxPool2d) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_2d.cuda() out = nonlocal_2d(imgs) assert out.shape == imgs.shape @@ -146,9 +107,6 @@ def test_nonlocal2d(): assert isinstance(m, nn.Sequential) and len(m) == 2 assert isinstance(m[1], nn.MaxPool2d) assert 
m[1].kernel_size == (2, 2) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_2d.cuda() out = nonlocal_2d(imgs) assert out.shape == imgs.shape @@ -157,30 +115,18 @@ def test_nonlocal1d(): # NonLocal1d with 'embedded_gaussian' mode imgs = torch.randn(2, 3, 20) nonlocal_1d = NonLocal1d(3) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape # NonLocal1d with 'dot_product' mode imgs = torch.randn(2, 3, 20) nonlocal_1d = NonLocal1d(3, mode='dot_product') - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape # NonLocal1d with 'concatenation' mode imgs = torch.randn(2, 3, 20) nonlocal_1d = NonLocal1d(3, mode='concatenation') - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape @@ -188,10 +134,6 @@ def test_nonlocal1d(): imgs = torch.randn(2, 3, 20) nonlocal_1d = NonLocal1d(3, mode='gaussian') assert not hasattr(nonlocal_1d, 'phi') - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - imgs = imgs.cuda() - nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape @@ -201,9 +143,6 @@ def test_nonlocal1d(): assert isinstance(nonlocal_1d.g[1], nn.MaxPool1d) assert nonlocal_1d.g[1].kernel_size == 2 assert isinstance(nonlocal_1d.phi, nn.MaxPool1d) - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape @@ -213,8 +152,5 @@ def test_nonlocal1d(): assert isinstance(m, nn.Sequential) and len(m) == 2 assert isinstance(m[1], nn.MaxPool1d) assert m[1].kernel_size == 2 - if torch.__version__ == 'parrots': - if torch.cuda.is_available(): - nonlocal_1d.cuda() out = nonlocal_1d(imgs) assert out.shape == imgs.shape diff --git a/tests/test_cnn/test_wrappers.py b/tests/test_cnn/test_wrappers.py index 02e0f13cd..d4aaab280 100644 --- a/tests/test_cnn/test_wrappers.py +++ b/tests/test_cnn/test_wrappers.py @@ -8,10 +8,7 @@ import torch.nn as nn from mmcv.cnn.bricks import (Conv2d, Conv3d, ConvTranspose2d, ConvTranspose3d, Linear, MaxPool2d, MaxPool3d) -if torch.__version__ != 'parrots': - torch_version = '1.1' -else: - torch_version = 'parrots' +torch_version = '1.1' @patch('torch.__version__', torch_version) @@ -139,8 +136,6 @@ def test_conv_transposed_2d(in_w, in_h, in_channel, out_channel, kernel_size, x_empty = torch.randn(0, in_channel, in_h, in_w, requires_grad=True) # out padding must be smaller than either stride or dilation op = min(stride, dilation) - 1 - if torch.__version__ == 'parrots': - op = 0 torch.manual_seed(0) wrapper = ConvTranspose2d( in_channel, @@ -273,24 +268,17 @@ def test_max_pool_2d(in_w, in_h, in_channel, out_channel, kernel_size, stride, @pytest.mark.parametrize( 'in_w,in_h,in_t,in_channel,out_channel,kernel_size,stride,padding,dilation', # noqa: E501 [(10, 10, 10, 1, 1, 3, 1, 0, 1), (20, 20, 20, 3, 3, 5, 2, 1, 2)]) -@pytest.mark.skipif( - torch.__version__ == 'parrots' and not torch.cuda.is_available(), - reason='parrots requires CUDA support') def test_max_pool_3d(in_w, in_h, in_t, in_channel, out_channel, kernel_size, stride, padding, dilation): # wrapper op with 0-dim input x_empty = torch.randn(0, in_channel, in_t, in_h, in_w, requires_grad=True) wrapper = MaxPool3d( kernel_size, stride=stride, 
padding=padding, dilation=dilation) - if torch.__version__ == 'parrots': - x_empty = x_empty.cuda() wrapper_out = wrapper(x_empty) # torch op with 3-dim input as shape reference x_normal = torch.randn(3, in_channel, in_t, in_h, in_w) ref = nn.MaxPool3d( kernel_size, stride=stride, padding=padding, dilation=dilation) - if torch.__version__ == 'parrots': - x_normal = x_normal.cuda() ref_out = ref(x_normal) assert wrapper_out.shape[0] == 0 diff --git a/tests/test_image/test_io.py b/tests/test_image/test_io.py index 6742924f2..26e8ac592 100644 --- a/tests/test_image/test_io.py +++ b/tests/test_image/test_io.py @@ -10,15 +10,11 @@ import cv2 import mmengine import numpy as np import pytest -import torch from mmengine.fileio.file_client import HTTPBackend, PetrelBackend from numpy.testing import assert_allclose, assert_array_equal import mmcv -if torch.__version__ == 'parrots': - pytest.skip('not necessary in parrots test', allow_module_level=True) - class TestIO: diff --git a/tests/test_ops/test_bias_act.py b/tests/test_ops/test_bias_act.py index 01b57c4ae..d8d02348c 100644 --- a/tests/test_ops/test_bias_act.py +++ b/tests/test_ops/test_bias_act.py @@ -1,17 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. import pytest import torch +from torch.autograd import gradcheck, gradgradcheck from mmcv.ops import bias_act from mmcv.ops.bias_act import EasyDict -_USING_PARROTS = True -try: - from parrots.autograd import gradcheck -except ImportError: - from torch.autograd import gradcheck, gradgradcheck - _USING_PARROTS = False - class TestBiasAct: @@ -65,21 +59,15 @@ class TestBiasAct: @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda') def test_bias_act_cuda(self): - if _USING_PARROTS: - gradcheck( - bias_act, (self.input_tensor.cuda(), self.bias.cuda()), - delta=1e-4, - pt_atol=1e-3) - else: - gradcheck( - bias_act, (self.input_tensor.cuda(), self.bias.cuda()), - eps=1e-4, - atol=1e-3) + gradcheck( + bias_act, (self.input_tensor.cuda(), self.bias.cuda()), + eps=1e-4, + atol=1e-3) - gradgradcheck( - bias_act, (self.input_tensor.cuda(), self.bias.cuda()), - eps=1e-4, - atol=1e-3) + gradgradcheck( + bias_act, (self.input_tensor.cuda(), self.bias.cuda()), + eps=1e-4, + atol=1e-3) out = bias_act(self.input_tensor.cuda(), self.bias.cuda()) assert out.shape == (1, 3) diff --git a/tests/test_ops/test_deform_conv.py b/tests/test_ops/test_deform_conv.py index 64dcccfde..58825b850 100644 --- a/tests/test_ops/test_deform_conv.py +++ b/tests/test_ops/test_deform_conv.py @@ -193,8 +193,7 @@ class TestDeformconv: # test amp when torch version >= '1.6.0', the type of # input data for deformconv might be torch.float or torch.half - if (TORCH_VERSION != 'parrots' - and digit_version(TORCH_VERSION) >= digit_version('1.6.0')): + if digit_version(TORCH_VERSION) >= digit_version('1.6.0'): with autocast(enabled=True): self._test_amp_deformconv(torch.float, 1e-1) self._test_amp_deformconv(torch.half, 1e-1) diff --git a/tests/test_ops/test_deform_roi_pool.py b/tests/test_ops/test_deform_roi_pool.py index 346301fe4..0ab1e33ab 100644 --- a/tests/test_ops/test_deform_roi_pool.py +++ b/tests/test_ops/test_deform_roi_pool.py @@ -4,16 +4,10 @@ import os import numpy as np import pytest import torch +from torch.autograd import gradcheck from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE -_USING_PARROTS = True -try: - from parrots.autograd import gradcheck -except ImportError: - from torch.autograd import gradcheck - _USING_PARROTS = False - cur_dir = 
os.path.dirname(os.path.abspath(__file__)) inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]), @@ -63,10 +57,7 @@ class TestDeformRoIPool: spatial_scale=spatial_scale, sampling_ratio=sampling_ratio).cuda() - if _USING_PARROTS: - gradcheck(droipool, (x, rois), no_grads=[rois]) - else: - gradcheck(droipool, (x, rois), eps=1e-2, atol=1e-2) + gradcheck(droipool, (x, rois), eps=1e-2, atol=1e-2) def test_modulated_deform_roi_pool_gradcheck(self): if not torch.cuda.is_available(): @@ -92,10 +83,7 @@ class TestDeformRoIPool: spatial_scale=spatial_scale, sampling_ratio=sampling_ratio).cuda() - if _USING_PARROTS: - gradcheck(droipool, (x, rois), no_grads=[rois]) - else: - gradcheck(droipool, (x, rois), eps=1e-2, atol=1e-2) + gradcheck(droipool, (x, rois), eps=1e-2, atol=1e-2) def _test_deform_roi_pool_allclose(self, device, dtype=torch.float): from mmcv.ops import DeformRoIPoolPack diff --git a/tests/test_ops/test_focal_loss.py b/tests/test_ops/test_focal_loss.py index ee7c9861a..81390f2f4 100644 --- a/tests/test_ops/test_focal_loss.py +++ b/tests/test_ops/test_focal_loss.py @@ -2,16 +2,10 @@ import numpy as np import pytest import torch +from torch.autograd import gradcheck from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE -_USING_PARROTS = True -try: - from parrots.autograd import gradcheck -except ImportError: - from torch.autograd import gradcheck - _USING_PARROTS = False - # torch.set_printoptions(precision=8, threshold=100) inputs = [ @@ -94,12 +88,7 @@ class Testfocalloss: y = torch.from_numpy(np_y).cuda().long() floss = SoftmaxFocalLoss(gamma, alpha) - if _USING_PARROTS: - # gradcheck(floss, (x, y), - # no_grads=[y]) - pass - else: - gradcheck(floss, (x, y), eps=1e-2, atol=1e-2) + gradcheck(floss, (x, y), eps=1e-2, atol=1e-2) def _test_grad_sigmoid(self, dtype=torch.float): if not torch.cuda.is_available(): @@ -116,12 +105,7 @@ class Testfocalloss: y = torch.from_numpy(np_y).cuda().long() floss = SigmoidFocalLoss(gamma, alpha) - if _USING_PARROTS: - # gradcheck(floss, (x, y), - # no_grads=[y]) - pass - else: - gradcheck(floss, (x, y), eps=1e-2, atol=1e-2) + gradcheck(floss, (x, y), eps=1e-2, atol=1e-2) def test_softmax_float(self): self._test_softmax(dtype=torch.float) diff --git a/tests/test_ops/test_fused_bias_leakyrelu.py b/tests/test_ops/test_fused_bias_leakyrelu.py index e6f6fb9f7..1b84e70c9 100644 --- a/tests/test_ops/test_fused_bias_leakyrelu.py +++ b/tests/test_ops/test_fused_bias_leakyrelu.py @@ -1,16 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. 
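These test simplifications all converge on the stock torch.autograd.gradcheck, which perturbs each input by eps and compares the numerical Jacobian against the analytical one within atol. A self-contained example of the call pattern the tests now use:

    import torch
    from torch.autograd import gradcheck

    # Double precision keeps the numerical Jacobian estimate stable.
    linear = torch.nn.Linear(3, 2).double()
    x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
    assert gradcheck(linear, (x,), eps=1e-6, atol=1e-4)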
 import pytest
 import torch
+from torch.autograd import gradcheck, gradgradcheck
 
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
 
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck, gradgradcheck
-    _USING_PARROTS = False
-
 
 class TestFusedBiasLeakyReLU:
 
@@ -40,19 +34,11 @@ class TestFusedBiasLeakyReLU:
     def test_gradient(self, device):
 
         from mmcv.ops import FusedBiasLeakyReLU
-        if _USING_PARROTS:
-            if IS_CUDA_AVAILABLE:
-                gradcheck(
-                    FusedBiasLeakyReLU(2).cuda(),
-                    self.input_tensor,
-                    delta=1e-4,
-                    pt_atol=1e-3)
-        else:
-            gradcheck(
-                FusedBiasLeakyReLU(2).to(device),
-                self.input_tensor,
-                eps=1e-4,
-                atol=1e-3)
+        gradcheck(
+            FusedBiasLeakyReLU(2).to(device),
+            self.input_tensor,
+            eps=1e-4,
+            atol=1e-3)
 
     @pytest.mark.parametrize('device', [
         pytest.param(
diff --git a/tests/test_ops/test_modulated_deform_conv.py b/tests/test_ops/test_modulated_deform_conv.py
index ee29e73eb..0575e0db9 100644
--- a/tests/test_ops/test_modulated_deform_conv.py
+++ b/tests/test_ops/test_modulated_deform_conv.py
@@ -120,8 +120,7 @@ class TestMdconv:
 
         # test amp when torch version >= '1.6.0', the type of
         # input data for mdconv might be torch.float or torch.half
-        if (TORCH_VERSION != 'parrots'
-                and digit_version(TORCH_VERSION) >= digit_version('1.6.0')):
+        if digit_version(TORCH_VERSION) >= digit_version('1.6.0'):
             with autocast(enabled=True):
                 self._test_amp_mdconv(torch.float)
                 self._test_amp_mdconv(torch.half)
diff --git a/tests/test_ops/test_ms_deformable_attn.py b/tests/test_ops/test_ms_deformable_attn.py
index 8e9f1af8c..37ac26030 100644
--- a/tests/test_ops/test_ms_deformable_attn.py
+++ b/tests/test_ops/test_ms_deformable_attn.py
@@ -1,20 +1,14 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.ops.multi_scale_deform_attn import (
     MultiScaleDeformableAttention, MultiScaleDeformableAttnFunction,
     multi_scale_deformable_attn_pytorch)
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
 
-_USING_PARROTS = True
 _IS_AUTOCAST_AVAILABLE = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck
-    _USING_PARROTS = False
-
 try:
     # If PyTorch version >= 1.6.0 and fp16 is enabled, torch.cuda.amp.autocast
     # would be imported and used; we should test if our modules support it.
@@ -289,17 +283,10 @@ def test_gradient_numerical(channels,
     elif device == 'mlu':
         dtype = torch.float
         eps = 1e-4
-    if _USING_PARROTS:
-        assert gradcheck(
-            func, (value.to(dtype), shapes, level_start_index,
-                   sampling_locations.to(dtype), attention_weights.to(dtype),
-                   im2col_step),
-            no_grads=[shapes, level_start_index],
-            eps=eps)
-    else:
-        assert gradcheck(
-            func, (value.to(dtype), shapes, level_start_index,
-                   sampling_locations.to(dtype), attention_weights.to(dtype),
-                   im2col_step),
-            eps=eps,
-            atol=1e-2)
+
+    assert gradcheck(
+        func, (value.to(dtype), shapes, level_start_index,
+               sampling_locations.to(dtype), attention_weights.to(dtype),
+               im2col_step),
+        eps=eps,
+        atol=1e-2)
diff --git a/tests/test_ops/test_nms.py b/tests/test_ops/test_nms.py
index 9f1ac65d6..7662c1795 100644
--- a/tests/test_ops/test_nms.py
+++ b/tests/test_ops/test_nms.py
@@ -94,19 +94,18 @@ class Testnms:
             assert np.allclose(dets.cpu().numpy(), np_output[m]['dets'])
             assert np.allclose(inds.cpu().numpy(), np_output[m]['inds'])
 
-        if torch.__version__ != 'parrots':
-            boxes = boxes.cuda()
-            scores = scores.cuda()
-            for iou, sig, mscore, m in configs:
-                dets, inds = soft_nms(
-                    boxes,
-                    scores,
-                    iou_threshold=iou,
-                    sigma=sig,
-                    min_score=mscore,
-                    method=m)
-                assert np.allclose(dets.cpu().numpy(), np_output[m]['dets'])
-                assert np.allclose(inds.cpu().numpy(), np_output[m]['inds'])
+        boxes = boxes.cuda()
+        scores = scores.cuda()
+        for iou, sig, mscore, m in configs:
+            dets, inds = soft_nms(
+                boxes,
+                scores,
+                iou_threshold=iou,
+                sigma=sig,
+                min_score=mscore,
+                method=m)
+            assert np.allclose(dets.cpu().numpy(), np_output[m]['dets'])
+            assert np.allclose(inds.cpu().numpy(), np_output[m]['inds'])
 
     def test_nms_match(self):
         if not torch.cuda.is_available():
diff --git a/tests/test_ops/test_onnx.py b/tests/test_ops/test_onnx.py
index 1058fa765..bc73b196d 100644
--- a/tests/test_ops/test_onnx.py
+++ b/tests/test_ops/test_onnx.py
@@ -8,8 +8,6 @@ import torch
 import torch.nn as nn
 
 onnx_file = 'tmp.onnx'
-if torch.__version__ == 'parrots':
-    pytest.skip('not supported in parrots now', allow_module_level=True)
 
 
 @pytest.fixture(autouse=True)
diff --git a/tests/test_ops/test_prroi_pool.py b/tests/test_ops/test_prroi_pool.py
index 0535dfbe2..290c18af5 100644
--- a/tests/test_ops/test_prroi_pool.py
+++ b/tests/test_ops/test_prroi_pool.py
@@ -2,17 +2,10 @@
 import numpy as np
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.utils import IS_CUDA_AVAILABLE
 
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck
-
-    _USING_PARROTS = False
-
 inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
           ([[[[1., 2.], [3., 4.]],
             [[4., 3.], [2., 1.]]]], [[0., 0., 0., 1., 1.]]),
@@ -58,10 +51,7 @@ class TestPrRoiPool:
 
             froipool = PrRoIPool((pool_h, pool_w), spatial_scale)
 
-            if _USING_PARROTS:
-                gradcheck(froipool, (x, rois), no_grads=[rois])
-            else:
-                gradcheck(froipool, (x, rois), eps=1e-2, atol=1e-2)
+            gradcheck(froipool, (x, rois), eps=1e-2, atol=1e-2)
 
     def _test_roipool_allclose(self, device, dtype=torch.float):
         from mmcv.ops import prroi_pool
diff --git a/tests/test_ops/test_riroi_align_rotated.py b/tests/test_ops/test_riroi_align_rotated.py
index c7b501cf4..3347cfb58 100644
--- a/tests/test_ops/test_riroi_align_rotated.py
+++ b/tests/test_ops/test_riroi_align_rotated.py
@@ -2,16 +2,10 @@
 import numpy as np
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.ops import RiRoIAlignRotated
 
-if torch.__version__ == 'parrots':
-    from parrots.autograd import gradcheck
-    _USING_PARROTS = True
-else:
-    from torch.autograd import gradcheck
-    _USING_PARROTS = False
-
 np_feature = np.array([[[[1, 2], [3, 4]], [[1, 2], [4, 3]], [[4, 3], [2, 1]],
                         [[1, 2], [5, 6]], [[3, 4], [7, 8]],
                         [[9, 10], [13, 14]],
@@ -61,11 +55,7 @@ def test_roialign_rotated_gradcheck():
     rois = torch.tensor(np_rois, dtype=torch.float, device='cuda')
     froipool = RiRoIAlignRotated((pool_h, pool_w), spatial_scale, num_samples,
                                  num_orientations, clockwise)
-    if _USING_PARROTS:
-        gradcheck(
-            froipool, (x, rois), no_grads=[rois], delta=1e-3, pt_atol=1e-3)
-    else:
-        gradcheck(froipool, (x, rois), eps=1e-3, atol=1e-3)
+    gradcheck(froipool, (x, rois), eps=1e-3, atol=1e-3)
 
 
 @pytest.mark.skipif(
diff --git a/tests/test_ops/test_roi_align.py b/tests/test_ops/test_roi_align.py
index 6caf5c535..ccbbeef3f 100644
--- a/tests/test_ops/test_roi_align.py
+++ b/tests/test_ops/test_roi_align.py
@@ -2,16 +2,10 @@
 import numpy as np
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
 
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck
-    _USING_PARROTS = False
-
 # yapf:disable
 inputs = [([[[[1., 2.], [3., 4.]]]],
@@ -58,11 +52,7 @@ def _test_roialign_gradcheck(device, dtype):
 
     froipool = RoIAlign((pool_h, pool_w), spatial_scale, sampling_ratio)
 
-    if torch.__version__ == 'parrots':
-        gradcheck(
-            froipool, (x, rois), no_grads=[rois], delta=1e-5, pt_atol=1e-5)
-    else:
-        gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
+    gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
 
 
 def _test_roialign_allclose(device, dtype):
diff --git a/tests/test_ops/test_roi_align_rotated.py b/tests/test_ops/test_roi_align_rotated.py
index 1ad6b6e92..e77c7fda9 100644
--- a/tests/test_ops/test_roi_align_rotated.py
+++ b/tests/test_ops/test_roi_align_rotated.py
@@ -2,16 +2,10 @@
 import numpy as np
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
 
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck
-    _USING_PARROTS = False
-
 # yapf:disable
 inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0.5, 0.5, 1., 1., 0]]),
@@ -69,11 +63,7 @@ def _test_roialign_rotated_gradcheck(device, dtype):
 
     froipool = RoIAlignRotated((pool_h, pool_w), spatial_scale,
                                sampling_ratio)
-    if torch.__version__ == 'parrots':
-        gradcheck(
-            froipool, (x, rois), no_grads=[rois], delta=1e-5, pt_atol=1e-5)
-    else:
-        gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
+    gradcheck(froipool, (x, rois), eps=1e-5, atol=1e-5)
 
 
 def _test_roialign_rotated_allclose(device, dtype):
diff --git a/tests/test_ops/test_roi_pool.py b/tests/test_ops/test_roi_pool.py
index 5ab04bce2..0db5bf7f4 100644
--- a/tests/test_ops/test_roi_pool.py
+++ b/tests/test_ops/test_roi_pool.py
@@ -4,17 +4,10 @@ import os
 import numpy as np
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck
-
-    _USING_PARROTS = False
-
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 
 inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
@@ -51,11 +44,7 @@ class TestRoiPool:
 
             froipool = RoIPool((pool_h, pool_w), spatial_scale)
 
-            if _USING_PARROTS:
-                pass
-                # gradcheck(froipool, (x, rois), no_grads=[rois])
-            else:
-                gradcheck(froipool, (x, rois), eps=1e-2, atol=1e-2)
+            gradcheck(froipool, (x, rois), eps=1e-2, atol=1e-2)
 
     def _test_roipool_allclose(self, device, dtype=torch.float):
         from mmcv.ops import roi_pool
diff --git a/tests/test_ops/test_rotated_feature_align.py b/tests/test_ops/test_rotated_feature_align.py
index e7422a310..04438f447 100644
--- a/tests/test_ops/test_rotated_feature_align.py
+++ b/tests/test_ops/test_rotated_feature_align.py
@@ -13,10 +13,7 @@ from mmcv.utils import IS_CUDA_AVAILABLE
         'cuda',
         marks=pytest.mark.skipif(
            not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
-    pytest.param(
-        'cpu',
-        marks=pytest.mark.skipif(
-            torch.__version__ == 'parrots', reason='requires PyTorch support'))
+    pytest.param('cpu')
 ])
 def test_rotated_feature_align(device):
     feature = torch.tensor([[[[1.2924, -0.2172, -0.5222, 0.1172],
diff --git a/tests/test_ops/test_scatter_points.py b/tests/test_ops/test_scatter_points.py
index cf4516047..1f4343752 100644
--- a/tests/test_ops/test_scatter_points.py
+++ b/tests/test_ops/test_scatter_points.py
@@ -5,9 +5,6 @@ from torch.autograd import gradcheck
 
 from mmcv.ops import DynamicScatter
 
-if torch.__version__ == 'parrots':
-    pytest.skip('not supported in parrots now', allow_module_level=True)
-
 
 @pytest.mark.skipif(
     not torch.cuda.is_available(), reason='requires CUDA support')
diff --git a/tests/test_ops/test_spconv.py b/tests/test_ops/test_spconv.py
index 17ca5678e..57abeafa8 100644
--- a/tests/test_ops/test_spconv.py
+++ b/tests/test_ops/test_spconv.py
@@ -6,10 +6,6 @@ from torch import nn
 from mmcv.cnn import build_conv_layer, build_norm_layer
 from mmcv.ops import (SparseConvTensor, SparseInverseConv3d, SparseSequential,
                       SubMConv3d)
-
-if torch.__version__ == 'parrots':
-    pytest.skip('not supported in parrots now', allow_module_level=True)
-
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
diff --git a/tests/test_ops/test_tin_shift.py b/tests/test_ops/test_tin_shift.py
index c8ce14465..7b2bab473 100755
--- a/tests/test_ops/test_tin_shift.py
+++ b/tests/test_ops/test_tin_shift.py
@@ -4,17 +4,10 @@ import os
 import numpy as np
 import pytest
 import torch
+from torch.autograd import gradcheck
 
 from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
 
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck
-
-    _USING_PARROTS = False
-
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 
 inputs = ([[[[0.88572276, 0.46422583], [0.97408265, 0.59547687],
@@ -149,10 +142,8 @@ def _test_tinshift_gradcheck(device, dtype):
     x = torch.tensor(
         np_input, dtype=dtype, device=device, requires_grad=True)
     shift = torch.tensor(np_shift, device=device).int()
-    if torch.__version__ == 'parrots':
-        gradcheck(tin_shift, (x, shift))
-    else:
-        gradcheck(tin_shift, (x, shift), atol=1, rtol=0.1)
+
+    gradcheck(tin_shift, (x, shift), atol=1, rtol=0.1)
 
 
 def _test_tinshift_allclose(device, dtype):
diff --git a/tests/test_ops/test_upfirdn2d.py b/tests/test_ops/test_upfirdn2d.py
index 1342480a6..f8260cac6 100644
--- a/tests/test_ops/test_upfirdn2d.py
+++ b/tests/test_ops/test_upfirdn2d.py
@@ -1,13 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import pytest
 import torch
-
-_USING_PARROTS = True
-try:
-    from parrots.autograd import gradcheck
-except ImportError:
-    from torch.autograd import gradcheck, gradgradcheck
-    _USING_PARROTS = False
+from torch.autograd import gradcheck, gradgradcheck
 
 
 class TestUpFirDn2d:
 
@@ -32,30 +26,19 @@ class TestUpFirDn2d:
 
     @pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
    def test_upfirdn2d(self):
         from mmcv.ops import upfirdn2d
-        if _USING_PARROTS:
-            gradcheck(
-                upfirdn2d,
-                (self.input_tensor.cuda(),
-                 self.kernel.type_as(
-                     self.input_tensor).cuda(), self.factor, 1, self.pad),
-                delta=1e-4,
-                pt_atol=1e-3)
-        else:
-            gradcheck(
-                upfirdn2d,
-                (self.input_tensor.cuda(),
-                 self.kernel.type_as(
-                     self.input_tensor).cuda(), self.factor, 1, self.pad),
-                eps=1e-4,
-                atol=1e-3)
+        gradcheck(
+            upfirdn2d,
+            (self.input_tensor.cuda(), self.kernel.type_as(
+                self.input_tensor).cuda(), self.factor, 1, self.pad),
+            eps=1e-4,
+            atol=1e-3)
 
-            gradgradcheck(
-                upfirdn2d,
-                (self.input_tensor.cuda(),
-                 self.kernel.type_as(
-                     self.input_tensor).cuda(), self.factor, 1, self.pad),
-                eps=1e-4,
-                atol=1e-3)
+        gradgradcheck(
+            upfirdn2d,
+            (self.input_tensor.cuda(), self.kernel.type_as(
+                self.input_tensor).cuda(), self.factor, 1, self.pad),
+            eps=1e-4,
+            atol=1e-3)
 
         # test with different up
         kernel = torch.randn(3, 3)
diff --git a/tests/test_utils/test_parrots_jit.py b/tests/test_utils/test_parrots_jit.py
deleted file mode 100644
index 921a4402d..000000000
--- a/tests/test_utils/test_parrots_jit.py
+++ /dev/null
@@ -1,278 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import pytest
-import torch
-from mmengine.utils.dl_utils import TORCH_VERSION
-
-import mmcv
-
-pytest.skip('this test not ready now', allow_module_level=True)
-skip_no_parrots = pytest.mark.skipif(
-    TORCH_VERSION != 'parrots', reason='test case under parrots environment')
-
-
-class TestJit:
-
-    def test_add_dict(self):
-
-        @mmcv.jit
-        def add_dict(oper):
-            rets = oper['x'] + oper['y']
-            return {'result': rets}
-
-        def add_dict_pyfunc(oper):
-            rets = oper['x'] + oper['y']
-            return {'result': rets}
-
-        a = torch.rand((3, 4))
-        b = torch.rand((3, 4))
-        oper = {'x': a, 'y': b}
-
-        rets_t = add_dict(oper)
-        rets = add_dict_pyfunc(oper)
-        assert 'result' in rets
-        assert (rets_t['result'] == rets['result']).all()
-
-    def test_add_list(self):
-
-        @mmcv.jit
-        def add_list(oper, x, y):
-            rets = {}
-            for idx, pair in enumerate(oper):
-                rets[f'k{idx}'] = pair['x'] + pair['y']
-            rets[f'k{len(oper)}'] = x + y
-            return rets
-
-        def add_list_pyfunc(oper, x, y):
-            rets = {}
-            for idx, pair in enumerate(oper):
-                rets[f'k{idx}'] = pair['x'] + pair['y']
-            rets[f'k{len(oper)}'] = x + y
-            return rets
-
-        pair_num = 3
-        oper = []
-        for _ in range(pair_num):
-            oper.append({'x': torch.rand((3, 4)), 'y': torch.rand((3, 4))})
-        a = torch.rand((3, 4))
-        b = torch.rand((3, 4))
-        rets = add_list_pyfunc(oper, x=a, y=b)
-        rets_t = add_list(oper, x=a, y=b)
-        for idx in range(pair_num + 1):
-            assert f'k{idx}' in rets_t
-            assert (rets[f'k{idx}'] == rets_t[f'k{idx}']).all()
-
-    @skip_no_parrots
-    def test_jit_cache(self):
-
-        @mmcv.jit
-        def func(oper):
-            if oper['const'] > 1:
-                return oper['x'] * 2 + oper['y']
-            else:
-                return oper['x'] * 2 - oper['y']
-
-        def pyfunc(oper):
-            if oper['const'] > 1:
-                return oper['x'] * 2 + oper['y']
-            else:
-                return oper['x'] * 2 - oper['y']
-
-        assert len(func._cache._cache) == 0
-
-        oper = {'const': 2, 'x': torch.rand((3, 4)), 'y': torch.rand((3, 4))}
-        rets_plus = pyfunc(oper)
-        rets_plus_t = func(oper)
-        assert (rets_plus == rets_plus_t).all()
-        assert len(func._cache._cache) == 1
-
-        oper['const'] = 0.5
-        rets_minus = pyfunc(oper)
-        rets_minus_t = func(oper)
-        assert (rets_minus == rets_minus_t).all()
-        assert len(func._cache._cache) == 2
-
-        rets_a = (rets_minus_t + rets_plus_t) / 4
-        assert torch.allclose(oper['x'], rets_a)
-
-    @skip_no_parrots
-    def test_jit_shape(self):
-
-        @mmcv.jit
-        def func(a):
-            return a + 1
-
-        assert len(func._cache._cache) == 0
-
-        a = torch.ones((3, 4))
-        r = func(a)
-        assert r.shape == (3, 4)
-        assert (r == 2).all()
-        assert len(func._cache._cache) == 1
-
-        a = torch.ones((2, 3, 4))
-        r = func(a)
-        assert r.shape == (2, 3, 4)
-        assert (r == 2).all()
-        assert len(func._cache._cache) == 2
-
-    @skip_no_parrots
-    def test_jit_kwargs(self):
-
-        @mmcv.jit
-        def func(a, b):
-            return torch.mean((a - b) * (a - b))
-
-        assert len(func._cache._cache) == 0
-        x = torch.rand((16, 32))
-        y = torch.rand((16, 32))
-        func(x, y)
-        assert len(func._cache._cache) == 1
-        func(x, b=y)
-        assert len(func._cache._cache) == 1
-        func(b=y, a=x)
-        assert len(func._cache._cache) == 1
-
-    def test_jit_derivate(self):
-
-        @mmcv.jit(derivate=True)
-        def func(x, y):
-            return (x + 2) * (y - 2)
-
-        a = torch.rand((3, 4))
-        b = torch.rand((3, 4))
-        a.requires_grad = True
-
-        c = func(a, b)
-        assert c.requires_grad
-        d = torch.empty_like(c)
-        d.fill_(1.0)
-        c.backward(d)
-        assert torch.allclose(a.grad, (b - 2))
-        assert b.grad is None
-
-        a.grad = None
-        c = func(a, b)
-        assert c.requires_grad
-        d = torch.empty_like(c)
-        d.fill_(2.7)
-        c.backward(d)
-        assert torch.allclose(a.grad, 2.7 * (b - 2))
-        assert b.grad is None
-
-    def test_jit_optimize(self):
-
-        @mmcv.jit(optimize=True)
-        def func(a, b):
-            return torch.mean((a - b) * (a - b))
-
-        def pyfunc(a, b):
-            return torch.mean((a - b) * (a - b))
-
-        a = torch.rand((16, 32))
-        b = torch.rand((16, 32))
-
-        c = func(a, b)
-        d = pyfunc(a, b)
-        assert torch.allclose(c, d)
-
-    @mmcv.skip_no_elena
-    def test_jit_coderize(self):
-        if not torch.cuda.is_available():
-            return
-
-        @mmcv.jit(coderize=True)
-        def func(a, b):
-            return (a + b) * (a - b)
-
-        def pyfunc(a, b):
-            return (a + b) * (a - b)
-
-        a = torch.rand((16, 32), device='cuda')
-        b = torch.rand((16, 32), device='cuda')
-
-        c = func(a, b)
-        d = pyfunc(a, b)
-        assert torch.allclose(c, d)
-
-    def test_jit_value_dependent(self):
-
-        @mmcv.jit
-        def func(a, b):
-            torch.nonzero(a)
-            return torch.mean((a - b) * (a - b))
-
-        def pyfunc(a, b):
-            torch.nonzero(a)
-            return torch.mean((a - b) * (a - b))
-
-        a = torch.rand((16, 32))
-        b = torch.rand((16, 32))
-
-        c = func(a, b)
-        d = pyfunc(a, b)
-        assert torch.allclose(c, d)
-
-    @skip_no_parrots
-    def test_jit_check_input(self):
-
-        def func(x):
-            y = torch.rand_like(x)
-            return x + y
-
-        a = torch.ones((3, 4))
-        with pytest.raises(AssertionError):
-            func = mmcv.jit(func, check_input=(a, ))
-
-    @skip_no_parrots
-    def test_jit_partial_shape(self):
-
-        @mmcv.jit(full_shape=False)
-        def func(a, b):
-            return torch.mean((a - b) * (a - b))
-
-        def pyfunc(a, b):
-            return torch.mean((a - b) * (a - b))
-
-        a = torch.rand((3, 4))
-        b = torch.rand((3, 4))
-        assert torch.allclose(func(a, b), pyfunc(a, b))
-        assert len(func._cache._cache) == 1
-
-        a = torch.rand((6, 5))
-        b = torch.rand((6, 5))
-        assert torch.allclose(func(a, b), pyfunc(a, b))
-        assert len(func._cache._cache) == 1
-
-        a = torch.rand((3, 4, 5))
-        b = torch.rand((3, 4, 5))
-        assert torch.allclose(func(a, b), pyfunc(a, b))
-        assert len(func._cache._cache) == 2
-
-        a = torch.rand((1, 9, 8))
-        b = torch.rand((1, 9, 8))
-        assert torch.allclose(func(a, b), pyfunc(a, b))
-        assert len(func._cache._cache) == 2
-
-    def test_instance_method(self):
-
-        class T:
-
-            def __init__(self, shape):
-                self._c = torch.rand(shape)
-
-            @mmcv.jit
-            def test_method(self, x, y):
-                return (x * self._c) + y
-
-        shape = (16, 32)
-        t = T(shape)
-        a = torch.rand(shape)
-        b = torch.rand(shape)
-        res = (a * t._c) + b
-        jit_res = t.test_method(a, b)
-        assert torch.allclose(res, jit_res)
-
-        t = T(shape)
-        res = (a * t._c) + b
-        jit_res = t.test_method(a, b)
-        assert torch.allclose(res, jit_res)