Merge 9f72387cde into f19d3e771c

2025-04-18 07:54:53 +00:00 · 2025-04-18 07:54:53 +00:00 · 79f4a5a04c
parent f19d3e771c 9f72387cde
commit 79f4a5a04c
3 changed files with 13 additions and 9 deletions
--- a/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp
+++ b/mmcv/ops/csrc/common/box_iou_rotated_utils.hpp
@@ -187,17 +187,18 @@ HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
  // (essentially sorting according to angles)
  // If the angles are the same, sort according to their distance to origin
  T dist[24];
-  for (int i = 0; i < num_in; i++) {
-    dist[i] = dot_2d<T>(q[i], q[i]);
-  }

 #if defined(__CUDACC__) || defined(__MUSACC__)
  // CUDA version
  // In the future, we can potentially use thrust
  // for sorting here to improve speed (though not guaranteed)
+  for (int i = 0; i < num_in; i++) {
+    dist[i] = dot_2d<T>(q[i], q[i]);
+    dist[i] = sqrtf(float(dist[i])) + 1e-6;
+  }
  for (int i = 1; i < num_in - 1; i++) {
    for (int j = i + 1; j < num_in; j++) {
-      T crossProduct = cross_2d<T>(q[i], q[j]);
+      T crossProduct = cross_2d<T>(q[i] * (1 / dist[i]), q[j] * (1 / dist[j]));
      if ((crossProduct < -1e-6) ||
          (fabs(crossProduct) < 1e-6 && dist[i] > dist[j])) {
        auto q_tmp = q[i];
@@ -211,18 +212,21 @@ HOST_DEVICE_INLINE int convex_hull_graham(const Point<T> (&p)[24],
  }
 #else
  // CPU version
+  // Recompute dist[] after the sort below, since sorting reorders the points.
  std::sort(q + 1, q + num_in,
            [](const Point<T>& A, const Point<T>& B) -> bool {
-              T temp = cross_2d<T>(A, B);
+              const T dot_A = sqrtf(float(dot_2d<T>(A, A))) + 1e-6;
+              const T dot_B = sqrtf(float(dot_2d<T>(B, B))) + 1e-6;
+              T temp = cross_2d<T>(A * (1 / dot_A), B * (1 / dot_B));
              if (fabs(temp) < 1e-6) {
-                return dot_2d<T>(A, A) < dot_2d<T>(B, B);
+                return dot_A < dot_B;
              } else {
                return temp > 0;
              }
            });
-  // compute distance to origin after sort, since the points are now different.
  for (int i = 0; i < num_in; i++) {
    dist[i] = dot_2d<T>(q[i], q[i]);
+    dist[i] = sqrtf(float(dist[i]));
  }
 #endif

--- a/mmcv/ops/csrc/pytorch/cuda/box_iou_quadri_cuda.cu
+++ b/mmcv/ops/csrc/pytorch/cuda/box_iou_quadri_cuda.cu
@@ -17,7 +17,7 @@ void box_iou_quadri_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
  box_iou_quadri_cuda_kernel<scalar_t>
      <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
          num_boxes1, num_boxes2, boxes1.data_ptr<scalar_t>(),
-          boxes2.data_ptr<scalar_t>(), (scalar_t*)ious.data_ptr<scalar_t>(),
+          boxes2.data_ptr<scalar_t>(), (scalar_t *)ious.data_ptr<scalar_t>(),
          mode_flag, aligned);
  AT_CUDA_CHECK(cudaGetLastError());
 }
--- a/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.cu
+++ b/mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.cu
@@ -19,7 +19,7 @@ void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
  box_iou_rotated_cuda_kernel<scalar_t>
      <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK, 0, stream>>>(
          num_boxes1, num_boxes2, boxes1.data_ptr<scalar_t>(),
-          boxes2.data_ptr<scalar_t>(), (scalar_t*)ious.data_ptr<scalar_t>(),
+          boxes2.data_ptr<scalar_t>(), (scalar_t *)ious.data_ptr<scalar_t>(),
          mode_flag, aligned);
  AT_CUDA_CHECK(cudaGetLastError());
 }