[Docs] Add npu build.md and update roi_pool_npu adapter (#2812)

2025-06-03 21:54:52 +08:00 · 2023-06-09 10:35:41 +08:00 · 2023-06-09 10:35:41 +08:00 · 36e6b19b78
commit 36e6b19b78
parent c9627e867f
2 changed files with 79 additions and 13 deletions
--- a/docs/zh_cn/get_started/build.md
+++ b/docs/zh_cn/get_started/build.md
@@ -354,3 +354,51 @@ y = torch.tensor([1, 5, 3]).mlu()
 w = torch.ones(10).float().mlu()
 output = sigmoid_focal_loss(x, y, 2.0, 0.25, w, 'none')
 ```
+
+### 在昇腾 NPU 机器编译 mmcv
+
+在编译 mmcv 前，需要安装 torch_npu，完整安装教程详见 [PyTorch 安装指南](https://gitee.com/ascend/pytorch/blob/master/docs/zh/PyTorch%E5%AE%89%E8%A3%85%E6%8C%87%E5%8D%97/PyTorch%E5%AE%89%E8%A3%85%E6%8C%87%E5%8D%97.md#pytorch%E5%AE%89%E8%A3%85%E6%8C%87%E5%8D%97)
+
+#### 选项 1: 使用 NPU 设备源码编译安装 mmcv (推荐方式)
+
+- 拉取 [MMCV 源码](https://github.com/open-mmlab/mmcv.git)
+
+```bash
+git clone https://github.com/open-mmlab/mmcv.git
+```
+
+- 编译
+
+```bash
+MMCV_WITH_OPS=1 MAX_JOBS=8 FORCE_NPU=1 python setup.py build_ext
+```
+
+- 安装
+
+```bash
+MMCV_WITH_OPS=1 FORCE_NPU=1 python setup.py develop
+```
+
+#### 选项 2: 使用 pip 安装 Ascend 编译版本的 mmcv
+
+Ascend 编译版本的 mmcv 在 mmcv >= 1.7.0 时已经支持直接 pip 安装
+
+```bash
+pip install mmcv -f https://download.openmmlab.com/mmcv/dist/ascend/torch1.8.0/index.html
+```
+
+#### 验证
+
+```python
+import torch
+import torch_npu
+from mmcv.ops import softmax_focal_loss
+
+# Init tensor to the NPU
+x = torch.randn(3, 10).npu()
+y = torch.tensor([1, 5, 3]).npu()
+w = torch.ones(10).float().npu()
+
+output = softmax_focal_loss(x, y, 2.0, 0.25, w, 'none')
+print(output)
+```
--- a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp
@@ -11,19 +11,37 @@ void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
  int64_t pooled_channel = 1;
  at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
      {}, rois.options().dtype(at::kInt), rois);
-  OpCommand cmd;
-  cmd.Name("RoiPoolingWithArgMax")
-      .Input(input)
-      .Input(rois)
-      .Input(roi_actual_num)
-      .Output(output)
-      .Output(argmax)
-      .Attr("pooled_h", pooled_height_64)
-      .Attr("pooled_w", pooled_width_64)
-      .Attr("spatial_scale_h", spatial_scale)
-      .Attr("spatial_scale_w", spatial_scale)
-      .Attr("pool_channel", pooled_channel)
-      .Run();
+  if (input.sizes()[1] % 16 == 0) {
+    OpCommand cmd;
+    cmd.Name("RoiPoolingWithArgMax")
+        .Input(input)
+        .Input(rois)
+        .Input(roi_actual_num)
+        .Output(output)
+        .Output(argmax)
+        .Attr("pooled_h", pooled_height_64)
+        .Attr("pooled_w", pooled_width_64)
+        .Attr("spatial_scale_h", spatial_scale)
+        .Attr("spatial_scale_w", spatial_scale)
+        .Attr("pool_channel", pooled_channel)
+        .Run();
+
+  } else {
+    OpCommand cmd;
+    cmd.Name("RoiPoolingWithArgMax")
+        .Input(input)
+        .Input(rois)
+        .Input(roi_actual_num)
+        .Output(output)
+        .Output(argmax)
+        .Attr("pooled_h", pooled_height_64)
+        .Attr("pooled_w", pooled_width_64)
+        .Attr("spatial_scale_h", spatial_scale)
+        .Attr("spatial_scale_w", spatial_scale)
+        .Attr("pool_channel", pooled_channel)
+        .Attr("_exclude_engines", (string) "AiCore")
+        .Run();
+  }
 }

 void roi_pool_backward_npu(Tensor grad_output, Tensor rois, Tensor argmax,