From 14e8e4904b29c55ce5326af99a218e6fd3e226ed Mon Sep 17 00:00:00 2001
From: wangjiangben-hw <111729245+wangjiangben-hw@users.noreply.github.com>
Date: Mon, 31 Oct 2022 22:56:48 +0800
Subject: [PATCH] [Fix] Fix set_device bug when using multi-machine
 multi-device (#2370)

---
 mmcv/runner/dist_utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/mmcv/runner/dist_utils.py b/mmcv/runner/dist_utils.py
index 45f73c9b0..c061b3c11 100644
--- a/mmcv/runner/dist_utils.py
+++ b/mmcv/runner/dist_utils.py
@@ -60,7 +60,8 @@ def _init_dist_pytorch(backend: str, **kwargs) -> None:
             **kwargs)
     elif IS_NPU_AVAILABLE:
         import torch_npu  # noqa: F401
-        torch.npu.set_device(rank)
+        num_npus = torch.npu.device_count()
+        torch.npu.set_device(rank % num_npus)
         dist.init_process_group(
             backend='hccl',
             rank=rank,