mirror of
https://github.com/open-mmlab/mmengine.git
synced 2025-06-03 21:54:44 +08:00
Support multi-node distributed training with MLU backend (#1266)
This commit is contained in:
parent
68360e7ce8
commit
78205c3254
3
mmengine/dist/utils.py
vendored
3
mmengine/dist/utils.py
vendored
@@ -101,7 +101,8 @@ def _init_dist_pytorch(backend, init_backend='torch', **kwargs) -> None:
     rank = int(os.environ['RANK'])
     if is_mlu_available():
         import torch_mlu  # noqa: F401
-        torch.mlu.set_device(rank)
+        local_rank = int(os.environ['LOCAL_RANK'])
+        torch.mlu.set_device(local_rank)
         torch_dist.init_process_group(
             backend='cncl',
             rank=rank,
Loading…
x
Reference in New Issue
Block a user