make train.py compatible with torchrun

This commit is contained in:
Xiao Wang 2022-07-07 14:44:55 -07:00
parent beef62e7ab
commit 11060f84c5

View File

@@ -355,6 +355,8 @@ def main():
args.world_size = 1
args.rank = 0 # global rank
if args.distributed:
if 'LOCAL_RANK' in os.environ:
args.local_rank = int(os.getenv('LOCAL_RANK'))
args.device = 'cuda:%d' % args.local_rank
torch.cuda.set_device(args.local_rank)
torch.distributed.init_process_group(backend='nccl', init_method='env://')