API: Change --batch-size argument to be the total across all nodes, not just the current node

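Changed the behaviour of main_lincls.py to be consistent with the behaviour of main_moco.py: the per-process batch size is now obtained by dividing --batch-size by the world size instead of by the number of GPUs on the current node. See https://github.com/facebookresearch/moco-v3/blob/878544a/main_moco.py#L206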
2021-11-24 21:47:12 -04:00 · 2021-11-24 21:47:12 -04:00 · a29021ab3e
parent 7074f47f0d
commit a29021ab3e
1 changed files with 2 additions and 2 deletions
--- a/main_lincls.py
+++ b/main_lincls.py
@@ -53,7 +53,7 @@ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
 parser.add_argument('-b', '--batch-size', default=1024, type=int,
                    metavar='N',
                    help='mini-batch size (default: 1024), this is the total '
-                         'batch size of all GPUs on the current node when '
+                         'batch size of all GPUs on all nodes when '
                         'using Data Parallel or Distributed Data Parallel')
 parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                    metavar='LR', help='initial (base) learning rate', dest='lr')
@@ -207,7 +207,7 @@ def main_worker(gpu, ngpus_per_node, args):
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
-            args.batch_size = int(args.batch_size / ngpus_per_node)
+            args.batch_size = int(args.batch_size / args.world_size)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else: