Merge pull request #17 from scottclowe/api_batchsize
API: Change main_lincls --batch-size argument to match main_moco
commit c349e6e24f
@@ -53,7 +53,7 @@ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
 parser.add_argument('-b', '--batch-size', default=1024, type=int,
                     metavar='N',
                     help='mini-batch size (default: 1024), this is the total '
-                         'batch size of all GPUs on the current node when '
+                         'batch size of all GPUs on all nodes when '
                          'using Data Parallel or Distributed Data Parallel')
 parser.add_argument('--lr', '--learning-rate', default=0.1, type=float,
                     metavar='LR', help='initial (base) learning rate', dest='lr')
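The help-string change above redefines --batch-size for main_lincls as the global batch across every process in the job, matching main_moco, rather than the per-node total. A minimal sketch of the difference, assuming a hypothetical 2-node x 8-GPU job (variable names are illustrative, not from the patch):

    # Hypothetical job: 2 nodes, 8 GPUs each, --batch-size 1024 (the default above)
    batch_size = 1024
    ngpus_per_node = 8
    nodes = 2
    world_size = nodes * ngpus_per_node         # 16 processes, one per GPU

    per_gpu_old = batch_size // ngpus_per_node  # 128 -> effective global batch 2048
    per_gpu_new = batch_size // world_size      # 64  -> effective global batch 1024

Only the new reading keeps the effective global batch equal to the value the user asked for once the job spans more than one node.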
@@ -207,7 +207,7 @@ def main_worker(gpu, ngpus_per_node, args):
             # When using a single GPU per process and per
             # DistributedDataParallel, we need to divide the batch size
             # ourselves based on the total number of GPUs we have
-            args.batch_size = int(args.batch_size / ngpus_per_node)
+            args.batch_size = int(args.batch_size / args.world_size)
             args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
             model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
         else:
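The code change matches the new help text: the per-process batch is now args.batch_size / args.world_size (the total number of processes across all nodes) instead of / ngpus_per_node. The data-loader workers line still divides by ngpus_per_node, which is consistent since loader processes are a per-node resource; the + ngpus_per_node - 1 term turns that into a ceiling division, so workers are rounded up rather than truncated away. A quick illustration (values hypothetical):

    workers, ngpus_per_node = 32, 8
    per_gpu_workers = (workers + ngpus_per_node - 1) // ngpus_per_node  # ceil(32/8) = 4
    # Uneven case: workers=30 gives (30 + 7) // 8 == 4, i.e. ceil(30/8)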
@@ -60,7 +60,7 @@ parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
 parser.add_argument('-b', '--batch-size', default=4096, type=int,
                     metavar='N',
                     help='mini-batch size (default: 4096), this is the total '
-                         'batch size of all GPUs on the current node when '
+                         'batch size of all GPUs on all nodes when '
                          'using Data Parallel or Distributed Data Parallel')
 parser.add_argument('--lr', '--learning-rate', default=0.6, type=float,
                     metavar='LR', help='initial (base) learning rate', dest='lr')
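This last hunk applies the same help-string fix to the matching argument (evidently main_moco, given the 4096 default and the PR title). One practical reason --batch-size should denote the true global batch: under the linear learning-rate scaling rule (Goyal et al., 2017), the base lr is scaled by the global batch size before each process takes its share. A hedged sketch of that interplay using the defaults from the hunk above (the divide-by-256 step is the conventional recipe, not quoted from this patch):

    base_lr = 0.6        # --lr default above
    batch_size = 4096    # --batch-size default above, global across all nodes
    world_size = 32      # e.g. 4 nodes x 8 GPUs (hypothetical)

    init_lr = base_lr * batch_size / 256      # scale by the *global* batch: 9.6
    per_gpu_batch = batch_size // world_size  # then split per process: 128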