deep-person-reid/torchreid/optimizers.py

from __future__ import absolute_import
from __future__ import print_function

import torch
import torch.nn as nn


def init_optimizer(model,
                   optim='adam',          # optimizer choice: 'adam' | 'amsgrad' | 'sgd' | 'rmsprop'
                   lr=0.003,              # learning rate
                   weight_decay=5e-4,     # weight decay
                   momentum=0.9,          # momentum factor for sgd and rmsprop
                   sgd_dampening=0,       # sgd's dampening for momentum
                   sgd_nesterov=False,    # whether to enable sgd's Nesterov momentum
                   rmsprop_alpha=0.99,    # rmsprop's smoothing constant
                   adam_beta1=0.9,        # exponential decay rate for adam's first moment
                   adam_beta2=0.999,      # exponential decay rate for adam's second moment
                   staged_lr=False,       # use different lr for different layers
                   new_layers=None,       # new layers use the default lr, while base layers' lr is scaled by base_lr_mult
                   base_lr_mult=0.1,      # learning rate multiplier for base layers
                   ):
    """Build an optimizer for the given model.

    When staged_lr is True, layers listed in new_layers keep the default lr,
    while the remaining (base) layers use lr * base_lr_mult.
    """
    if staged_lr:
        assert new_layers is not None
        base_params = []
        base_layers = []
        new_params = []
        if isinstance(model, nn.DataParallel):
            model = model.module
        for name, module in model.named_children():
            if name in new_layers:
                new_params += [p for p in module.parameters()]
            else:
                base_params += [p for p in module.parameters()]
                base_layers.append(name)
        param_groups = [
            {'params': base_params, 'lr': lr * base_lr_mult},
            {'params': new_params},
        ]
        print('Use staged learning rate')
        print('* Base layers (initial lr = {}): {}'.format(lr * base_lr_mult, base_layers))
        print('* New layers (initial lr = {}): {}'.format(lr, new_layers))
    else:
        param_groups = model.parameters()

    # Construct optimizer
    if optim == 'adam':
        return torch.optim.Adam(param_groups, lr=lr, weight_decay=weight_decay,
                                betas=(adam_beta1, adam_beta2))

    elif optim == 'amsgrad':
        return torch.optim.Adam(param_groups, lr=lr, weight_decay=weight_decay,
                                betas=(adam_beta1, adam_beta2), amsgrad=True)

    elif optim == 'sgd':
        return torch.optim.SGD(param_groups, lr=lr, momentum=momentum, weight_decay=weight_decay,
                               dampening=sgd_dampening, nesterov=sgd_nesterov)

    elif optim == 'rmsprop':
        return torch.optim.RMSprop(param_groups, lr=lr, momentum=momentum, weight_decay=weight_decay,
                                   alpha=rmsprop_alpha)

    else:
        raise ValueError('Unsupported optimizer: {}'.format(optim))
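

# Usage sketch (added for illustration; not part of the original module). It
# builds a toy model whose 'backbone'/'classifier' layer names are assumptions
# chosen for this example, then constructs an SGD optimizer with staged_lr so
# that the backbone's lr is scaled by base_lr_mult while the classifier keeps
# the default lr.
if __name__ == '__main__':
    class ToyModel(nn.Module):
        def __init__(self):
            super(ToyModel, self).__init__()
            self.backbone = nn.Linear(128, 64)
            self.classifier = nn.Linear(64, 10)

        def forward(self, x):
            return self.classifier(self.backbone(x))

    optimizer = init_optimizer(
        ToyModel(),
        optim='sgd',
        lr=0.01,
        staged_lr=True,
        new_layers=['classifier'],
        base_lr_mult=0.1,
    )
    print(optimizer)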