Change optimizer parameters group method (#1239)

* Change optimizer parameters group method

* Add torch nn

* Change isinstance check (torch.Tensor to nn.Parameter); see the note after this list

* parameter freeze fix, PEP8 reformat

* freeze bug fix

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
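
Note on the isinstance change: nn.Parameter is a subclass of torch.Tensor, so a torch.Tensor check also matches registered buffers (e.g. BatchNorm running statistics), while an nn.Parameter check matches only trainable parameters. A minimal sketch of the distinction; the BatchNorm2d module below is purely illustrative and not part of this commit:

import torch
import torch.nn as nn

bn = nn.BatchNorm2d(8)                             # weight/bias are parameters; running stats are buffers
print(isinstance(bn.weight, nn.Parameter))         # True  -> picked up by the new parameter-group checks
print(isinstance(bn.running_mean, torch.Tensor))   # True  -> a torch.Tensor check would also match this buffer
print(isinstance(bn.running_mean, nn.Parameter))   # False -> the stricter check skips non-trainable tensors
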
Junghoon Kim 2020-11-02 08:08:36 +09:00 committed by GitHub
parent 96fcde40b8
commit 187f7c2ed1
1 changed file with 14 additions and 14 deletions


@@ -10,6 +10,7 @@ from warnings import warn
 import math
 import numpy as np
 import torch.distributed as dist
+import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler
@@ -80,12 +81,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
         model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create
 
     # Freeze
-    freeze = ['', ]  # parameter names to freeze (full or partial)
-    if any(freeze):
-        for k, v in model.named_parameters():
-            if any(x in k for x in freeze):
-                print('freezing %s' % k)
-                v.requires_grad = False
+    freeze = []  # parameter names to freeze (full or partial)
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        if any(x in k for x in freeze):
+            print('freezing %s' % k)
+            v.requires_grad = False
 
     # Optimizer
     nbs = 64  # nominal batch size
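
The reworked freeze block now defaults every parameter to requires_grad=True and only disables gradients for names matching the freeze list. A short usage sketch, assuming `model` is the model built above; the 'model.0.' / 'model.1.' prefixes are illustrative examples, not values from this commit:

freeze = ['model.0.', 'model.1.']  # illustrative prefixes of parameter names to freeze
for k, v in model.named_parameters():
    v.requires_grad = True  # train all layers by default
    if any(x in k for x in freeze):
        print('freezing %s' % k)
        v.requires_grad = False  # matching parameters receive no gradient updates
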
@@ -93,14 +94,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
 
     pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-    for k, v in model.named_parameters():
-        v.requires_grad = True
-        if '.bias' in k:
-            pg2.append(v)  # biases
-        elif '.weight' in k and '.bn' not in k:
-            pg1.append(v)  # apply weight decay
-        else:
-            pg0.append(v)  # all else
+    for k, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            pg2.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            pg0.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            pg1.append(v.weight)  # apply decay
 
     if opt.adam:
         optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
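
For context, a self-contained sketch of how the new module-based grouping can feed an optimizer. The tiny Sequential stand-in model, the SGD/add_param_group wiring, and the hyperparameter values below are assumptions for illustration; the optimizer construction beyond the Adam line is not shown in this diff:

import torch.nn as nn
import torch.optim as optim

# illustrative stand-in model: conv -> batchnorm -> activation -> conv
model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.ReLU(), nn.Conv2d(16, 8, 3))

pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
for k, v in model.named_modules():
    if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
        pg2.append(v.bias)        # biases: no weight decay
    if isinstance(v, nn.BatchNorm2d):
        pg0.append(v.weight)      # BatchNorm weights: no weight decay
    elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
        pg1.append(v.weight)      # conv/linear weights: weight decay applies

optimizer = optim.SGD(pg0, lr=0.01, momentum=0.937, nesterov=True)  # placeholder hyperparameters
optimizer.add_param_group({'params': pg1, 'weight_decay': 5e-4})    # decay only the weights in pg1
optimizer.add_param_group({'params': pg2})                          # biases train without decay
print(len(pg0), len(pg1), len(pg2))  # 1 BatchNorm weight, 2 conv weights, 3 biases for this stand-in model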