Change optimizer parameters group method (#1239)
* Change optimizer parameters group method
* Add torch.nn import
* Change isinstance check from torch.Tensor to nn.Parameter
* Parameter freeze fix, PEP8 reformat
* Freeze bug fix

Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
parent 96fcde40b8
commit 187f7c2ed1

train.py (28 changed lines)
@@ -10,6 +10,7 @@ from warnings import warn
 import math
 import numpy as np
 import torch.distributed as dist
+import torch.nn as nn
 import torch.nn.functional as F
 import torch.optim as optim
 import torch.optim.lr_scheduler as lr_scheduler

@@ -80,12 +81,12 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     model = Model(opt.cfg, ch=3, nc=nc).to(device)  # create
 
     # Freeze
-    freeze = ['', ]  # parameter names to freeze (full or partial)
-    if any(freeze):
-        for k, v in model.named_parameters():
-            if any(x in k for x in freeze):
-                print('freezing %s' % k)
-                v.requires_grad = False
+    freeze = []  # parameter names to freeze (full or partial)
+    for k, v in model.named_parameters():
+        v.requires_grad = True  # train all layers
+        if any(x in k for x in freeze):
+            print('freezing %s' % k)
+            v.requires_grad = False
 
     # Optimizer
     nbs = 64  # nominal batch size

@@ -93,14 +94,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
     hyp['weight_decay'] *= total_batch_size * accumulate / nbs  # scale weight_decay
 
     pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
-    for k, v in model.named_parameters():
-        v.requires_grad = True
-        if '.bias' in k:
-            pg2.append(v)  # biases
-        elif '.weight' in k and '.bn' not in k:
-            pg1.append(v)  # apply weight decay
-        else:
-            pg0.append(v)  # all else
+    for k, v in model.named_modules():
+        if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
+            pg2.append(v.bias)  # biases
+        if isinstance(v, nn.BatchNorm2d):
+            pg0.append(v.weight)  # no decay
+        elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
+            pg1.append(v.weight)  # apply decay
 
     if opt.adam:
         optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
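The rewritten freeze block now always walks model.named_parameters(), first re-enabling gradients for every parameter and only then freezing any name that matches an entry in freeze (which ships empty, so all layers train by default). Below is a minimal, self-contained sketch of that behaviour; the toy model and the '0.'/'1.' prefixes are illustrative assumptions, not values from this diff.

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.Conv2d(16, 32, 3))

freeze = ['0.', '1.']  # hypothetical parameter-name prefixes to freeze; empty in train.py
for k, v in model.named_parameters():
    v.requires_grad = True  # train all layers by default
    if any(x in k for x in freeze):
        print('freezing %s' % k)  # e.g. 'freezing 0.weight'
        v.requires_grad = False

print([k for k, v in model.named_parameters() if v.requires_grad])  # -> ['2.weight', '2.bias']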
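For the optimizer groups, the commit swaps substring matching on parameter names for type checks over model.named_modules(): BatchNorm2d weights land in pg0 (no weight decay), every other module weight in pg1 (decay applied), and all biases in pg2. Checking isinstance(..., nn.Parameter) instead of torch.Tensor also skips attributes that are None or plain, non-trainable tensors. The sketch below applies the same grouping to a toy model and hands the groups to SGD; the learning rate, momentum, weight decay and the add_param_group calls are illustrative assumptions, not part of the hunks shown above (train.py takes these values from hyp).

import torch.nn as nn
import torch.optim as optim

model = nn.Sequential(nn.Conv2d(3, 16, 3), nn.BatchNorm2d(16), nn.Conv2d(16, 32, 3))

pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
for k, v in model.named_modules():
    if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
        pg2.append(v.bias)  # all biases
    if isinstance(v, nn.BatchNorm2d):
        pg0.append(v.weight)  # BatchNorm weights: no decay
    elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
        pg1.append(v.weight)  # other weights: decay applied

optimizer = optim.SGD(pg0, lr=0.01, momentum=0.937, nesterov=True)  # hypothetical values
optimizer.add_param_group({'params': pg1, 'weight_decay': 5e-4})  # decay only these weights
optimizer.add_param_group({'params': pg2})  # biases, never decayed

print(len(pg0), len(pg1), len(pg2))  # -> 1 2 3

Grouping by module type rather than by name string makes the split independent of how layers happen to be named (the old check relied on '.bn' and '.bias' appearing in parameter names), which is presumably the point of the change.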