Mirror of https://github.com/huggingface/pytorch-image-models.git
Synced 2025-06-03 15:01:08 +08:00

Commit 7995295968: Merge branch 'logger' into features. Change 'logger' to '_logger'.
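The change is mechanical but touches most of the code base: direct calls on the root logger (logging.info, logging.warning, ...) and the loosely named module-level `logger` objects are replaced with a private `_logger` per module or script. A minimal sketch of the resulting pattern (the function and message below are illustrative, not lifted from the diff):

import logging

# One private, module-level logger; logging.getLogger(__name__) names records
# after the emitting module, and the underscore keeps it out of the public API.
_logger = logging.getLogger(__name__)


def do_work():
    # Routed through the named logger, so handlers and levels configured by
    # timm.utils.setup_default_logging() apply without calling the root logger directly.
    _logger.info('doing work')

The diff also carries additions unrelated to the rename: a new autosquash workflow and the AdamP/SGDP optimizers.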
.github/workflows/autosquash.yml (new file, 39 lines, vendored)
@@ -0,0 +1,39 @@
+name: Autosquash
+on:
+  check_run:
+    types:
+      # Check runs completing successfully can unblock the
+      # corresponding pull requests and make them mergeable.
+      - completed
+  pull_request:
+    types:
+      # A closed pull request makes the checks on the other
+      # pull request on the same base outdated.
+      - closed
+      # Adding the autosquash label to a pull request can
+      # trigger an update or a merge.
+      - labeled
+  pull_request_review:
+    types:
+      # Review approvals can unblock the pull request and
+      # make it mergeable.
+      - submitted
+  # Success statuses can unblock the corresponding
+  # pull requests and make them mergeable.
+  status: {}
+
+jobs:
+  autosquash:
+    name: Autosquash
+    runs-on: ubuntu-18.04
+    steps:
+      - uses: tibdex/autosquash@v2
+        with:
+          # We can't use the built-in secrets.GITHUB_TOKEN yet because of this limitation:
+          # https://github.community/t5/GitHub-Actions/Triggering-a-new-workflow-from-another-workflow/td-p/31676
+          # In the meantime, use a token granting write access on the repo:
+          # - a GitHub App token
+          # See https://github.com/marketplace/actions/github-app-token.
+          # - a personal access token
+          # See https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line.
+          github_token: ${{ secrets.AUTOSQUASH_TOKEN }}
@@ -17,6 +17,8 @@ from timm.data import Dataset, create_loader, resolve_data_config
 from timm.utils import AverageMeter, setup_default_logging

 torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('inference')
+

 parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference')
 parser.add_argument('data', metavar='DIR',

@@ -67,7 +69,7 @@ def main():
         pretrained=args.pretrained,
         checkpoint_path=args.checkpoint)

-    logging.info('Model %s created, param count: %d' %
+    _logger.info('Model %s created, param count: %d' %
                  (args.model, sum([m.numel() for m in model.parameters()])))

     config = resolve_data_config(vars(args), model=model)

@@ -107,7 +109,7 @@ def main():
             end = time.time()

             if batch_idx % args.log_freq == 0:
-                logging.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
+                _logger.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                     batch_idx, len(loader), batch_time=batch_time))

     topk_ids = np.concatenate(topk_ids, axis=0).squeeze()
@@ -2,6 +2,9 @@ import logging
 from .constants import *


+_logger = logging.getLogger(__name__)
+
+
 def resolve_data_config(args, default_cfg={}, model=None, verbose=True):
     new_config = {}
     default_cfg = default_cfg

@@ -65,8 +68,8 @@ def resolve_data_config(args, default_cfg={}, model=None, verbose=True):
         new_config['crop_pct'] = default_cfg['crop_pct']

     if verbose:
-        logging.info('Data processing configuration for current model + dataset:')
+        _logger.info('Data processing configuration for current model + dataset:')
         for n, v in new_config.items():
-            logging.info('\t%s: %s' % (n, str(v)))
+            _logger.info('\t%s: %s' % (n, str(v)))

     return new_config
@@ -18,10 +18,12 @@ from .layers import CondConv2d, get_condconv_initializer

 __all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights"]

+_logger = logging.getLogger(__name__)
+

 def _log_info_if(msg, condition):
     if condition:
-        logging.info(msg)
+        _logger.info(msg)


 def _parse_ksize(ss):

@@ -233,7 +235,7 @@ class EfficientNetBuilder:
         self.drop_path_rate = drop_path_rate
         if feature_location == 'depthwise':
             # old 'depthwise' mode renamed 'expansion' to match TF impl, old expansion mode didn't make sense
-            logging.warning("feature_location=='depthwise' is deprecated, using 'expansion'")
+            _logger.warning("feature_location=='depthwise' is deprecated, using 'expansion'")
             feature_location = 'expansion'
         self.feature_location = feature_location
         assert feature_location in ('bottleneck', 'expansion', '')

@@ -291,7 +293,7 @@ class EfficientNetBuilder:
        """ Build the blocks
        Args:
            in_chs: Number of input-channels passed to first block
-           model_block_args: A list of lists, outer list defines stacks (block stages), inner
+           model_block_args: A list of lists, outer list defines stages, inner
               list contains strings defining block configuration(s)
        Return:
            List of block stacks (each stack wrapped in nn.Sequential)
@@ -12,6 +12,9 @@ from .features import FeatureListNet, FeatureDictNet, FeatureHookNet
 from .layers import Conv2dSame


+_logger = logging.getLogger(__name__)
+
+
 def load_state_dict(checkpoint_path, use_ema=False):
     if checkpoint_path and os.path.isfile(checkpoint_path):
         checkpoint = torch.load(checkpoint_path, map_location='cpu')

@@ -28,10 +31,10 @@ def load_state_dict(checkpoint_path, use_ema=False):
             state_dict = new_state_dict
         else:
             state_dict = checkpoint
-        logging.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
+        _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
         return state_dict
     else:
-        logging.error("No checkpoint found at '{}'".format(checkpoint_path))
+        _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
         raise FileNotFoundError()


@@ -59,13 +62,13 @@ def resume_checkpoint(model, checkpoint_path):
                 resume_epoch = checkpoint['epoch']
                 if 'version' in checkpoint and checkpoint['version'] > 1:
                     resume_epoch += 1  # start at the next epoch, old checkpoints incremented before save
-            logging.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
+            _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
         else:
             model.load_state_dict(checkpoint)
-            logging.info("Loaded checkpoint '{}'".format(checkpoint_path))
+            _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
         return other_state, resume_epoch
     else:
-        logging.error("No checkpoint found at '{}'".format(checkpoint_path))
+        _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
         raise FileNotFoundError()


@@ -73,7 +76,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non
     if cfg is None:
         cfg = getattr(model, 'default_cfg')
     if cfg is None or 'url' not in cfg or not cfg['url']:
-        logging.warning("Pretrained model URL is invalid, using random initialization.")
+        _logger.warning("Pretrained model URL is invalid, using random initialization.")
         return

     state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')

@@ -83,7 +86,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non

     if in_chans == 1:
         conv1_name = cfg['first_conv']
-        logging.info('Converting first conv (%s) from 3 to 1 channel' % conv1_name)
+        _logger.info('Converting first conv (%s) from 3 to 1 channel' % conv1_name)
         conv1_weight = state_dict[conv1_name + '.weight']
         state_dict[conv1_name + '.weight'] = conv1_weight.sum(dim=1, keepdim=True)
     elif in_chans != 3:
@@ -23,7 +23,7 @@ from .registry import register_model
 from .resnet import BasicBlock, Bottleneck  # leveraging ResNet blocks w/ additional features like SE

 _BN_MOMENTUM = 0.1
-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


 def _cfg(url='', **kwargs):

@@ -412,7 +412,7 @@ class HighResolutionModule(nn.Module):
         elif num_branches != len(num_inchannels):
             error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(num_branches, len(num_inchannels))
         if error_msg:
-            logger.error(error_msg)
+            _logger.error(error_msg)
             raise ValueError(error_msg)

     def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
@@ -10,6 +10,9 @@ import torch.nn.functional as F
 from .adaptive_avgmax_pool import adaptive_avgmax_pool2d


+_logger = logging.getLogger(__name__)
+
+
 class TestTimePoolHead(nn.Module):
     def __init__(self, base, original_pool=7):
         super(TestTimePoolHead, self).__init__()

@@ -40,7 +43,7 @@ def apply_test_time_pool(model, config, args):
     if not args.no_test_pool and \
             config['input_size'][-1] > model.default_cfg['input_size'][-1] and \
             config['input_size'][-2] > model.default_cfg['input_size'][-2]:
-        logging.info('Target input size %s > pretrained default %s, using test time pooling' %
+        _logger.info('Target input size %s > pretrained default %s, using test time pooling' %
                      (str(config['input_size'][-2:]), str(model.default_cfg['input_size'][-2:])))
         model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size'])
         test_time_pool = True
@@ -5,4 +5,6 @@ from .radam import RAdam
 from .novograd import NovoGrad
 from .nvnovograd import NvNovoGrad
 from .lookahead import Lookahead
+from .adamp import AdamP
+from .sgdp import SGDP
 from .optim_factory import create_optimizer
timm/optim/adamp.py (new file, 107 lines)
@@ -0,0 +1,107 @@
+"""
+AdamP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/adamp.py
+
+Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
+Code: https://github.com/clovaai/AdamP
+
+Copyright (c) 2020-present NAVER Corp.
+MIT license
+"""
+
+import torch
+import torch.nn as nn
+from torch.optim.optimizer import Optimizer, required
+import math
+
+class AdamP(Optimizer):
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
+                 weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False):
+        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
+                        delta=delta, wd_ratio=wd_ratio, nesterov=nesterov)
+        super(AdamP, self).__init__(params, defaults)
+
+    def _channel_view(self, x):
+        return x.view(x.size(0), -1)
+
+    def _layer_view(self, x):
+        return x.view(1, -1)
+
+    def _cosine_similarity(self, x, y, eps, view_func):
+        x = view_func(x)
+        y = view_func(y)
+
+        x_norm = x.norm(dim=1).add_(eps)
+        y_norm = y.norm(dim=1).add_(eps)
+        dot = (x * y).sum(dim=1)
+
+        return dot.abs() / x_norm / y_norm
+
+    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
+        wd = 1
+        expand_size = [-1] + [1] * (len(p.shape) - 1)
+        for view_func in [self._channel_view, self._layer_view]:
+
+            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)
+
+            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
+                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
+                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
+                wd = wd_ratio
+
+                return perturb, wd
+
+        return perturb, wd
+
+    def step(self, closure=None):
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+
+                grad = p.grad.data
+                beta1, beta2 = group['betas']
+                nesterov = group['nesterov']
+
+                state = self.state[p]
+
+                # State initialization
+                if len(state) == 0:
+                    state['step'] = 0
+                    state['exp_avg'] = torch.zeros_like(p.data)
+                    state['exp_avg_sq'] = torch.zeros_like(p.data)
+
+                # Adam
+                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
+
+                state['step'] += 1
+                bias_correction1 = 1 - beta1 ** state['step']
+                bias_correction2 = 1 - beta2 ** state['step']
+
+                exp_avg.mul_(beta1).add_(1 - beta1, grad)
+                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
+
+                denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
+                step_size = group['lr'] / bias_correction1
+
+                if nesterov:
+                    perturb = (beta1 * exp_avg + (1 - beta1) * grad) / denom
+                else:
+                    perturb = exp_avg / denom
+
+                # Projection
+                wd_ratio = 1
+                if len(p.shape) > 1:
+                    perturb, wd_ratio = self._projection(p, grad, perturb, group['delta'], group['wd_ratio'], group['eps'])
+
+                # Weight decay
+                if group['weight_decay'] > 0:
+                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio)
+
+                # Step
+                p.data.add_(-step_size, perturb)
+
+        return loss
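For orientation only (not part of the commit): once merged, AdamP can be constructed like any torch.optim optimizer. The module and hyperparameter values below are placeholders.

import torch
import torch.nn as nn
from timm.optim import AdamP

model = nn.Conv2d(3, 32, 3)   # stand-in module; any nn.Module works
optimizer = AdamP(model.parameters(), lr=1e-3, weight_decay=1e-2, nesterov=True)

loss = model(torch.randn(2, 3, 32, 32)).mean()   # dummy forward pass
loss.backward()
optimizer.step()        # Adam update plus the projection and scaled weight decay shown above
optimizer.zero_grad()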
@@ -3,7 +3,7 @@ Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 from torch import optim as optim
-from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead
+from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead, AdamP, SGDP
 try:
     from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD
     has_apex = True

@@ -63,6 +63,14 @@ def create_optimizer(args, model, filter_bias_and_bn=True):
     elif opt_lower == 'radam':
         optimizer = RAdam(
             parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps)
+    elif opt_lower == 'adamp':
+        optimizer = AdamP(
+            parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps,
+            delta=0.1, wd_ratio=0.01, nesterov=True)
+    elif opt_lower == 'sgdp':
+        optimizer = SGDP(
+            parameters, lr=args.lr, momentum=args.momentum, weight_decay=weight_decay,
+            eps=args.opt_eps, nesterov=True)
     elif opt_lower == 'adadelta':
         optimizer = optim.Adadelta(
             parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps)
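With the factory branches above in place, the new optimizers can also be selected by name through create_optimizer. A hedged sketch: the args object below stands in for the argparse namespace train.py builds, carrying only the fields the factory reads here, and the values are arbitrary.

from types import SimpleNamespace

import torch.nn as nn
from timm.optim import create_optimizer

# Invented stand-in for the training args; 'adamp' or 'sgdp' hits the new branches.
args = SimpleNamespace(opt='adamp', lr=1e-3, weight_decay=1e-2, momentum=0.9, opt_eps=1e-8)
model = nn.Linear(10, 10)
optimizer = create_optimizer(args, model)

Equivalently, train.py users can pass --opt adamp or --opt sgdp on the command line.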
timm/optim/sgdp.py (new file, 96 lines)
@@ -0,0 +1,96 @@
+"""
+SGDP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/sgdp.py
+
+Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
+Code: https://github.com/clovaai/AdamP
+
+Copyright (c) 2020-present NAVER Corp.
+MIT license
+"""
+
+import torch
+import torch.nn as nn
+from torch.optim.optimizer import Optimizer, required
+import math
+
+class SGDP(Optimizer):
+    def __init__(self, params, lr=required, momentum=0, dampening=0,
+                 weight_decay=0, nesterov=False, eps=1e-8, delta=0.1, wd_ratio=0.1):
+        defaults = dict(lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay,
+                        nesterov=nesterov, eps=eps, delta=delta, wd_ratio=wd_ratio)
+        super(SGDP, self).__init__(params, defaults)
+
+    def _channel_view(self, x):
+        return x.view(x.size(0), -1)
+
+    def _layer_view(self, x):
+        return x.view(1, -1)
+
+    def _cosine_similarity(self, x, y, eps, view_func):
+        x = view_func(x)
+        y = view_func(y)
+
+        x_norm = x.norm(dim=1).add_(eps)
+        y_norm = y.norm(dim=1).add_(eps)
+        dot = (x * y).sum(dim=1)
+
+        return dot.abs() / x_norm / y_norm
+
+    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
+        wd = 1
+        expand_size = [-1] + [1] * (len(p.shape) - 1)
+        for view_func in [self._channel_view, self._layer_view]:
+
+            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)
+
+            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
+                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
+                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
+                wd = wd_ratio
+
+                return perturb, wd
+
+        return perturb, wd
+
+    def step(self, closure=None):
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            weight_decay = group['weight_decay']
+            momentum = group['momentum']
+            dampening = group['dampening']
+            nesterov = group['nesterov']
+
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data
+                state = self.state[p]
+
+                # State initialization
+                if len(state) == 0:
+                    state['momentum'] = torch.zeros_like(p.data)
+
+                # SGD
+                buf = state['momentum']
+                buf.mul_(momentum).add_(1 - dampening, grad)
+                if nesterov:
+                    d_p = grad + momentum * buf
+                else:
+                    d_p = buf
+
+                # Projection
+                wd_ratio = 1
+                if len(p.shape) > 1:
+                    d_p, wd_ratio = self._projection(p, grad, d_p, group['delta'], group['wd_ratio'], group['eps'])
+
+                # Weight decay
+                if weight_decay != 0:
+                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio / (1-momentum))
+
+                # Step
+                p.data.add_(-group['lr'], d_p)
+
+        return loss
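AdamP and SGDP share the same projection step from the paper: when the gradient is nearly orthogonal to a (scale-invariant) weight tensor, the component of the update parallel to the weight is removed so the weight norm stops growing, and weight decay is scaled down by wd_ratio. A simplified, layer-view-only sketch of that test (names are mine, not from the files above):

import math
import torch

def project_update(p, grad, update, delta=0.1, eps=1e-8):
    w, g = p.view(1, -1), grad.view(1, -1)
    # |cos(weight, grad)|, with eps guarding against zero norms
    cosine = (w * g).sum(dim=1).abs() / (w.norm(dim=1).add(eps) * g.norm(dim=1).add(eps))
    if cosine.max() < delta / math.sqrt(w.size(1)):
        # Nearly orthogonal: strip the radial (norm-growing) component of the update.
        w_hat = p / p.norm().add(eps)
        update = update - w_hat * (w_hat * update).sum()
    return update

The real implementations try a per-channel view first and fall back to this whole-layer view.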
@@ -12,7 +12,7 @@ import torch
 from .scheduler import Scheduler


-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


 class CosineLRScheduler(Scheduler):

@@ -48,7 +48,7 @@ class CosineLRScheduler(Scheduler):
         assert t_initial > 0
         assert lr_min >= 0
         if t_initial == 1 and t_mul == 1 and decay_rate == 1:
-            logger.warning("Cosine annealing scheduler will have no effect on the learning "
+            _logger.warning("Cosine annealing scheduler will have no effect on the learning "
                            "rate since t_initial = t_mul = eta_mul = 1.")
         self.t_initial = t_initial
         self.t_mul = t_mul
@@ -12,7 +12,7 @@ import torch
 from .scheduler import Scheduler


-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)


 class TanhLRScheduler(Scheduler):
@@ -14,6 +14,7 @@ import glob
 import csv
 import operator
 import logging
+import logging.handlers
 import numpy as np
 from collections import OrderedDict
 try:

@@ -26,6 +27,9 @@ except ImportError:
 from torch import distributed as dist


+_logger = logging.getLogger(__name__)
+
+
 def unwrap_model(model):
     if isinstance(model, ModelEma):
         return unwrap_model(model.ema)

@@ -89,7 +93,7 @@ class CheckpointSaver:
         checkpoints_str = "Current checkpoints:\n"
         for c in self.checkpoint_files:
             checkpoints_str += ' {}\n'.format(c)
-        logging.info(checkpoints_str)
+        _logger.info(checkpoints_str)

         if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)):
             self.best_epoch = epoch

@@ -126,10 +130,10 @@ class CheckpointSaver:
         to_delete = self.checkpoint_files[delete_index:]
         for d in to_delete:
             try:
-                logging.debug("Cleaning checkpoint: {}".format(d))
+                _logger.debug("Cleaning checkpoint: {}".format(d))
                 os.remove(d[0])
             except Exception as e:
-                logging.error("Exception '{}' while deleting checkpoint".format(e))
+                _logger.error("Exception '{}' while deleting checkpoint".format(e))
         self.checkpoint_files = self.checkpoint_files[:delete_index]

     def save_recovery(self, model, optimizer, args, epoch, model_ema=None, use_amp=False, batch_idx=0):

@@ -139,10 +143,10 @@ class CheckpointSaver:
         self._save(save_path, model, optimizer, args, epoch, model_ema, use_amp=use_amp)
         if os.path.exists(self.last_recovery_file):
             try:
-                logging.debug("Cleaning recovery: {}".format(self.last_recovery_file))
+                _logger.debug("Cleaning recovery: {}".format(self.last_recovery_file))
                 os.remove(self.last_recovery_file)
             except Exception as e:
-                logging.error("Exception '{}' while removing {}".format(e, self.last_recovery_file))
+                _logger.error("Exception '{}' while removing {}".format(e, self.last_recovery_file))
         self.last_recovery_file = self.curr_recovery_file
         self.curr_recovery_file = save_path

@@ -284,9 +288,9 @@ class ModelEma:
                     name = k
                 new_state_dict[name] = v
             self.ema.load_state_dict(new_state_dict)
-            logging.info("Loaded state_dict_ema")
+            _logger.info("Loaded state_dict_ema")
         else:
-            logging.warning("Failed to find state_dict_ema, starting from loaded model weights")
+            _logger.warning("Failed to find state_dict_ema, starting from loaded model weights")

     def update(self, model):
         # correct a mismatch in state dict keys

@@ -312,8 +316,13 @@ class FormatterNoInfo(logging.Formatter):
         return logging.Formatter.format(self, record)


-def setup_default_logging(default_level=logging.INFO):
+def setup_default_logging(default_level=logging.INFO, log_path=''):
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(FormatterNoInfo())
     logging.root.addHandler(console_handler)
     logging.root.setLevel(default_level)
+    if log_path:
+        file_handler = logging.handlers.RotatingFileHandler(log_path, maxBytes=(1024 ** 2 * 2), backupCount=3)
+        file_formatter = logging.Formatter("%(asctime)s - %(name)20s: [%(levelname)8s] - %(message)s")
+        file_handler.setFormatter(file_formatter)
+        logging.root.addHandler(file_handler)
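The new log_path argument gives setup_default_logging an optional rotating file handler in addition to console output. A usage sketch; the path and logger name below are placeholders:

import logging
from timm.utils import setup_default_logging

# Console output plus ./output/train.log, rotated at roughly 2 MB with 3 backups kept.
setup_default_logging(default_level=logging.INFO, log_path='./output/train.log')

_logger = logging.getLogger('train')
_logger.info('this message reaches both the console and the log file')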
train.py (37 changed lines)
@@ -40,6 +40,7 @@ import torch.nn as nn
 import torchvision.utils

 torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('train')


 # The first arg parser parses out only the --config argument, this argument is used to

@@ -232,7 +233,7 @@ def main():
     if 'WORLD_SIZE' in os.environ:
         args.distributed = int(os.environ['WORLD_SIZE']) > 1
     if args.distributed and args.num_gpu > 1:
-        logging.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
+        _logger.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
         args.num_gpu = 1

     args.device = 'cuda:0'

@@ -248,10 +249,10 @@ def main():
         assert args.rank >= 0

     if args.distributed:
-        logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
+        _logger.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                      % (args.rank, args.world_size))
     else:
-        logging.info('Training with a single process on %d GPUs.' % args.num_gpu)
+        _logger.info('Training with a single process on %d GPUs.' % args.num_gpu)

     torch.manual_seed(args.seed + args.rank)


@@ -270,7 +271,7 @@ def main():
         checkpoint_path=args.initial_checkpoint)

     if args.local_rank == 0:
-        logging.info('Model %s created, param count: %d' %
+        _logger.info('Model %s created, param count: %d' %
                      (args.model, sum([m.numel() for m in model.parameters()])))

     data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)

@@ -286,7 +287,7 @@ def main():

     if args.num_gpu > 1:
         if args.amp:
-            logging.warning(
+            _logger.warning(
                 'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.')
             args.amp = False
         model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()

@@ -300,7 +301,7 @@ def main():
         model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
         use_amp = True
     if args.local_rank == 0:
-        logging.info('NVIDIA APEX {}. AMP {}.'.format(
+        _logger.info('NVIDIA APEX {}. AMP {}.'.format(
             'installed' if has_apex else 'not installed', 'on' if use_amp else 'off'))

     # optionally resume from a checkpoint

@@ -311,11 +312,11 @@ def main():
     if resume_state and not args.no_resume_opt:
         if 'optimizer' in resume_state:
             if args.local_rank == 0:
-                logging.info('Restoring Optimizer state from checkpoint')
+                _logger.info('Restoring Optimizer state from checkpoint')
             optimizer.load_state_dict(resume_state['optimizer'])
         if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
             if args.local_rank == 0:
-                logging.info('Restoring NVIDIA AMP state from checkpoint')
+                _logger.info('Restoring NVIDIA AMP state from checkpoint')
             amp.load_state_dict(resume_state['amp'])
     del resume_state


@@ -337,16 +338,16 @@ def main():
                 else:
                     model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
                 if args.local_rank == 0:
-                    logging.info(
+                    _logger.info(
                         'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                         'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')
             except Exception as e:
-                logging.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
+                _logger.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
         if has_apex:
             model = DDP(model, delay_allreduce=True)
         else:
             if args.local_rank == 0:
-                logging.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
+                _logger.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
             model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
         # NOTE: EMA model does not need to be wrapped by DDP


@@ -361,11 +362,11 @@ def main():
         lr_scheduler.step(start_epoch)

     if args.local_rank == 0:
-        logging.info('Scheduled epochs: {}'.format(num_epochs))
+        _logger.info('Scheduled epochs: {}'.format(num_epochs))

     train_dir = os.path.join(args.data, 'train')
     if not os.path.exists(train_dir):
-        logging.error('Training folder does not exist at: {}'.format(train_dir))
+        _logger.error('Training folder does not exist at: {}'.format(train_dir))
         exit(1)
     dataset_train = Dataset(train_dir)


@@ -404,7 +405,7 @@ def main():
     if not os.path.isdir(eval_dir):
         eval_dir = os.path.join(args.data, 'validation')
         if not os.path.isdir(eval_dir):
-            logging.error('Validation folder does not exist at: {}'.format(eval_dir))
+            _logger.error('Validation folder does not exist at: {}'.format(eval_dir))
             exit(1)
     dataset_eval = Dataset(eval_dir)


@@ -468,7 +469,7 @@ def main():

             if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                 if args.local_rank == 0:
-                    logging.info("Distributing BatchNorm running means and vars")
+                    _logger.info("Distributing BatchNorm running means and vars")
                 distribute_bn(model, args.world_size, args.dist_bn == 'reduce')

             eval_metrics = validate(model, loader_eval, validate_loss_fn, args)

@@ -499,7 +500,7 @@ def main():
     except KeyboardInterrupt:
         pass
     if best_metric is not None:
-        logging.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
+        _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))


 def train_epoch(

@@ -559,7 +560,7 @@ def train_epoch(
                 losses_m.update(reduced_loss.item(), input.size(0))

             if args.local_rank == 0:
-                logging.info(
+                _logger.info(
                     'Train: {} [{:>4d}/{} ({:>3.0f}%)] '
                     'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) '
                     'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s '

@@ -647,7 +648,7 @@ def validate(model, loader, loss_fn, args, log_suffix=''):
             end = time.time()
             if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0):
                 log_name = 'Test' + log_suffix
-                logging.info(
+                _logger.info(
                     '{0}: [{1:>4d}/{2}] '
                     'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                     'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
validate.py (16 changed lines)
@@ -5,7 +5,7 @@ This is intended to be a lean and easily modifiable ImageNet validation script f
 models or training checkpoints against ImageNet or similarly organized image datasets. It prioritizes
 canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit.

-Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
+Hacked together by Ross Wightman (https://github.com/rwightman)
 """
 import argparse
 import os

@@ -29,6 +29,8 @@ from timm.data import Dataset, DatasetTar, create_loader, resolve_data_config, R
 from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging

 torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('validate')
+

 parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation')
 parser.add_argument('data', metavar='DIR',

@@ -115,7 +117,7 @@ def validate(args):
         load_checkpoint(model, args.checkpoint, args.use_ema)

     param_count = sum([m.numel() for m in model.parameters()])
-    logging.info('Model %s created, param count: %d' % (args.model, param_count))
+    _logger.info('Model %s created, param count: %d' % (args.model, param_count))

     data_config = resolve_data_config(vars(args), model=model)
     model, test_time_pool = apply_test_time_pool(model, data_config, args)

@@ -194,7 +196,7 @@ def validate(args):
             end = time.time()

             if batch_idx % args.log_freq == 0:
-                logging.info(
+                _logger.info(
                     'Test: [{0:>4d}/{1}] '
                     'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                     'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '

@@ -220,9 +222,9 @@ def validate(args):
         param_count=round(param_count / 1e6, 2),
         img_size=data_config['input_size'][-1],
         cropt_pct=crop_pct,
-        interpolation=data_config['interpolation']
-    ))
-    logging.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
+        interpolation=data_config['interpolation']))
+
+    _logger.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
         results['top1'], results['top1_err'], results['top5'], results['top5_err']))

     return results

@@ -252,7 +254,7 @@ def main():

     if len(model_cfgs):
         results_file = args.results_file or './results-all.csv'
-        logging.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
+        _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
         results = []
         try:
             start_batch_size = args.batch_size