yolov5/utils/torch_utils.py

# PyTorch utils

import logging
import math
import os
import time
from contextlib import contextmanager
from copy import deepcopy

import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.nn.functional as F
import torchvision

try:
    import thop  # for FLOPS computation
except ImportError:
    # thop is an optional dependency: keep a None placeholder so the name always exists
    thop = None
# Module-level logger, named after this module
logger = logging.getLogger(__name__)


@contextmanager
def torch_distributed_zero_first(local_rank: int):
    """
    Context manager that lets the local master (rank 0) run the wrapped block ahead of the other ranks.

    Ranks other than -1 and 0 call torch.distributed.barrier() before entering the block, while
    rank 0 calls it after leaving the block. A local_rank of -1 never touches torch.distributed.
    """
    is_local_master = local_rank == 0
    waits_for_master = local_rank not in (-1, 0)

    if waits_for_master:
        torch.distributed.barrier()  # entered before the body runs on this rank
    yield
    if is_local_master:
        torch.distributed.barrier()  # entered once rank 0 has finished the body


def init_torch_seeds(seed=0):
    """
    Seed the torch RNG and select the matching cuDNN mode.

    seed == 0 -> deterministic cuDNN without benchmarking (slower, more reproducible);
    any other seed -> non-deterministic cuDNN with benchmarking (faster, less reproducible).
    """
    # Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
    torch.manual_seed(seed)
    cudnn.deterministic, cudnn.benchmark = (True, False) if seed == 0 else (False, True)


def select_device(device='', batch_size=None):
    """
    Pick the torch device to run on and log what was selected.

    Args:
        device: '' (use CUDA if available, else CPU), 'cpu', or a comma separated list of CUDA
            ids such as '0' or '0,1,2,3'. A non-cpu request is written to CUDA_VISIBLE_DEVICES.
        batch_size: optional batch size; when more than one GPU is visible it must be a multiple
            of the GPU count.

    Returns:
        torch.device('cuda:0') when CUDA is used, otherwise torch.device('cpu').

    Raises:
        AssertionError: if a CUDA device is requested but CUDA is unavailable, or if batch_size is
            not a multiple of the visible GPU count.
    """
    # device = 'cpu' or '0' or '0,1,2,3'
    cpu_request = device.lower() == 'cpu'
    if device and not cpu_request:  # if device requested other than 'cpu'
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested'  # check availability

    cuda = False if cpu_request else torch.cuda.is_available()
    if cuda:
        c = 1024 ** 2  # bytes to MB
        ng = torch.cuda.device_count()
        if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
            assert batch_size % ng == 0, f'batch-size {batch_size} not multiple of GPU count {ng}'
        x = [torch.cuda.get_device_properties(i) for i in range(ng)]
        # Report the requested ids, or every visible device when none was requested (previously
        # only 'CUDA:0' was listed in that case even though all ng devices are counted above).
        labels = device.split(',') if device else [str(i) for i in range(ng)]
        s = f'Using torch {torch.__version__} '
        # zip() stops at the shorter sequence, so a request naming more ids than there are visible
        # devices can no longer index past the end of x.
        for i, (d, props) in enumerate(zip(labels, x)):
            if i == 1:
                s = ' ' * len(s)  # align the following lines under the first device entry
            logger.info(f"{s}CUDA:{d} ({props.name}, {props.total_memory / c}MB)")
    else:
        logger.info(f'Using torch {torch.__version__} CPU')

    logger.info('')  # skip a line
    return torch.device('cuda:0' if cuda else 'cpu')


def time_synchronized():
    """Return time.time(), calling torch.cuda.synchronize() first whenever CUDA is available."""
    # pytorch-accurate time
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    return time.time()


def profile(x, ops, n=100, device=None):
    """
    Profile a pytorch module or list of modules and print one table row per op.

    Example usage:
        x = torch.randn(16, 3, 640, 640)  # input
        m1 = lambda x: x * torch.sigmoid(x)
        m2 = nn.SiLU()
        profile(x, [m1, m2], n=100)  # profile speed over 100 iterations

    Args:
        x: input tensor; it is moved to ``device`` and has requires_grad switched on.
        ops: a callable/module or a list of them; each is called as ``m(x)``.
        n: number of timed forward/backward iterations per op.
        device: torch.device to run on; defaults to cuda:0 when CUDA is available, else cpu.

    Prints parameter count, GFLOPS (0 when thop cannot compute them), mean forward and backward
    time in ms (backward is nan when the output cannot be back-propagated), and the input/output
    shapes. Returns None.
    """
    device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    x = x.to(device)
    x.requires_grad = True
    # Report the properties of the device actually being profiled, not always index 0
    print(torch.__version__, device.type, torch.cuda.get_device_properties(device) if device.type == 'cuda' else '')
    print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")
    for m in ops if isinstance(ops, list) else [ops]:
        m = m.to(device) if hasattr(m, 'to') else m  # device
        m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m  # type
        dtf, dtb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
        y = None  # keeps the summary row printable even when n == 0
        try:
            flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPS
        except Exception:  # thop unavailable or op not supported; never swallow KeyboardInterrupt
            flops = 0

        for _ in range(n):
            t[0] = time_synchronized()
            y = m(x)
            t[1] = time_synchronized()
            try:
                _ = y.sum().backward()
                t[2] = time_synchronized()
            except Exception:  # no backward method
                t[2] = float('nan')
            dtf += (t[1] - t[0]) * 1000 / n  # ms per op forward
            dtb += (t[2] - t[1]) * 1000 / n  # ms per op backward

        s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'
        s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'
        p = sum(w.numel() for w in m.parameters()) if isinstance(m, nn.Module) else 0  # parameters
        print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}')


def is_parallel(model):
    """Return True when model's exact type is DataParallel or DistributedDataParallel."""
    parallel_wrappers = (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
    # Exact type comparison (not isinstance), so subclasses of the wrappers do not match
    return type(model) in parallel_wrappers


def intersect_dicts(da, db, exclude=()):
    """
    Dictionary intersection of matching keys and shapes, using da values.

    An entry of da is kept when its key also exists in db, no item of 'exclude' is a substring
    of the key, and the two values have equal .shape.
    """
    kept = {}
    for key, value in da.items():
        if key not in db:
            continue
        if any(pattern in key for pattern in exclude):
            continue
        if value.shape == db[key].shape:
            kept[key] = value
    return kept


def initialize_weights(model):
    """
    Adjust per-layer settings in place for every sub-module of model.

    nn.BatchNorm2d layers get eps=1e-3 and momentum=0.03; Hardswish/LeakyReLU/ReLU/ReLU6 layers are
    switched to inplace=True. nn.Conv2d layers are left untouched. Types are compared exactly, so
    subclasses of these layers are not matched.
    """
    for module in model.modules():
        kind = type(module)
        if kind is nn.Conv2d:
            continue  # no custom init, e.g. nn.init.kaiming_normal_(..., mode='fan_out', nonlinearity='relu')
        if kind is nn.BatchNorm2d:
            module.eps = 1e-3
            module.momentum = 0.03
        elif kind in (nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6):
            module.inplace = True


def find_modules(model, mclass=nn.Conv2d):
    """Return the indices of the entries in model.module_list that are instances of 'mclass'."""
    matches = []
    for index, layer in enumerate(model.module_list):
        if isinstance(layer, mclass):
            matches.append(index)
    return matches


def sparsity(model):
    """Return global model sparsity: the number of zero-valued parameter entries over the total count."""
    params = list(model.parameters())
    # Float start values keep the same accumulation (and result) types as a running float total
    total = sum((p.numel() for p in params), 0.)
    zeros = sum(((p == 0).sum() for p in params), 0.)
    return zeros / total


def prune(model, amount=0.3):
    """
    Apply L1 unstructured pruning with the given 'amount' to the weight of every nn.Conv2d in model.

    The pruning re-parametrisation is removed right away so the pruned weights become permanent,
    and the resulting global sparsity is printed.
    """
    import torch.nn.utils.prune as torch_prune
    print('Pruning model... ', end='')
    conv_layers = (m for m in model.modules() if isinstance(m, nn.Conv2d))
    for layer in conv_layers:
        torch_prune.l1_unstructured(layer, name='weight', amount=amount)  # prune
        torch_prune.remove(layer, 'weight')  # make permanent
    print(' %.3g global sparsity' % sparsity(model))


def fuse_conv_and_bn(conv, bn):
    """
    Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/

    Args:
        conv: the nn.Conv2d whose output feeds ``bn``.
        bn: the batch-norm layer; its running statistics, weight, bias and eps are folded in.

    Returns:
        A new nn.Conv2d (bias enabled, created with requires_grad False, on conv's device) with
        the same geometry as ``conv`` (kernel size, stride, padding, dilation, groups, padding
        mode) whose weight and bias reproduce ``bn(conv(x))`` using bn's running statistics.
    """
    # dilation and padding_mode must be carried over as well, otherwise fusing a dilated (or
    # non-zero padded) convolution yields a layer that computes something different.
    fusedconv = nn.Conv2d(conv.in_channels,
                          conv.out_channels,
                          kernel_size=conv.kernel_size,
                          stride=conv.stride,
                          padding=conv.padding,
                          dilation=conv.dilation,
                          groups=conv.groups,
                          bias=True,
                          padding_mode=conv.padding_mode).requires_grad_(False).to(conv.weight.device)

    # prepare filters: scale every output channel by gamma / sqrt(var + eps)
    w_conv = conv.weight.clone().view(conv.out_channels, -1)
    w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
    fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))

    # prepare spatial bias: scaled conv bias plus (beta - gamma * mean / sqrt(var + eps))
    b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias
    b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))
    fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)

    return fusedconv


def model_info(model, verbose=False, img_size=640):
    """
    Log a one-line model summary: layer count, parameter count, gradient count and, if possible, GFLOPS.

    img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]. With verbose=True a
    per-parameter table (name, requires_grad, size, shape, mean, std) is printed first. The GFLOPS
    part is best effort: if thop cannot be imported or anything in the estimate fails, it is omitted.
    """
    params = list(model.parameters())
    n_p = sum(p.numel() for p in params)  # number parameters
    n_g = sum(p.numel() for p in params if p.requires_grad)  # number gradients

    if verbose:
        header = ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma')
        print('%5s %40s %9s %12s %20s %10s %10s' % header)
        for idx, (pname, param) in enumerate(model.named_parameters()):
            row = (idx, pname.replace('module_list.', ''), param.requires_grad, param.numel(),
                   list(param.shape), param.mean(), param.std())
            print('%5g %40s %9s %12g %20s %10.3g %10.3g' % row)

    try:  # FLOPS
        from thop import profile as thop_profile
        stride = int(model.stride.max()) if hasattr(model, 'stride') else 32
        # Measure on a small stride x stride input, then scale up to the requested image size
        probe = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device)
        flops = thop_profile(deepcopy(model), inputs=(probe,), verbose=False)[0] / 1E9 * 2  # stride GFLOPS
        dims = img_size if isinstance(img_size, list) else [img_size, img_size]  # expand if int/float
        fs = ', %.1f GFLOPS' % (flops * dims[0] / stride * dims[1] / stride)  # 640x640 GFLOPS
    except Exception:
        fs = ''

    logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")


def load_classifier(name='resnet101', n=2):
    """
    Load the pretrained torchvision model called 'name' and reshape its output to n classes.

    The model's ``fc`` weight and bias are replaced by zero-filled trainable parameters with n
    outputs, and ``fc.out_features`` is updated to match.
    """
    model = torchvision.models.__dict__[name](pretrained=True)

    # ResNet model properties
    # input_size = [3, 224, 224]
    # input_space = 'RGB'
    # input_range = [0, 1]
    # mean = [0.485, 0.456, 0.406]
    # std = [0.229, 0.224, 0.225]

    # Reshape output to n classes
    head = model.fc
    in_features = head.weight.shape[1]  # input width of the existing fc layer
    head.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
    head.weight = nn.Parameter(torch.zeros(n, in_features), requires_grad=True)
    head.out_features = n
    return model


def scale_img(img, ratio=1.0, same_shape=False):  # img(16,3,256,416), r=ratio
    """
    Scale img(bs,3,y,x) by ratio using bilinear interpolation.

    ratio == 1.0 returns img itself. Otherwise the resized image is padded/cropped on the right
    and bottom with 0.447: back to the original height/width when same_shape is True, else to the
    scaled size rounded up to a multiple of 32 pixels.
    """
    if ratio == 1.0:
        return img

    src_h, src_w = img.shape[2:]
    new_h, new_w = int(src_h * ratio), int(src_w * ratio)  # new size
    resized = F.interpolate(img, size=(new_h, new_w), mode='bilinear', align_corners=False)  # resize
    if same_shape:
        out_h, out_w = src_h, src_w
    else:  # pad/crop img
        gs = 32  # (pixels) grid size
        out_h = math.ceil(src_h * ratio / gs) * gs
        out_w = math.ceil(src_w * ratio / gs) * gs
    return F.pad(resized, [0, out_w - new_w, 0, out_h - new_h], value=0.447)  # value = imagenet mean


def copy_attr(a, b, include=(), exclude=()):
    """
    Copy attributes from b.__dict__ onto a.

    Names starting with '_' and names listed in 'exclude' are skipped. When 'include' is
    non-empty, only the names it contains are copied.
    """
    for key, value in b.__dict__.items():
        selected = len(include) == 0 or key in include
        if selected and not key.startswith('_') and key not in exclude:
            setattr(a, key, value)


class ModelEMA:
    """ Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models
    Keeps a moving average of every floating-point entry in the model state_dict (parameters and buffers).
    This is intended to allow functionality like
    https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
    A smoothed version of the weights is necessary for some training schemes to perform well.
    This class is sensitive to where it is initialized in the sequence of model init,
    GPU assignment and distributed training wrappers.
    """

    def __init__(self, model, decay=0.9999, updates=0):
        """Create the EMA as an eval-mode deep copy of model (unwrapped if parallel) with gradients disabled."""
        source = model.module if is_parallel(model) else model
        self.ema = deepcopy(source).eval()  # FP32 EMA (the copy is not converted to half precision)
        self.updates = updates  # number of EMA updates
        # decay exponential ramp (to help early epochs): approaches `decay` as the update count grows
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        """Advance the update counter and blend model's current state_dict into the EMA in place."""
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)

            source = model.module if is_parallel(model) else model
            msd = source.state_dict()  # model state_dict
            for name, averaged in self.ema.state_dict().items():
                if not averaged.dtype.is_floating_point:
                    continue  # non floating-point entries are left as they are
                averaged *= d
                averaged += (1. - d) * msd[name].detach()

    def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
        """Copy model's attributes onto the EMA model via copy_attr, honouring include/exclude."""
        copy_attr(self.ema, model, include=include, exclude=exclude)
Utils reorganization (#1392) * Utils reorganization * Add new utils files * cleanup * simplify * reduce datasets.py * remove evolve.sh * loadWebcam cleanup 2020-11-14 11:50:32 +01:00			`# PyTorch utils`

optimize imports 2020-08-20 21:17:40 -07:00			`import logging`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`import math`
initial commit 2020-05-29 17:04:54 -07:00			`import os`
			`import time`
Utils reorganization (#1392) * Utils reorganization * Add new utils files * cleanup * simplify * reduce datasets.py * remove evolve.sh * loadWebcam cleanup 2020-11-14 11:50:32 +01:00			`from contextlib import contextmanager`
initial commit 2020-05-29 17:04:54 -07:00			`from copy import deepcopy`

			`import torch`
			`import torch.backends.cudnn as cudnn`
			`import torch.nn as nn`
			`import torch.nn.functional as F`
torchvision nms bug fix 2020-10-06 15:09:24 +02:00			`import torchvision`
initial commit 2020-05-29 17:04:54 -07:00
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`try:`
			`import thop # for FLOPS computation`
			`except ImportError:`
			`thop = None`
Fix redundant outputs via Logging in DDP training (#500) * Change print to logging * Clean function set_logging * Add line spacing * Change leftover prints to log * Fix scanning labels output * Fix rank naming * Change leftover print to logging * Reorganized DDP variables * Fix type error * Make quotes consistent * Fix spelling * Clean function call * Add line spacing * Update datasets.py * Update train.py Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2020-08-12 01:18:45 +07:00			`logger = logging.getLogger(__name__)`
initial commit 2020-05-29 17:04:54 -07:00
reformat code 2020-08-14 11:53:44 -07:00
Utils reorganization (#1392) * Utils reorganization * Add new utils files * cleanup * simplify * reduce datasets.py * remove evolve.sh * loadWebcam cleanup 2020-11-14 11:50:32 +01:00			`@contextmanager`
			`def torch_distributed_zero_first(local_rank: int):`
			`"""`
			`Decorator to make all processes in distributed training wait for each local_master to do something.`
			`"""`
			`if local_rank not in [-1, 0]:`
			`torch.distributed.barrier()`
			`yield`
			`if local_rank == 0:`
			`torch.distributed.barrier()`

initial commit 2020-05-29 17:04:54 -07:00
Utils reorganization (#1392) * Utils reorganization * Add new utils files * cleanup * simplify * reduce datasets.py * remove evolve.sh * loadWebcam cleanup 2020-11-14 11:50:32 +01:00			`def init_torch_seeds(seed=0):`
speed-reproducibility fix #17 2020-06-05 13:07:09 -07:00			`# Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html`
Utils reorganization (#1392) * Utils reorganization * Add new utils files * cleanup * simplify * reduce datasets.py * remove evolve.sh * loadWebcam cleanup 2020-11-14 11:50:32 +01:00			`torch.manual_seed(seed)`
speed-reproducibility fix #17 2020-06-05 13:07:09 -07:00			`if seed == 0: # slower, more reproducible`
			`cudnn.deterministic = True`
			`cudnn.benchmark = False`
			`else: # faster, less reproducible`
initial commit 2020-05-29 17:04:54 -07:00			`cudnn.deterministic = False`
			`cudnn.benchmark = True`


PyTorch 1.6.0 update with native AMP (#573) * PyTorch have Automatic Mixed Precision (AMP) Training. * Fixed the problem of inconsistent code length indentation * Fixed the problem of inconsistent code length indentation * Mixed precision training is turned on by default 2020-08-01 01:52:45 +08:00			`def select_device(device='', batch_size=None):`
initial commit 2020-05-29 17:04:54 -07:00			`# device = 'cpu' or '0' or '0,1,2,3'`
			`cpu_request = device.lower() == 'cpu'`
			`if device and not cpu_request: # if device requested other than 'cpu'`
			`os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable`
Display correct CUDA devices (#1776) * Display correct CUDA devices * cleanup 2020-12-24 13:01:35 -08:00			`assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availablity`
initial commit 2020-05-29 17:04:54 -07:00
			`cuda = False if cpu_request else torch.cuda.is_available()`
			`if cuda:`
			`c = 1024 ** 2 # bytes to MB`
			`ng = torch.cuda.device_count()`
			`if ng > 1 and batch_size: # check that batch_size is compatible with device_count`
Display correct CUDA devices (#1776) * Display correct CUDA devices * cleanup 2020-12-24 13:01:35 -08:00			`assert batch_size % ng == 0, f'batch-size {batch_size} not multiple of GPU count {ng}'`
initial commit 2020-05-29 17:04:54 -07:00			`x = [torch.cuda.get_device_properties(i) for i in range(ng)]`
PyTorch version to screen and cleanup (#1325) * Create flatten_recursive() helper function * cleanup * print torch version 2020-11-09 12:24:11 +01:00			`s = f'Using torch {torch.__version__} '`
Display correct CUDA devices (#1776) * Display correct CUDA devices * cleanup 2020-12-24 13:01:35 -08:00			`for i, d in enumerate((device or '0').split(',')):`
initial commit 2020-05-29 17:04:54 -07:00			`if i == 1:`
			`s = ' ' * len(s)`
Display correct CUDA devices (#1776) * Display correct CUDA devices * cleanup 2020-12-24 13:01:35 -08:00			`logger.info(f"{s}CUDA:{d} ({x[i].name}, {x[i].total_memory / c}MB)")`
initial commit 2020-05-29 17:04:54 -07:00			`else:`
PyTorch version to screen and cleanup (#1325) * Create flatten_recursive() helper function * cleanup * print torch version 2020-11-09 12:24:11 +01:00			`logger.info(f'Using torch {torch.__version__} CPU')`
initial commit 2020-05-29 17:04:54 -07:00
Fix redundant outputs via Logging in DDP training (#500) * Change print to logging * Clean function set_logging * Add line spacing * Change leftover prints to log * Fix scanning labels output * Fix rank naming * Change leftover print to logging * Reorganized DDP variables * Fix type error * Make quotes consistent * Fix spelling * Clean function call * Add line spacing * Update datasets.py * Update train.py Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2020-08-12 01:18:45 +07:00			`logger.info('') # skip a line`
initial commit 2020-05-29 17:04:54 -07:00			`return torch.device('cuda:0' if cuda else 'cpu')`


			`def time_synchronized():`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`# pytorch-accurate time`
initial commit 2020-05-29 17:04:54 -07:00			`torch.cuda.synchronize() if torch.cuda.is_available() else None`
			`return time.time()`


Fix torch multi-GPU --device error (#1701) * Fix torch GPU error * Update torch_utils.py single-line device = Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2020-12-16 11:42:14 +07:00			`def profile(x, ops, n=100, device=None):`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`# profile a pytorch module or list of modules. Example usage:`
			`# x = torch.randn(16, 3, 640, 640) # input`
			`# m1 = lambda x: x * torch.sigmoid(x)`
			`# m2 = nn.SiLU()`
			`# profile(x, [m1, m2], n=100) # profile speed over 100 iterations`
Update loss criteria constructor (#1711) 2020-12-16 08:39:35 -08:00
			`device = device or torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`x = x.to(device)`
			`x.requires_grad = True`
			`print(torch.__version__, device.type, torch.cuda.get_device_properties(0) if device.type == 'cuda' else '')`
Update torch_utils.py FLOPS to GFLOPS 2020-12-21 13:29:52 -08:00			`print(f"\n{'Params':>12s}{'GFLOPS':>12s}{'forward (ms)':>16s}{'backward (ms)':>16s}{'input':>24s}{'output':>24s}")`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`for m in ops if isinstance(ops, list) else [ops]:`
Automatic m.half() profile on x.half() 2020-12-21 15:20:33 -08:00			`m = m.to(device) if hasattr(m, 'to') else m # device`
			`m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m # type`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`dtf, dtb, t = 0., 0., [0., 0., 0.] # dt forward, backward`
			`try:`
			`flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2 # GFLOPS`
			`except:`
			`flops = 0`

			`for _ in range(n):`
			`t[0] = time_synchronized()`
			`y = m(x)`
			`t[1] = time_synchronized()`
Update loss criteria constructor (#1711) 2020-12-16 08:39:35 -08:00			`try:`
			`_ = y.sum().backward()`
			`t[2] = time_synchronized()`
			`except: # no backward method`
			`t[2] = float('nan')`
Profile() feature addition (#1673) * Profile() feature addition * cleanup 2020-12-11 09:30:39 -08:00			`dtf += (t[1] - t[0]) * 1000 / n # ms per op forward`
			`dtb += (t[2] - t[1]) * 1000 / n # ms per op backward`

			`s_in = tuple(x.shape) if isinstance(x, torch.Tensor) else 'list'`
			`s_out = tuple(y.shape) if isinstance(y, torch.Tensor) else 'list'`
			`p = sum(list(x.numel() for x in m.parameters())) if isinstance(m, nn.Module) else 0 # parameters`
			`print(f'{p:12.4g}{flops:12.4g}{dtf:16.4g}{dtb:16.4g}{str(s_in):>24s}{str(s_out):>24s}')`


Update torch_utils.py 2020-07-02 12:03:45 -07:00			`def is_parallel(model):`
			`return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)`


Single-source training (#680) * Single-source training * Extract hyperparameters into seperate files * weight decay scientific notation yaml reader bug fix * remove import glob * intersect_dicts() implementation * 'or' bug fix * .to(device) bug fix 2020-08-09 02:12:44 -07:00			`def intersect_dicts(da, db, exclude=()):`
			`# Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values`
			`return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}`


initial commit 2020-05-29 17:04:54 -07:00			`def initialize_weights(model):`
			`for m in model.modules():`
			`t = type(m)`
			`if t is nn.Conv2d:`
			`pass # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')`
			`elif t is nn.BatchNorm2d:`
v2.0 Release (#491) Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> 2020-07-23 15:34:23 -07:00			`m.eps = 1e-3`
initial commit 2020-05-29 17:04:54 -07:00			`m.momentum = 0.03`
PyTorch 1.7.0 Compatibility Updates (#1233) * torch 1.7.0 compatibility updates * add inference verification 2020-10-28 15:03:50 +01:00			`elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:`
initial commit 2020-05-29 17:04:54 -07:00			`m.inplace = True`


			`def find_modules(model, mclass=nn.Conv2d):`
Single-source training (#680) * Single-source training * Extract hyperparameters into seperate files * weight decay scientific notation yaml reader bug fix * remove import glob * intersect_dicts() implementation * 'or' bug fix * .to(device) bug fix 2020-08-09 02:12:44 -07:00			`# Finds layer indices matching module class 'mclass'`
initial commit 2020-05-29 17:04:54 -07:00			`return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]`


pruning and sparsity initial commit 2020-07-05 13:41:21 -07:00			`def sparsity(model):`
			`# Return global model sparsity`
			`a, b = 0., 0.`
			`for p in model.parameters():`
			`a += p.numel()`
			`b += (p == 0).sum()`
			`return b / a`


			`def prune(model, amount=0.3):`
			`# Prune model to requested global sparsity`
			`import torch.nn.utils.prune as prune`
			`print('Pruning model... ', end='')`
			`for name, m in model.named_modules():`
update fuse_conv_and_bn() 2020-07-06 11:46:10 -07:00			`if isinstance(m, nn.Conv2d):`
pruning and sparsity initial commit 2020-07-05 13:41:21 -07:00			`prune.l1_unstructured(m, name='weight', amount=amount) # prune`
			`prune.remove(m, 'weight') # make permanent`
			`print(' %.3g global sparsity' % sparsity(model))`


initial commit 2020-05-29 17:04:54 -07:00			`def fuse_conv_and_bn(conv, bn):`
.fuse() gradient introduction bug fix 2020-09-20 11:57:19 -07:00			`# Fuse convolution and batchnorm layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/`
			`fusedconv = nn.Conv2d(conv.in_channels,`
			`conv.out_channels,`
			`kernel_size=conv.kernel_size,`
			`stride=conv.stride,`
			`padding=conv.padding,`
			`groups=conv.groups,`
			`bias=True).requires_grad_(False).to(conv.weight.device)`

			`# prepare filters`
			`w_conv = conv.weight.clone().view(conv.out_channels, -1)`
			`w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))`
			`fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.size()))`

			`# prepare spatial bias`
			`b_conv = torch.zeros(conv.weight.size(0), device=conv.weight.device) if conv.bias is None else conv.bias`
			`b_bn = bn.bias - bn.weight.mul(bn.running_mean).div(torch.sqrt(bn.running_var + bn.eps))`
			`fusedconv.bias.copy_(torch.mm(w_bn, b_conv.reshape(-1, 1)).reshape(-1) + b_bn)`

			`return fusedconv`
initial commit 2020-05-29 17:04:54 -07:00

Improved FLOPS computation (#1398) * Improved FLOPS computation * update comment 2020-11-14 14:39:46 +01:00			`def model_info(model, verbose=False, img_size=640):`
			`# Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]`
initial commit 2020-05-29 17:04:54 -07:00			`n_p = sum(x.numel() for x in model.parameters()) # number parameters`
			`n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients`
			`if verbose:`
			`print('%5s %40s %9s %12s %20s %10s %10s' % ('layer', 'name', 'gradient', 'parameters', 'shape', 'mu', 'sigma'))`
			`for i, (name, p) in enumerate(model.named_parameters()):`
			`name = name.replace('module_list.', '')`
			`print('%5g %40s %9s %12g %20s %10.3g %10.3g' %`
			`(i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std()))`

			`try: # FLOPS`
			`from thop import profile`
Increase FLOPS robustness (#1608) 2020-12-05 11:41:34 +01:00			`stride = int(model.stride.max()) if hasattr(model, 'stride') else 32`
Input channel yaml['ch'] addition (#1741) 2020-12-19 10:54:01 -08:00			`img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input`
Update torch_utils.py FLOPS to GFLOPS 2020-12-21 13:29:52 -08:00			`flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS`
Improved FLOPS computation (#1398) * Improved FLOPS computation * update comment 2020-11-14 14:39:46 +01:00			`img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float`
Update torch_utils.py FLOPS to GFLOPS 2020-12-21 13:29:52 -08:00			`fs = ', %.1f GFLOPS' % (flops * img_size[0] / stride * img_size[1] / stride) # 640x640 GFLOPS`
Improved FLOPS computation (#1398) 2020-11-14 14:48:55 +01:00			`except (ImportError, Exception):`
initial commit 2020-05-29 17:04:54 -07:00			`fs = ''`

Utils reorganization (#1392) * Utils reorganization * Add new utils files * cleanup * simplify * reduce datasets.py * remove evolve.sh * loadWebcam cleanup 2020-11-14 11:50:32 +01:00			`logger.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")`
initial commit 2020-05-29 17:04:54 -07:00

			`def load_classifier(name='resnet101', n=2):`
			`# Loads a pretrained model reshaped to n-class output`
classifier, export, torch seed updates 2020-10-06 14:54:02 +02:00			`model = torchvision.models.__dict__[name](pretrained=True)`

			`# ResNet model properties`
			`# input_size = [3, 224, 224]`
			`# input_space = 'RGB'`
			`# input_range = [0, 1]`
			`# mean = [0.485, 0.456, 0.406]`
			`# std = [0.229, 0.224, 0.225]`
initial commit 2020-05-29 17:04:54 -07:00
			`# Reshape output to n classes`
There is no need to download extra packages, official bring it with you I submitted it once in your yolov3 project, you seem to accept it? I'm not sure. I'll submit PR again. 2020-06-16 08:28:52 +08:00			`filters = model.fc.weight.shape[1]`
update fuse_conv_and_bn() 2020-07-06 11:46:10 -07:00			`model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)`
			`model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)`
There is no need to download extra packages, official bring it with you I submitted it once in your yolov3 project, you seem to accept it? I'm not sure. I'll submit PR again. 2020-06-16 08:28:52 +08:00			`model.fc.out_features = n`
initial commit 2020-05-29 17:04:54 -07:00			`return model`


augmented inference 2020-06-05 21:14:15 -07:00			`def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio`
initial commit 2020-05-29 17:04:54 -07:00			`# scales img(bs,3,y,x) by ratio`
update yolo.py TTA flexibility and extensibility (#506) * update yolo.py TTA flexibility and extensibility * Update scale_img() 2020-07-24 11:42:23 -07:00			`if ratio == 1.0:`
			`return img`
			`else:`
			`h, w = img.shape[2:]`
			`s = (int(h * ratio), int(w * ratio)) # new size`
			`img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize`
			`if not same_shape: # pad/crop img`
			`gs = 32 # (pixels) grid size`
			`h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]`
			`return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean`
initial commit 2020-05-29 17:04:54 -07:00

multi-gpu ckpt filesize bug fix #253 2020-07-11 12:35:21 -07:00			`def copy_attr(a, b, include=(), exclude=()):`
			`# Copy attributes from b to a, options to only include [...] and to exclude [...]`
			`for k, v in b.__dict__.items():`
			`if (len(include) and k not in include) or k.startswith('_') or k in exclude:`
			`continue`
			`else:`
			`setattr(a, k, v)`


initial commit 2020-05-29 17:04:54 -07:00			`class ModelEMA:`
			`""" Model Exponential Moving Average from https://github.com/rwightman/pytorch-image-models`
			`Keep a moving average of everything in the model state_dict (parameters and buffers).`
			`This is intended to allow functionality like`
			`https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage`
			`A smoothed version of the weights is necessary for some training schemes to perform well.`
			`This class is sensitive where it is initialized in the sequence of model init,`
			`GPU assignment and distributed training wrappers.`
			`"""`

--resume EMA fix #292 2020-07-09 15:09:06 -07:00			`def __init__(self, model, decay=0.9999, updates=0):`
EMA bug fix #279 2020-07-03 11:56:14 -07:00			`# Create EMA`
--resume EMA fix #292 2020-07-09 15:09:06 -07:00			`self.ema = deepcopy(model.module if is_parallel(model) else model).eval() # FP32 EMA`
FP16 EMA bug fix 2020-07-13 15:47:46 -07:00			`# if next(model.parameters()).device.type != 'cpu':`
			`# self.ema.half() # FP16 EMA`
--resume EMA fix #292 2020-07-09 15:09:06 -07:00			`self.updates = updates # number of EMA updates`
initial commit 2020-05-29 17:04:54 -07:00			`self.decay = lambda x: decay * (1 - math.exp(-x / 2000)) # decay exponential ramp (to help early epochs)`
			`for p in self.ema.parameters():`
			`p.requires_grad_(False)`

			`def update(self, model):`
EMA bug fix #279 2020-07-03 11:56:14 -07:00			`# Update EMA parameters`
initial commit 2020-05-29 17:04:54 -07:00			`with torch.no_grad():`
EMA bug fix #279 2020-07-03 11:56:14 -07:00			`self.updates += 1`
			`d = self.decay(self.updates)`
initial commit 2020-05-29 17:04:54 -07:00
EMA bug fix #279 2020-07-03 11:56:14 -07:00			`msd = model.module.state_dict() if is_parallel(model) else model.state_dict() # model state_dict`
			`for k, v in self.ema.state_dict().items():`
initial commit 2020-05-29 17:04:54 -07:00			`if v.dtype.is_floating_point:`
			`v *= d`
			`v += (1. - d) * msd[k].detach()`

multi-gpu ckpt filesize bug fix #253 2020-07-11 12:35:21 -07:00			`def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):`
EMA bug fix #279 2020-07-03 11:56:14 -07:00			`# Update EMA attributes`
multi-gpu ckpt filesize bug fix #253 2020-07-11 12:35:21 -07:00			`copy_attr(self.ema, model, include, exclude)`