pull/13117/merge
San 2024-08-24 21:41:53 +00:00 committed by GitHub
commit fae7626b80
13 changed files with 427 additions and 486 deletions


@ -449,6 +449,7 @@ def export_openvino(file, metadata, half, int8, data, prefix=colorstr("OpenVINO:
Quantization transform function.
        Extracts and preprocesses input data from a dataloader item for quantization.
Parameters:
data_item: Tuple with data item produced by DataLoader during iteration
Returns:
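        Example (a hedged sketch; assumes the standard YOLOv5 dataloader item layout of
        (im, targets, paths, shapes)):
            im = data_item[0].numpy().astype(np.float32) / 255.0  # uint8 -> fp32, 0-255 -> 0.0-1.0
            return np.expand_dims(im, 0) if im.ndim == 3 else im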


@ -1,490 +1,439 @@
# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""
YOLO-specific modules.
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import math
import os
import platform
import sys
import time
from copy import deepcopy
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != "Windows":
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import (
C3,
C3SPP,
C3TR,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3Ghost,
C3x,
Classify,
Concat,
Contract,
Conv,
CrossConv,
DetectMultiBackend,
DWConv,
DWConvTranspose2d,
Expand,
Focus,
GhostBottleneck,
GhostConv,
Proto,
)
from models.experimental import MixConv2d
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
from utils.metrics import box_iou  # used by the optional merge-NMS branch in non_max_suppression()
from utils.plots import feature_visualization
from utils.torch_utils import (
fuse_conv_and_bn,
initialize_weights,
model_info,
profile,
scale_img,
select_device,
time_sync,
)
try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


def autopad(k, p=None):
    """Returns 'same' padding; auto-computes p as k // 2 for int or per-dimension kernel sizes."""
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p


class Conv(nn.Module):
    # Standard convolution: Conv2d + BatchNorm2d + activation
    default_act = nn.SiLU()  # default activation (parse_model may override via a YAML `activation:` entry)

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Forward pass used after fuse() folds the BatchNorm into the convolution weights."""
        return self.act(self.conv(x))
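# Shape sketch (illustrative): with stride 1, autopad keeps spatial size unchanged, e.g.
#   Conv(64, 128, k=3)(torch.rand(1, 64, 32, 32)).shape == (1, 128, 32, 32)
# because p = k // 2 = 1 gives 'same' padding for the 3x3 kernel.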
class Bottleneck(nn.Module):
    # Standard bottleneck: 1x1 reduce -> 3x3 conv, with optional shortcut
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3(nn.Module):
    # CSP bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast: three chained max-pools replace SPP's parallel pools
    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        y1 = self.m(x)
        y2 = self.m(y1)
        return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
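# Equivalence sketch (illustrative): SPPF(c1, c2, k=5) computes the same features as
# SPP(c1, c2, k=(5, 9, 13)) but faster, since three chained 5x5 max-pools have effective
# receptive fields of 5, 9 and 13.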
class Detect(nn.Module):
    # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
        """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        """Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
        """Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compat
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid
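# Decode sketch (grounded in Detect.forward above): for one anchor at grid cell (gx, gy) with
# stride s and anchor size (aw, ah), raw outputs (tx, ty, tw, th) map to pixel-space boxes as:
#   x = (sigmoid(tx) * 2 - 0.5 + gx) * s   # the -0.5 grid offset comes from _make_grid()
#   y = (sigmoid(ty) * 2 - 0.5 + gy) * s
#   w = (sigmoid(tw) * 2) ** 2 * aw        # a (0, 4] multiple of the anchor dimensions
#   h = (sigmoid(th) * 2) ** 2 * ah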
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
"""Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
"""Processes input through the network, returning detections and prototypes; adjusts output based on
training/export mode.
"""
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
"""YOLOv5 base model."""
def forward(self, x, profile=False, visualize=False):
"""Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
visualization.
"""
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
"""Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
"""Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}")
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self):
"""Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
LOGGER.info("Fusing layers... ")
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, "bn") # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640):
"""Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
model_info(self, verbose, img_size)
def _apply(self, fn):
"""Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
buffers.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
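# Usage sketch (illustrative): any BaseModel subclass supports profiling and Conv+BN fusion, e.g.
#   model = DetectionModel("yolov5s.yaml")
#   _ = model(torch.rand(1, 3, 640, 640), profile=True)  # prints a per-layer time/GFLOPs/params table
#   model.fuse()  # folds each BatchNorm2d into its preceding Conv2d for faster inference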
class DetectionModel(BaseModel):
# YOLOv5 detection model
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
"""Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding="ascii", errors="ignore") as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
if nc and nc != self.yaml["nc"]:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml["nc"] = nc # override yaml value
if anchors:
LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
self.yaml["anchors"] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml["nc"])] # default names
self.inplace = self.yaml.get("inplace", True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
def _forward(x):
"""Passes the input 'x' through the model and returns the processed output."""
return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
s = 256 # 2x min stride
m.inplace = self.inplace
m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))]) # forward
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info("")
def forward(self, x, augment=False, profile=False, visualize=False):
"""Performs single-scale or augmented inference and may include profiling or visualization."""
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
"""Performs augmented inference across different scales and flips, returning combined detections."""
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
"""De-scales predictions from augmented inference, adjusting for flips and image size."""
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
"""Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
layer counts.
"""
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4**x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4**x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None):
"""
Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
For details see https://arxiv.org/abs/1708.02002 section 3.3.
"""
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5 : 5 + m.nc] += (
math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
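# Bias-init sketch (grounded in _initialize_biases above): a layer with stride s sees
# (640 / s) ** 2 grid cells on a 640px image, so adding log(8 / (640 / s) ** 2) to the
# objectness logit encodes a prior of ~8 objects per image, while log(0.6 / (nc - 0.99999))
# spreads ~0.6 total class probability across nc classes (arxiv.org/abs/1708.02002, sec. 3.3).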
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
"""Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for channels, nc (int) for num classes, anchors (list)."""
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
# YOLOv5 classification model
    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
        """Initializes a YOLOv5 classification model from a YAML config `cfg` or an existing detection `model`,
        with `nc` output classes and a backbone `cutoff` index.
        """
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
"""Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
layer.
"""
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, "models.common.Classify" # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
    def _from_yaml(self, cfg):
        """Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
        self.model = None  # TODO: parse `cfg` and construct the classification model
def parse_model(d, ch):
    """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act, ch_mul = (
        d["anchors"],
        d["nc"],
        d["depth_multiple"],
        d["width_multiple"],
        d.get("activation"),
        d.get("channel_multiple"),
    )
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    if not ch_mul:
        ch_mul = 8
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
            Conv,
            GhostConv,
            Bottleneck,
            GhostBottleneck,
            SPP,
            SPPF,
            DWConv,
            MixConv2d,
            Focus,
            CrossConv,
            BottleneckCSP,
            C3,
            C3TR,
            C3SPP,
            C3Ghost,
            nn.ConvTranspose2d,
            DWConvTranspose2d,
            C3x,
        }:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, ch_mul)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, ch_mul)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}")  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


class YOLOLoss(nn.Module):
    # Computes the YOLOv5 training loss: box regression (CIoU), objectness and classification
    def __init__(self, nc=80, anchors=(), reduction="mean", device="cpu"):
        super().__init__()
        a = torch.as_tensor(anchors, dtype=torch.float32)  # accepts nested lists or an (nl, na, 2) tensor
        self.anchors = a.view(a.shape[0], -1, 2).to(device)
        self.nc = nc  # number of classes
        self.nl = self.anchors.shape[0]  # number of detection layers
        self.na = self.anchors.shape[1]  # number of anchors per layer
        self.reduction = reduction
        self.BCEcls = nn.BCEWithLogitsLoss(reduction=reduction)
        self.BCEobj = nn.BCEWithLogitsLoss(reduction=reduction)
        self.gr = 1.0  # iou ratio used for the objectness target
        self.box_gain = 0.05  # box loss gain
        self.cls_gain = 0.5  # cls loss gain
        self.obj_gain = 1.0  # obj loss gain

    def forward(self, p, targets):
        lcls, lbox, lobj = (
            torch.zeros(1, device=targets.device),
            torch.zeros(1, device=targets.device),
            torch.zeros(1, device=targets.device),
        )
        tcls, tbox, indices, anchors = self.build_targets(p, targets)

        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=targets.device)  # target obj
            n = b.shape[0]  # number of targets
            if n:
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
                pxy = ps[:, :2].sigmoid() * 2.0 - 0.5
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(ps[:, 5:], 0.0, device=targets.device)
                    t[range(n), tcls[i]] = 1.0
                    lcls += self.BCEcls(ps[:, 5:], t)
            lobj += self.BCEobj(pi[..., 4], tobj) * self.obj_gain

        lbox *= self.box_gain
        lobj *= self.obj_gain
        lcls *= self.cls_gain
        bs = tobj.shape[0]  # batch size
        loss = lbox + lobj + lcls
        return loss * bs, torch.cat((lbox, lobj, lcls)).detach()

    def build_targets(self, p, targets):
        """Builds per-layer training targets; `targets` is an (nt, 6) tensor of [image, class, x, y, w, h]."""
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=targets.device)  # normalized-to-gridspace gain
        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = (
            torch.tensor(
                [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device
            ).float()  # j,k,l,m offsets; diagonal offsets omitted to match the 5-row mask below
            * g
        )

        for i in range(self.nl):
            anchors = self.anchors[i]
            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain

            t = targets * gain
            if nt:
                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1.0 / r).max(2)[0] < 4  # anchor-match threshold
                t = t[j]  # filter

                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T
                l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy
            gwh = t[:, 4:6]  # grid wh
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid xy indices

            a = t[:, 6].long()  # anchor indices
            indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])
            tcls.append(c)

        return tcls, tbox, indices, anch


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """Returns IoU (optionally GIoU, DIoU or CIoU) of box1(4, n) against box2(n, 4)."""
    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (
        torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)
    ).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw**2 + ch**2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist^2
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:
                v = (4 / math.pi**2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    return iou  # IoU


def non_max_suppression(
    prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300
):
    """Runs Non-Maximum Suppression on inference results; returns one (n, 6) tensor [xyxy, conf, cls] per image."""
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections (merge-NMS only)
    multi_label &= nc > 1  # multiple labels per box
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence filter
        if not x.shape[0]:
            continue

        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
        box = xywh2xyxy(x[:, :4])  # center xywh to corner xyxy

        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
n = x.shape[0]
if not n:
continue
elif n > max_nms:
x = x[x[:, 4].argsort(descending=True)[:max_nms]]
c = x[:, 5:6] * (0 if agnostic else max_wh)
boxes, scores = x[:, :4] + c, x[:, 4]
i = torchvision.ops.nms(boxes, scores, iou_thres)
if i.shape[0] > max_det:
i = i[:max_det]
if merge and (1 < n < 3e3):
iou = box_iou(boxes[i], boxes) > iou_thres
weights = iou * scores[None]
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)
if redundant:
i = i[iou.sum(1) > 1]
output[xi] = x[i]
if (time.time() - t) > time_limit:
            LOGGER.warning(f"NMS time limit {time_limit}s exceeded")
break
return output
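# Usage sketch (illustrative): for inference output `pred` of shape (bs, n, 5 + nc),
#   det = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
# yields one (m, 6) tensor per image with rows [x1, y1, x2, y2, conf, cls].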
def xywh2xyxy(x):
    """Converts boxes from [x, y, w, h] (center-based) to [x1, y1, x2, y2] corner format."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
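# Worked example (illustrative): xywh2xyxy(torch.tensor([[10.0, 10.0, 4.0, 6.0]]))
# -> tensor([[8., 7., 12., 13.]])  # a 4x6 box centered at (10, 10)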
class DataLoader:
    # Minimal dataset loader skeleton; path parsing, caching and batching are left unimplemented
    def __init__(self, path, img_size=640, batch_size=16):
        self.path = path
        self.img_size = img_size
        self.batch_size = batch_size
        self.augment = True
        self.hyp = {
            "hsv_h": 0.015,  # HSV hue augmentation (fraction)
            "hsv_s": 0.7,  # HSV saturation augmentation (fraction)
            "hsv_v": 0.4,  # HSV value augmentation (fraction)
            "degrees": 0,  # rotation (degrees)
            "translate": 0.1,  # translation (fraction)
            "scale": 0.5,  # scale (gain)
            "shear": 0.0,  # shear (degrees)
        }
        # TODO: load image and label paths from `path` and prepare them for iteration

    def __len__(self):
        # TODO: return the number of batches once loading is implemented
        return 0

    def __iter__(self):
        # TODO: yield (imgs, targets, paths, shapes) batches
        raise NotImplementedError("DataLoader.__iter__ is a stub; implement dataset loading first")
def train(model, dataloader, optimizer, epochs):
    """Runs a minimal training loop over `dataloader` for `epochs` epochs using YOLOLoss."""
    device = next(model.parameters()).device
    m = model.model[-1]  # Detect() head
    criterion = YOLOLoss(m.nc, m.anchors, reduction="mean", device=device)  # anchors already in grid units
    for epoch in range(epochs):
        model.train()
        for batch_i, (imgs, targets, paths, _) in enumerate(dataloader):
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 -> float32, 0-255 -> 0.0-1.0
            targets = targets.to(device)
            pred = model(imgs)
            loss, loss_items = criterion(pred, targets)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if batch_i % 10 == 0:
                print(f"Epoch {epoch}/{epochs}, Batch {batch_i}/{len(dataloader)}, Loss: {loss.item():.4f}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--data", type=str, default="data/coco128.yaml", help="data.yaml path")
parser.add_argument("--epochs", type=int, default=300)
parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs")
parser.add_argument("--img-size", type=int, default=640, help="train, test image sizes")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
opt = parser.parse_args()
    device = select_device(opt.device)
    # Initialize model (Model is the DetectionModel alias; defaults to yolov5s.yaml with nc=80)
    model = Model(nc=80).to(device)
# Initialize optimizer
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Initialize dataloader
dataloader = DataLoader(opt.data, img_size=opt.img_size, batch_size=opt.batch_size)
# Train the model
train(model, dataloader, optimizer, opt.epochs)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--profile", action="store_true", help="profile model speed")
parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
try:
_ = Model(cfg)
except Exception as e:
print(f"Error in {cfg}: {e}")
else: # report fused model summary
model.fuse()


@ -156,7 +156,6 @@ def random_perspective(
):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
"""Applies random perspective transformation to an image, modifying the image and corresponding labels."""
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2


@ -64,7 +64,6 @@ class Callbacks:
thread: (boolean) Run callbacks in daemon thread
kwargs: Keyword Arguments to receive from YOLOv5
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
for logger in self._callbacks[hook]:
if thread:


@ -1104,7 +1104,8 @@ def extract_boxes(path=DATASETS_DIR / "coco128"):
def autosplit(path=DATASETS_DIR / "coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
"""Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.dataloaders import *; autosplit()
    Arguments:
path: Path to images directory
weights: Train, val, test weights (list, tuple)
annotated_only: Only use images with an annotated txt file
@ -1183,7 +1184,7 @@ class HUBDatasetStats:
"""
Class for generating HUB dataset JSON and `-hub` dataset directory.
    Arguments:
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
autodownload: Attempt to download dataset if not found locally
@ -1314,7 +1315,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
"""
YOLOv5 Classification Dataset.
    Arguments:
root: Dataset path
transform: torchvision transforms, used by default
album_transform: Albumentations transforms, used if installed


@ -518,7 +518,6 @@ def check_font(font=FONT, progress=False):
def check_dataset(data, autodownload=True):
"""Validates and/or auto-downloads a dataset, returning its configuration as a dictionary."""
# Download (optional)
extract_dir = ""
if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
@ -1023,7 +1022,6 @@ def non_max_suppression(
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
# Checks
assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"


@ -350,7 +350,8 @@ class GenericLogger:
"""
YOLOv5 General purpose logger for non-task specific logging
Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)
    Arguments:
opt: Run arguments
console_logger: Console logger
include: loggers to include


@ -80,7 +80,7 @@ class ClearmlLogger:
- Initialize ClearML Task, this object will capture the experiment
- Upload dataset version to ClearML Data if opt.upload_dataset is True
        Arguments:
opt (namespace) -- Commandline arguments for this run
hyp (dict) -- Hyperparameters for this run
@ -133,7 +133,7 @@ class ClearmlLogger:
"""
Log scalars/metrics to ClearML.
        Arguments:
metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...}
epoch (int) iteration number for the current set of metrics
"""
@ -145,7 +145,7 @@ class ClearmlLogger:
"""
Log model weights to ClearML.
        Arguments:
model_path (PosixPath or str) Path to the model weights
model_name (str) Name of the model visible in ClearML
epoch (int) Iteration / epoch of the model weights
@ -158,7 +158,7 @@ class ClearmlLogger:
"""
Log final metrics to a summary table.
        Arguments:
metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...}
"""
for k, v in metrics.items():
@ -168,7 +168,7 @@ class ClearmlLogger:
"""
Log image as plot in the plot section of ClearML.
        Arguments:
title (str) Title of the plot
plot_path (PosixPath or str) Path to the saved image file
"""
@ -183,7 +183,7 @@ class ClearmlLogger:
"""
Log files (images) as debug samples in the ClearML task.
        Arguments:
files (List(PosixPath)) a list of file paths in PosixPath format
title (str) A title that groups together images with the same values
"""
@ -199,7 +199,7 @@ class ClearmlLogger:
"""
Draw the bounding boxes on a single image and report the result as a ClearML debug sample.
        Arguments:
        image_path (PosixPath) the path to the original image file
boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
class_names (dict): dict containing mapping of class int to class name


@ -49,7 +49,7 @@ class WandbLogger:
- Upload dataset if opt.upload_dataset is True
- Setup training processes if job_type is 'Training'
        Arguments:
opt (namespace) -- Commandline arguments for this run
run_id (str) -- Run ID of W&B run to be resumed
job_type (str) -- To set the job_type for this run
@ -90,7 +90,7 @@ class WandbLogger:
- Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
- Setup log_dict, initialize bbox_interval
        Arguments:
        opt (namespace) -- Commandline arguments for this run
"""
@ -120,7 +120,7 @@ class WandbLogger:
"""
Log the model checkpoint as W&B artifact.
        Arguments:
path (Path) -- Path of directory containing the checkpoints
opt (namespace) -- Command line arguments for this run
epoch (int) -- Current epoch number
@ -159,7 +159,7 @@ class WandbLogger:
"""
Save the metrics to the logging dictionary.
        Arguments:
log_dict (Dict) -- metrics/media to be logged in current step
"""
if self.wandb_run:
@ -170,7 +170,7 @@ class WandbLogger:
"""
Commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.
        Arguments:
        best_result (boolean): Whether this evaluation produced the best result so far
"""
if self.wandb_run:
@ -197,7 +197,7 @@ class WandbLogger:
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
    """Source - https://gist.github.com/simon-weber/7853144
A context manager that will prevent any logging messages triggered during the body from being processed.
:param highest_level: the maximum logging level in use.
This would only need to be changed if a custom level greater than CRITICAL is defined.


@ -41,7 +41,6 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
@ -103,7 +102,6 @@ def compute_ap(recall, precision):
# Returns
Average precision, precision curve, recall curve
"""
# Append sentinel values to beginning and end
mrec = np.concatenate(([0.0], recall, [1.0]))
mpre = np.concatenate(([1.0], precision, [0.0]))
@ -137,6 +135,7 @@ class ConfusionMatrix:
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
detections (Array[N, 6]), x1, y1, x2, y2, conf, class
labels (Array[M, 5]), class, x1, y1, x2, y2
@ -233,7 +232,6 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
Input shapes are box1(1,4) to box2(n,4).
"""
# Get the coordinates of bounding boxes
if xywh: # transform from xywh to xyxy
(x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
@ -279,14 +277,15 @@ def box_iou(box1, box2, eps=1e-7):
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
(a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
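# Worked example (illustrative): box_iou(torch.tensor([[0., 0., 2., 2.]]), torch.tensor([[1., 1., 3., 3.]]))
# gives inter = 1, union = 4 + 4 - 1 = 7, so the result is tensor([[0.1429]]) (= 1/7).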
@ -304,7 +303,6 @@ def bbox_ioa(box1, box2, eps=1e-7):
box2: np.array of shape(nx4)
returns: np.array of shape(n)
"""
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1
b2_x1, b2_y1, b2_x2, b2_y2 = box2.T


@ -29,7 +29,6 @@ def random_perspective(
):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
"""Applies random perspective, rotation, scale, shear, and translation augmentations to an image and targets."""
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2


@ -14,7 +14,6 @@ def crop_mask(masks, boxes):
- masks should be a size [n, h, w] tensor of masks
- boxes should be a size [n, 4] tensor of bbox coords in relative point form
"""
n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
@ -33,7 +32,6 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):
return: h, w, n
"""
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0] # CHW
@ -51,7 +49,6 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
return: h, w, n
"""
c, mh, mw = protos.shape # CHW
ih, iw = shape
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW


@ -17,10 +17,9 @@ class TritonRemoteModel:
def __init__(self, url: str):
"""
        Keyword Arguments:
        url: Fully qualified address of the Triton server, e.g. grpc://localhost:8000
"""
parsed_url = urlparse(url)
if parsed_url.scheme == "grpc":
from tritonclient.grpc import InferenceServerClient, InferInput
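            # Usage sketch (illustrative): TritonRemoteModel(url="grpc://localhost:8000") selects this
            # gRPC client branch; other URL schemes (e.g. http) are handled by their own client branches.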