pull/13117/merge
San 2024-08-24 21:41:53 +00:00 committed by GitHub
commit fae7626b80
13 changed files with 427 additions and 486 deletions


@ -449,6 +449,7 @@ def export_openvino(file, metadata, half, int8, data, prefix=colorstr("OpenVINO:
Quantization transform function.
        Extracts and preprocesses input data from a dataloader item for quantization.
Parameters:
data_item: Tuple with data item produced by DataLoader during iteration
Returns:
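        Example (a hedged sketch; assumes the standard YOLOv5 dataloader item layout of
        (im, targets, paths, shapes)):
            im = data_item[0].numpy().astype(np.float32) / 255.0  # uint8 -> fp32, 0-255 -> 0.0-1.0
            return np.expand_dims(im, 0) if im.ndim == 3 else im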


@ -1,490 +1,439 @@
# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""
YOLO-specific modules.
Usage:
$ python models/yolo.py --cfg yolov5s.yaml
"""
import argparse
import contextlib
import math
import os
import platform
import sys
import time
from copy import deepcopy
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
FILE = Path(__file__).resolve()
ROOT = FILE.parents[1] # YOLOv5 root directory
if str(ROOT) not in sys.path:
sys.path.append(str(ROOT)) # add ROOT to PATH
if platform.system() != "Windows":
ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative
from models.common import (
C3,
C3SPP,
C3TR,
SPP,
SPPF,
Bottleneck,
BottleneckCSP,
C3Ghost,
C3x,
Classify,
Concat,
Contract,
Conv,
CrossConv,
DetectMultiBackend,
DWConv,
DWConvTranspose2d,
Expand,
Focus,
GhostBottleneck,
GhostConv,
Proto,
)
from models.experimental import MixConv2d
from utils.autoanchor import check_anchor_order
from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
from utils.metrics import box_iou  # used by the optional merge-NMS branch in non_max_suppression()
from utils.plots import feature_visualization
from utils.torch_utils import (
fuse_conv_and_bn,
initialize_weights,
model_info,
profile,
scale_img,
select_device,
time_sync,
)
try:
    import thop  # for FLOPs computation
except ImportError:
    thop = None


def autopad(k, p=None):
    """Returns 'same' padding; auto-computes p as k // 2 for int or per-dimension kernel sizes."""
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]
    return p


class Conv(nn.Module):
    # Standard convolution: Conv2d + BatchNorm2d + activation
    default_act = nn.SiLU()  # default activation (parse_model may override via a YAML `activation:` entry)

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super().__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = self.default_act if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def forward_fuse(self, x):
        """Forward pass used after fuse() folds the BatchNorm into the convolution weights."""
        return self.act(self.conv(x))
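# Shape sketch (illustrative): with stride 1, autopad keeps spatial size unchanged, e.g.
#   Conv(64, 128, k=3)(torch.rand(1, 64, 32, 32)).shape == (1, 128, 32, 32)
# because p = k // 2 = 1 gives 'same' padding for the 3x3 kernel.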
class Bottleneck(nn.Module):
    # Standard bottleneck: 1x1 reduce -> 3x3 conv, with optional shortcut
    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_, c2, 3, 1, g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))


class C3(nn.Module):
    # CSP bottleneck with 3 convolutions
    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        super().__init__()
        c_ = int(c2 * e)  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c1, c_, 1, 1)
        self.cv3 = Conv(2 * c_, c2, 1)
        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))

    def forward(self, x):
        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1))


class SPPF(nn.Module):
    # Spatial Pyramid Pooling - Fast: three chained max-pools replace SPP's parallel pools
    def __init__(self, c1, c2, k=5):
        super().__init__()
        c_ = c1 // 2  # hidden channels
        self.cv1 = Conv(c1, c_, 1, 1)
        self.cv2 = Conv(c_ * 4, c2, 1, 1)
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        x = self.cv1(x)
        y1 = self.m(x)
        y2 = self.m(y1)
        return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1))
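# Equivalence sketch (illustrative): SPPF(c1, c2, k=5) computes the same features as
# SPP(c1, c2, k=(5, 9, 13)) but faster, since three chained 5x5 max-pools have effective
# receptive fields of 5, 9 and 13.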
class Detect(nn.Module):
    # YOLOv5 Detect head for detection models
    stride = None  # strides computed during build
    dynamic = False  # force grid reconstruction
    export = False  # export mode

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
        """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.empty(0) for _ in range(self.nl)]  # init grid
        self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]  # init anchor grid
        self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
        self.inplace = inplace  # use inplace ops (e.g. slice assignment)

    def forward(self, x):
        """Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                if isinstance(self, Segment):  # (boxes + masks)
                    xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4)
                    xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf.sigmoid(), mask), 4)
                else:  # Detect (boxes only)
                    xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4)
                    xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
                    wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, conf), 4)
                z.append(y.view(bs, self.na * nx * ny, self.no))

        return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)

    def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
        """Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
        y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
        yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x)  # torch>=0.7 compat
        grid = torch.stack((xv, yv), 2).expand(shape) - 0.5  # add grid offset, i.e. y = 2.0 * x - 0.5
        anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
        return grid, anchor_grid
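# Decode sketch (grounded in Detect.forward above): for one anchor at grid cell (gx, gy) with
# stride s and anchor size (aw, ah), raw outputs (tx, ty, tw, th) map to pixel-space boxes as:
#   x = (sigmoid(tx) * 2 - 0.5 + gx) * s   # the -0.5 grid offset comes from _make_grid()
#   y = (sigmoid(ty) * 2 - 0.5 + gy) * s
#   w = (sigmoid(tw) * 2) ** 2 * aw        # a (0, 4] multiple of the anchor dimensions
#   h = (sigmoid(th) * 2) ** 2 * ah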
class Segment(Detect):
# YOLOv5 Segment head for segmentation models
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
"""Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
super().__init__(nc, anchors, ch, inplace)
self.nm = nm # number of masks
self.npr = npr # number of protos
self.no = 5 + nc + self.nm # number of outputs per anchor
self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv
self.proto = Proto(ch[0], self.npr, self.nm) # protos
self.detect = Detect.forward
def forward(self, x):
"""Processes input through the network, returning detections and prototypes; adjusts output based on
training/export mode.
"""
p = self.proto(x[0])
x = self.detect(self, x)
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
class BaseModel(nn.Module):
"""YOLOv5 base model."""
def forward(self, x, profile=False, visualize=False):
"""Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
visualization.
"""
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_once(self, x, profile=False, visualize=False):
"""Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
y, dt = [], [] # outputs
for m in self.model:
if m.f != -1: # if not from previous layer
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers
if profile:
self._profile_one_layer(m, x, dt)
x = m(x) # run
y.append(x if m.i in self.save else None) # save output
if visualize:
feature_visualization(x, m.type, m.i, save_dir=visualize)
return x
def _profile_one_layer(self, m, x, dt):
"""Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
c = m == self.model[-1] # is final layer, copy input as inplace fix
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs
t = time_sync()
for _ in range(10):
m(x.copy() if c else x)
dt.append((time_sync() - t) * 100)
if m == self.model[0]:
LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module")
LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}")
if c:
LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total")
def fuse(self):
"""Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
LOGGER.info("Fusing layers... ")
for m in self.model.modules():
if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, "bn") # remove batchnorm
m.forward = m.forward_fuse # update forward
self.info()
return self
def info(self, verbose=False, img_size=640):
"""Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
model_info(self, verbose, img_size)
def _apply(self, fn):
"""Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
buffers.
"""
self = super()._apply(fn)
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
m.stride = fn(m.stride)
m.grid = list(map(fn, m.grid))
if isinstance(m.anchor_grid, list):
m.anchor_grid = list(map(fn, m.anchor_grid))
return self
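# Usage sketch (illustrative): any BaseModel subclass supports profiling and Conv+BN fusion, e.g.
#   model = DetectionModel("yolov5s.yaml")
#   _ = model(torch.rand(1, 3, 640, 640), profile=True)  # prints a per-layer time/GFLOPs/params table
#   model.fuse()  # folds each BatchNorm2d into its preceding Conv2d for faster inference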
class DetectionModel(BaseModel):
# YOLOv5 detection model
def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
"""Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
super().__init__()
if isinstance(cfg, dict):
self.yaml = cfg # model dict
else: # is *.yaml
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg, encoding="ascii", errors="ignore") as f:
self.yaml = yaml.safe_load(f) # model dict
# Define model
ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels
if nc and nc != self.yaml["nc"]:
LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}")
self.yaml["nc"] = nc # override yaml value
if anchors:
LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}")
self.yaml["anchors"] = round(anchors) # override yaml value
self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist
self.names = [str(i) for i in range(self.yaml["nc"])] # default names
self.inplace = self.yaml.get("inplace", True)
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, (Detect, Segment)):
def _forward(x):
"""Passes the input 'x' through the model and returns the processed output."""
return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x)
s = 256 # 2x min stride
m.inplace = self.inplace
m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))]) # forward
check_anchor_order(m)
m.anchors /= m.stride.view(-1, 1, 1)
self.stride = m.stride
self._initialize_biases() # only run once
# Init weights, biases
initialize_weights(self)
self.info()
LOGGER.info("")
def forward(self, x, augment=False, profile=False, visualize=False):
"""Performs single-scale or augmented inference and may include profiling or visualization."""
if augment:
return self._forward_augment(x) # augmented inference, None
return self._forward_once(x, profile, visualize) # single-scale inference, train
def _forward_augment(self, x):
"""Performs augmented inference across different scales and flips, returning combined detections."""
img_size = x.shape[-2:] # height, width
s = [1, 0.83, 0.67] # scales
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self._forward_once(xi)[0] # forward
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi = self._descale_pred(yi, fi, si, img_size)
y.append(yi)
y = self._clip_augmented(y) # clip augmented tails
return torch.cat(y, 1), None # augmented inference, train
def _descale_pred(self, p, flips, scale, img_size):
"""De-scales predictions from augmented inference, adjusting for flips and image size."""
if self.inplace:
p[..., :4] /= scale # de-scale
if flips == 2:
p[..., 1] = img_size[0] - p[..., 1] # de-flip ud
elif flips == 3:
p[..., 0] = img_size[1] - p[..., 0] # de-flip lr
else:
x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale
if flips == 2:
y = img_size[0] - y # de-flip ud
elif flips == 3:
x = img_size[1] - x # de-flip lr
p = torch.cat((x, y, wh, p[..., 4:]), -1)
return p
def _clip_augmented(self, y):
"""Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
layer counts.
"""
nl = self.model[-1].nl # number of detection layers (P3-P5)
g = sum(4**x for x in range(nl)) # grid points
e = 1 # exclude layer count
i = (y[0].shape[1] // g) * sum(4**x for x in range(e)) # indices
y[0] = y[0][:, :-i] # large
i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices
y[-1] = y[-1][:, i:] # small
return y
def _initialize_biases(self, cf=None):
"""
Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
For details see https://arxiv.org/abs/1708.02002 section 3.3.
"""
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85)
b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image)
b.data[:, 5 : 5 + m.nc] += (
math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum())
) # cls
mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
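# Bias-init sketch (grounded in _initialize_biases above): a layer with stride s sees
# (640 / s) ** 2 grid cells on a 640px image, so adding log(8 / (640 / s) ** 2) to the
# objectness logit encodes a prior of ~8 objects per image, while log(0.6 / (nc - 0.99999))
# spreads ~0.6 total class probability across nc classes (arxiv.org/abs/1708.02002, sec. 3.3).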
Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility
class SegmentationModel(DetectionModel):
# YOLOv5 segmentation model
def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
"""Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for channels, nc (int) for num classes, anchors (list)."""
super().__init__(cfg, ch, nc, anchors)
class ClassificationModel(BaseModel):
# YOLOv5 classification model
    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
        """Initializes a YOLOv5 classification model from a YAML config `cfg` or an existing detection `model`,
        with `nc` output classes and a backbone `cutoff` index.
        """
super().__init__()
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
def _from_detection_model(self, model, nc=1000, cutoff=10):
"""Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
layer.
"""
if isinstance(model, DetectMultiBackend):
model = model.model # unwrap DetectMultiBackend
model.model = model.model[:cutoff] # backbone
m = model.model[-1] # last layer
ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels # ch into module
c = Classify(ch, nc) # Classify()
c.i, c.f, c.type = m.i, m.f, "models.common.Classify" # index, from, type
model.model[-1] = c # replace
self.model = model.model
self.stride = model.stride
self.save = []
self.nc = nc
    def _from_yaml(self, cfg):
        """Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
        self.model = None  # TODO: parse `cfg` and construct the classification model
def parse_model(d, ch):
    """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
    LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}")
    anchors, nc, gd, gw, act, ch_mul = (
        d["anchors"],
        d["nc"],
        d["depth_multiple"],
        d["width_multiple"],
        d.get("activation"),
        d.get("channel_multiple"),
    )
    if act:
        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
        LOGGER.info(f"{colorstr('activation:')} {act}")  # print
    if not ch_mul:
        ch_mul = 8
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            with contextlib.suppress(NameError):
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in {
            Conv,
            GhostConv,
            Bottleneck,
            GhostBottleneck,
            SPP,
            SPPF,
            DWConv,
            MixConv2d,
            Focus,
            CrossConv,
            BottleneckCSP,
            C3,
            C3TR,
            C3SPP,
            C3Ghost,
            nn.ConvTranspose2d,
            DWConvTranspose2d,
            C3x,
        }:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, ch_mul)

            args = [c1, c2, *args[1:]]
            if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum(ch[x] for x in f)
        # TODO: channel, gw, gd
        elif m in {Detect, Segment}:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
            if m is Segment:
                args[3] = make_divisible(args[3] * gw, ch_mul)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace("__main__.", "")  # module type
        np = sum(x.numel() for x in m_.parameters())  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}")  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)


class YOLOLoss(nn.Module):
    # Computes the YOLOv5 training loss: box regression (CIoU), objectness and classification
    def __init__(self, nc=80, anchors=(), reduction="mean", device="cpu"):
        super().__init__()
        a = torch.as_tensor(anchors, dtype=torch.float32)  # accepts nested lists or an (nl, na, 2) tensor
        self.anchors = a.view(a.shape[0], -1, 2).to(device)
        self.nc = nc  # number of classes
        self.nl = self.anchors.shape[0]  # number of detection layers
        self.na = self.anchors.shape[1]  # number of anchors per layer
        self.reduction = reduction
        self.BCEcls = nn.BCEWithLogitsLoss(reduction=reduction)
        self.BCEobj = nn.BCEWithLogitsLoss(reduction=reduction)
        self.gr = 1.0  # iou ratio used for the objectness target
        self.box_gain = 0.05  # box loss gain
        self.cls_gain = 0.5  # cls loss gain
        self.obj_gain = 1.0  # obj loss gain

    def forward(self, p, targets):
        lcls, lbox, lobj = (
            torch.zeros(1, device=targets.device),
            torch.zeros(1, device=targets.device),
            torch.zeros(1, device=targets.device),
        )
        tcls, tbox, indices, anchors = self.build_targets(p, targets)

        for i, pi in enumerate(p):  # layer index, layer predictions
            b, a, gj, gi = indices[i]  # image, anchor, gridy, gridx
            tobj = torch.zeros_like(pi[..., 0], device=targets.device)  # target obj
            n = b.shape[0]  # number of targets
            if n:
                ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
                pxy = ps[:, :2].sigmoid() * 2.0 - 0.5
                pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                pbox = torch.cat((pxy, pwh), 1)  # predicted box
                iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
                lbox += (1.0 - iou).mean()  # iou loss
                tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype)
                if self.nc > 1:  # cls loss (only if multiple classes)
                    t = torch.full_like(ps[:, 5:], 0.0, device=targets.device)
                    t[range(n), tcls[i]] = 1.0
                    lcls += self.BCEcls(ps[:, 5:], t)
            lobj += self.BCEobj(pi[..., 4], tobj) * self.obj_gain

        lbox *= self.box_gain
        lobj *= self.obj_gain
        lcls *= self.cls_gain
        bs = tobj.shape[0]  # batch size
        loss = lbox + lobj + lcls
        return loss * bs, torch.cat((lbox, lobj, lcls)).detach()

    def build_targets(self, p, targets):
        """Builds per-layer training targets; `targets` is an (nt, 6) tensor of [image, class, x, y, w, h]."""
        na, nt = self.na, targets.shape[0]  # number of anchors, targets
        tcls, tbox, indices, anch = [], [], [], []
        gain = torch.ones(7, device=targets.device)  # normalized-to-gridspace gain
        ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt)
        targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2)  # append anchor indices

        g = 0.5  # bias
        off = (
            torch.tensor(
                [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1]], device=targets.device
            ).float()  # j,k,l,m offsets; diagonal offsets omitted to match the 5-row mask below
            * g
        )

        for i in range(self.nl):
            anchors = self.anchors[i]
            gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]]  # xyxy gain

            t = targets * gain
            if nt:
                r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
                j = torch.max(r, 1.0 / r).max(2)[0] < 4  # anchor-match threshold
                t = t[j]  # filter

                gxy = t[:, 2:4]  # grid xy
                gxi = gain[[2, 3]] - gxy  # inverse
                j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T
                l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T
                j = torch.stack((torch.ones_like(j), j, k, l, m))
                t = t.repeat((5, 1, 1))[j]
                offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
            else:
                t = targets[0]
                offsets = 0

            b, c = t[:, :2].long().T  # image, class
            gxy = t[:, 2:4]  # grid xy
            gwh = t[:, 4:6]  # grid wh
            gij = (gxy - offsets).long()
            gi, gj = gij.T  # grid xy indices

            a = t[:, 6].long()  # anchor indices
            indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1)))
            tbox.append(torch.cat((gxy - gij, gwh), 1))  # box
            anch.append(anchors[a])
            tcls.append(c)

        return tcls, tbox, indices, anch


def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
    """Returns IoU (optionally GIoU, DIoU or CIoU) of box1(4, n) against box2(n, 4)."""
    box2 = box2.T

    # Get the coordinates of bounding boxes
    if x1y1x2y2:
        b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
        b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
    else:  # transform from xywh to xyxy
        b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2
        b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2
        b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2
        b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2

    # Intersection area
    inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * (
        torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)
    ).clamp(0)

    # Union area
    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps
    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps
    union = w1 * h1 + w2 * h2 - inter + eps

    iou = inter / union
    if GIoU or DIoU or CIoU:
        cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1)  # convex (enclosing box) width
        ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1)  # convex height
        if CIoU or DIoU:  # Distance or Complete IoU https://arxiv.org/abs/1911.08287v1
            c2 = cw**2 + ch**2 + eps  # convex diagonal squared
            rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4  # center dist^2
            if DIoU:
                return iou - rho2 / c2  # DIoU
            elif CIoU:
                v = (4 / math.pi**2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)
                with torch.no_grad():
                    alpha = v / (v - iou + (1 + eps))
                return iou - (rho2 / c2 + v * alpha)  # CIoU
        else:  # GIoU https://arxiv.org/pdf/1902.09630.pdf
            c_area = cw * ch + eps  # convex area
            return iou - (c_area - union) / c_area  # GIoU
    return iou  # IoU


def non_max_suppression(
    prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300
):
    """Runs Non-Maximum Suppression on inference results; returns one (n, 6) tensor [xyxy, conf, cls] per image."""
    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
    assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections (merge-NMS only)
    multi_label &= nc > 1  # multiple labels per box
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference
        x = x[xc[xi]]  # confidence filter
        if not x.shape[0]:
            continue

        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
        box = xywh2xyxy(x[:, :4])  # center xywh to corner xyxy

        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
n = x.shape[0]
if not n:
continue
elif n > max_nms:
x = x[x[:, 4].argsort(descending=True)[:max_nms]]
c = x[:, 5:6] * (0 if agnostic else max_wh)
boxes, scores = x[:, :4] + c, x[:, 4]
i = torchvision.ops.nms(boxes, scores, iou_thres)
if i.shape[0] > max_det:
i = i[:max_det]
if merge and (1 < n < 3e3):
iou = box_iou(boxes[i], boxes) > iou_thres
weights = iou * scores[None]
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)
if redundant:
i = i[iou.sum(1) > 1]
output[xi] = x[i]
if (time.time() - t) > time_limit:
            LOGGER.warning(f"NMS time limit {time_limit}s exceeded")
break
return output
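# Usage sketch (illustrative): for inference output `pred` of shape (bs, n, 5 + nc),
#   det = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
# yields one (m, 6) tensor per image with rows [x1, y1, x2, y2, conf, cls].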
def xywh2xyxy(x):
    """Converts boxes from [x, y, w, h] (center-based) to [x1, y1, x2, y2] corner format."""
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2
y[:, 1] = x[:, 1] - x[:, 3] / 2
y[:, 2] = x[:, 0] + x[:, 2] / 2
y[:, 3] = x[:, 1] + x[:, 3] / 2
return y
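# Worked example (illustrative): xywh2xyxy(torch.tensor([[10.0, 10.0, 4.0, 6.0]]))
# -> tensor([[8., 7., 12., 13.]])  # a 4x6 box centered at (10, 10)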
class DataLoader:
    # Minimal dataset loader skeleton; path parsing, caching and batching are left unimplemented
    def __init__(self, path, img_size=640, batch_size=16):
        self.path = path
        self.img_size = img_size
        self.batch_size = batch_size
        self.augment = True
        self.hyp = {
            "hsv_h": 0.015,  # HSV hue augmentation (fraction)
            "hsv_s": 0.7,  # HSV saturation augmentation (fraction)
            "hsv_v": 0.4,  # HSV value augmentation (fraction)
            "degrees": 0,  # rotation (degrees)
            "translate": 0.1,  # translation (fraction)
            "scale": 0.5,  # scale (gain)
            "shear": 0.0,  # shear (degrees)
        }
        # TODO: load image and label paths from `path` and prepare them for iteration

    def __len__(self):
        # TODO: return the number of batches once loading is implemented
        return 0

    def __iter__(self):
        # TODO: yield (imgs, targets, paths, shapes) batches
        raise NotImplementedError("DataLoader.__iter__ is a stub; implement dataset loading first")
def train(model, dataloader, optimizer, epochs):
    """Runs a minimal training loop over `dataloader` for `epochs` epochs using YOLOLoss."""
    device = next(model.parameters()).device
    m = model.model[-1]  # Detect() head
    criterion = YOLOLoss(m.nc, m.anchors, reduction="mean", device=device)  # anchors already in grid units
    for epoch in range(epochs):
        model.train()
        for batch_i, (imgs, targets, paths, _) in enumerate(dataloader):
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 -> float32, 0-255 -> 0.0-1.0
            targets = targets.to(device)
            pred = model(imgs)
            loss, loss_items = criterion(pred, targets)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            if batch_i % 10 == 0:
                print(f"Epoch {epoch}/{epochs}, Batch {batch_i}/{len(dataloader)}, Loss: {loss.item():.4f}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--data", type=str, default="data/coco128.yaml", help="data.yaml path")
parser.add_argument("--epochs", type=int, default=300)
parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs")
parser.add_argument("--img-size", type=int, default=640, help="train, test image sizes")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
opt = parser.parse_args()
    device = select_device(opt.device)
    # Initialize model (Model is the DetectionModel alias; defaults to yolov5s.yaml with nc=80)
    model = Model(nc=80).to(device)
# Initialize optimizer
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Initialize dataloader
dataloader = DataLoader(opt.data, img_size=opt.img_size, batch_size=opt.batch_size)
# Train the model
train(model, dataloader, optimizer, opt.epochs)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml")
parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs")
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--profile", action="store_true", help="profile model speed")
parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer")
parser.add_argument("--test", action="store_true", help="test all yolo*.yaml")
opt = parser.parse_args()
opt.cfg = check_yaml(opt.cfg) # check YAML
print_args(vars(opt))
device = select_device(opt.device)
# Create model
im = torch.rand(opt.batch_size, 3, 640, 640).to(device)
model = Model(opt.cfg).to(device)
# Options
if opt.line_profile: # profile layer by layer
model(im, profile=True)
elif opt.profile: # profile forward-backward
results = profile(input=im, ops=[model], n=3)
elif opt.test: # test all models
for cfg in Path(ROOT / "models").rglob("yolo*.yaml"):
try:
_ = Model(cfg)
except Exception as e:
print(f"Error in {cfg}: {e}")
else: # report fused model summary
model.fuse()


@ -156,7 +156,6 @@ def random_perspective(
):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
"""Applies random perspective transformation to an image, modifying the image and corresponding labels."""
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2


@ -64,7 +64,6 @@ class Callbacks:
thread: (boolean) Run callbacks in daemon thread
kwargs: Keyword Arguments to receive from YOLOv5
"""
assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}"
for logger in self._callbacks[hook]:
if thread:


@ -1104,7 +1104,8 @@ def extract_boxes(path=DATASETS_DIR / "coco128"):
def autosplit(path=DATASETS_DIR / "coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
"""Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.dataloaders import *; autosplit()
    Arguments:
path: Path to images directory
weights: Train, val, test weights (list, tuple)
annotated_only: Only use images with an annotated txt file
@ -1183,7 +1184,7 @@ class HUBDatasetStats:
"""
Class for generating HUB dataset JSON and `-hub` dataset directory.
    Arguments:
path: Path to data.yaml or data.zip (with data.yaml inside data.zip)
autodownload: Attempt to download dataset if not found locally
@ -1314,7 +1315,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
"""
YOLOv5 Classification Dataset.
    Arguments:
root: Dataset path
transform: torchvision transforms, used by default
album_transform: Albumentations transforms, used if installed


@ -518,7 +518,6 @@ def check_font(font=FONT, progress=False):
def check_dataset(data, autodownload=True):
"""Validates and/or auto-downloads a dataset, returning its configuration as a dictionary."""
# Download (optional)
extract_dir = ""
if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)):
@ -1023,7 +1022,6 @@ def non_max_suppression(
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
# Checks
assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0"
assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0"


@ -350,7 +350,8 @@ class GenericLogger:
"""
YOLOv5 General purpose logger for non-task specific logging
Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...)
    Arguments:
opt: Run arguments
console_logger: Console logger
include: loggers to include


@ -80,7 +80,7 @@ class ClearmlLogger:
- Initialize ClearML Task, this object will capture the experiment
- Upload dataset version to ClearML Data if opt.upload_dataset is True
        Arguments:
opt (namespace) -- Commandline arguments for this run
hyp (dict) -- Hyperparameters for this run
@ -133,7 +133,7 @@ class ClearmlLogger:
"""
Log scalars/metrics to ClearML.
        Arguments:
metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...}
epoch (int) iteration number for the current set of metrics
"""
@ -145,7 +145,7 @@ class ClearmlLogger:
"""
Log model weights to ClearML.
        Arguments:
model_path (PosixPath or str) Path to the model weights
model_name (str) Name of the model visible in ClearML
epoch (int) Iteration / epoch of the model weights
@ -158,7 +158,7 @@ class ClearmlLogger:
"""
Log final metrics to a summary table.
        Arguments:
metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...}
"""
for k, v in metrics.items():
@ -168,7 +168,7 @@ class ClearmlLogger:
"""
Log image as plot in the plot section of ClearML.
        Arguments:
title (str) Title of the plot
plot_path (PosixPath or str) Path to the saved image file
"""
@ -183,7 +183,7 @@ class ClearmlLogger:
"""
Log files (images) as debug samples in the ClearML task.
        Arguments:
files (List(PosixPath)) a list of file paths in PosixPath format
title (str) A title that groups together images with the same values
"""
@ -199,7 +199,7 @@ class ClearmlLogger:
"""
Draw the bounding boxes on a single image and report the result as a ClearML debug sample.
        Arguments:
        image_path (PosixPath) the path to the original image file
boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class]
class_names (dict): dict containing mapping of class int to class name


@ -49,7 +49,7 @@ class WandbLogger:
- Upload dataset if opt.upload_dataset is True
- Setup training processes if job_type is 'Training'
        Arguments:
opt (namespace) -- Commandline arguments for this run
run_id (str) -- Run ID of W&B run to be resumed
job_type (str) -- To set the job_type for this run
@ -90,7 +90,7 @@ class WandbLogger:
- Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded
- Setup log_dict, initialize bbox_interval
        Arguments:
        opt (namespace) -- Commandline arguments for this run
"""
@ -120,7 +120,7 @@ class WandbLogger:
"""
Log the model checkpoint as W&B artifact.
        Arguments:
path (Path) -- Path of directory containing the checkpoints
opt (namespace) -- Command line arguments for this run
epoch (int) -- Current epoch number
@ -159,7 +159,7 @@ class WandbLogger:
"""
Save the metrics to the logging dictionary.
        Arguments:
log_dict (Dict) -- metrics/media to be logged in current step
"""
if self.wandb_run:
@ -170,7 +170,7 @@ class WandbLogger:
"""
Commit the log_dict, model artifacts and Tables to W&B and flush the log_dict.
        Arguments:
        best_result (boolean): Whether this evaluation produced the best result so far
"""
if self.wandb_run:
@ -197,7 +197,7 @@ class WandbLogger:
@contextmanager
def all_logging_disabled(highest_level=logging.CRITICAL):
    """Source - https://gist.github.com/simon-weber/7853144
A context manager that will prevent any logging messages triggered during the body from being processed.
:param highest_level: the maximum logging level in use.
This would only need to be changed if a custom level greater than CRITICAL is defined.


@ -41,7 +41,6 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names
# Returns
The average precision as computed in py-faster-rcnn.
"""
# Sort by objectness
i = np.argsort(-conf)
tp, conf, pred_cls = tp[i], conf[i], pred_cls[i]
@ -103,7 +102,6 @@ def compute_ap(recall, precision):
# Returns
Average precision, precision curve, recall curve
"""
# Append sentinel values to beginning and end
mrec = np.concatenate(([0.0], recall, [1.0]))
mpre = np.concatenate(([1.0], precision, [0.0]))
@ -137,6 +135,7 @@ class ConfusionMatrix:
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
detections (Array[N, 6]), x1, y1, x2, y2, conf, class
labels (Array[M, 5]), class, x1, y1, x2, y2
@ -233,7 +232,6 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7
Input shapes are box1(1,4) to box2(n,4).
"""
# Get the coordinates of bounding boxes
if xywh: # transform from xywh to xyxy
(x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1)
@ -279,14 +277,15 @@ def box_iou(box1, box2, eps=1e-7):
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
(a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
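# Worked example (illustrative): box_iou(torch.tensor([[0., 0., 2., 2.]]), torch.tensor([[1., 1., 3., 3.]]))
# gives inter = 1, union = 4 + 4 - 1 = 7, so the result is tensor([[0.1429]]) (= 1/7).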
@ -304,7 +303,6 @@ def bbox_ioa(box1, box2, eps=1e-7):
box2: np.array of shape(nx4)
returns: np.array of shape(n)
"""
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1
b2_x1, b2_y1, b2_x2, b2_y2 = box2.T


@ -29,7 +29,6 @@ def random_perspective(
):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
"""Applies random perspective, rotation, scale, shear, and translation augmentations to an image and targets."""
height = im.shape[0] + border[0] * 2 # shape(h,w,c)
width = im.shape[1] + border[1] * 2


@ -14,7 +14,6 @@ def crop_mask(masks, boxes):
- masks should be a size [n, h, w] tensor of masks
- boxes should be a size [n, 4] tensor of bbox coords in relative point form
"""
n, h, w = masks.shape
    x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1)  # x1 shape(n,1,1)
    r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :]  # rows shape(1,1,w)
@ -33,7 +32,6 @@ def process_mask_upsample(protos, masks_in, bboxes, shape):
return: h, w, n
"""
c, mh, mw = protos.shape # CHW
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw)
masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0] # CHW
@ -51,7 +49,6 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False):
return: h, w, n
"""
c, mh, mw = protos.shape # CHW
ih, iw = shape
masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW


@ -17,10 +17,9 @@ class TritonRemoteModel:
def __init__(self, url: str):
"""
        Keyword Arguments:
        url: Fully qualified address of the Triton server, e.g. grpc://localhost:8000
"""
parsed_url = urlparse(url)
if parsed_url.scheme == "grpc":
from tritonclient.grpc import InferenceServerClient, InferInput
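            # Usage sketch (illustrative): TritonRemoteModel(url="grpc://localhost:8000") selects this
            # gRPC client branch; other URL schemes (e.g. http) are handled by their own client branches.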