Add docstrings to YOLOv5 functions (#12760)
* Add docstrings to top level files * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Add docstrings * Auto-format by https://ultralytics.com/actions * Update activations.py Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> * Auto-format by https://ultralytics.com/actions --------- Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com>pull/12771/head
parent
ca00c34c2a
commit
41603da162
|
@ -149,6 +149,7 @@ def test(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses command-line arguments for YOLOv5 model inference configuration."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
|
||||
parser.add_argument("--imgsz", "--img", "--img-size", type=int, default=640, help="inference size (pixels)")
|
||||
|
@ -166,6 +167,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes a test run if `opt.test` is True, otherwise starts training or inference with provided options."""
|
||||
test(**vars(opt)) if opt.test else run(**vars(opt))
|
||||
|
||||
|
||||
|
|
|
@ -204,6 +204,7 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses command line arguments for YOLOv5 inference settings including model, source, device, and image size."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-cls.pt", help="model path(s)")
|
||||
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
|
||||
|
@ -229,6 +230,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes YOLOv5 model inference with options for ONNX DNN and video frame-rate stride adjustments."""
|
||||
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
|
||||
run(**vars(opt))
|
||||
|
||||
|
|
|
@ -76,6 +76,7 @@ GIT_INFO = check_git_info()
|
|||
|
||||
|
||||
def train(opt, device):
|
||||
"""Trains a YOLOv5 model, managing datasets, model optimization, logging, and saving checkpoints."""
|
||||
init_seeds(opt.seed + 1 + RANK, deterministic=True)
|
||||
save_dir, data, bs, epochs, nw, imgsz, pretrained = (
|
||||
opt.save_dir,
|
||||
|
@ -306,6 +307,9 @@ def train(opt, device):
|
|||
|
||||
|
||||
def parse_opt(known=False):
|
||||
"""Parses command line arguments for YOLOv5 training including model path, dataset, epochs, and more, returning
|
||||
parsed arguments.
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--model", type=str, default="yolov5s-cls.pt", help="initial weights path")
|
||||
parser.add_argument("--data", type=str, default="imagenette160", help="cifar10, cifar100, mnist, imagenet, ...")
|
||||
|
@ -333,7 +337,7 @@ def parse_opt(known=False):
|
|||
|
||||
|
||||
def main(opt):
|
||||
# Checks
|
||||
"""Executes YOLOv5 training with given options, handling device setup and DDP mode; includes pre-training checks."""
|
||||
if RANK in {-1, 0}:
|
||||
print_args(vars(opt))
|
||||
check_git_status()
|
||||
|
@ -357,7 +361,11 @@ def main(opt):
|
|||
|
||||
|
||||
def run(**kwargs):
|
||||
# Usage: from yolov5 import classify; classify.train.run(data=mnist, imgsz=320, model='yolov5m')
|
||||
"""
|
||||
Executes YOLOv5 model training or inference with specified parameters, returning updated options.
|
||||
|
||||
Example: from yolov5 import classify; classify.train.run(data=mnist, imgsz=320, model='yolov5m')
|
||||
"""
|
||||
opt = parse_opt(True)
|
||||
for k, v in kwargs.items():
|
||||
setattr(opt, k, v)
|
||||
|
|
|
@ -147,6 +147,7 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses and returns command line arguments for YOLOv5 model evaluation and inference settings."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--data", type=str, default=ROOT / "../datasets/mnist", help="dataset path")
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-cls.pt", help="model.pt path(s)")
|
||||
|
@ -166,6 +167,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes the YOLOv5 model prediction workflow, handling argument parsing and requirement checks."""
|
||||
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
|
||||
run(**vars(opt))
|
||||
|
||||
|
|
|
@ -166,6 +166,7 @@ def run(
|
|||
|
||||
# Create or append to the CSV file
|
||||
def write_to_csv(image_name, prediction, confidence):
|
||||
"""Writes prediction data for an image to a CSV file, appending if the file exists."""
|
||||
data = {"Image Name": image_name, "Prediction": prediction, "Confidence": confidence}
|
||||
with open(csv_path, mode="a", newline="") as f:
|
||||
writer = csv.DictWriter(f, fieldnames=data.keys())
|
||||
|
@ -264,6 +265,7 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses command-line arguments for YOLOv5 detection, setting inference options and model configurations."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path or triton URL")
|
||||
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
|
||||
|
@ -300,6 +302,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes YOLOv5 model inference with given options, checking requirements before running the model."""
|
||||
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
|
||||
run(**vars(opt))
|
||||
|
||||
|
|
46
export.py
46
export.py
|
@ -92,6 +92,7 @@ MACOS = platform.system() == "Darwin" # macOS environment
|
|||
|
||||
class iOSModel(torch.nn.Module):
|
||||
def __init__(self, model, im):
|
||||
"""Initializes an iOS compatible model with normalization based on image dimensions."""
|
||||
super().__init__()
|
||||
b, c, h, w = im.shape # batch, channel, height, width
|
||||
self.model = model
|
||||
|
@ -104,12 +105,13 @@ class iOSModel(torch.nn.Module):
|
|||
# self.normalize = torch.tensor([1. / w, 1. / h, 1. / w, 1. / h]).expand(np, 4) # explicit (faster, larger)
|
||||
|
||||
def forward(self, x):
|
||||
"""Runs forward pass on the input tensor, returning class confidences and normalized coordinates."""
|
||||
xywh, conf, cls = self.model(x)[0].squeeze().split((4, 1, self.nc), 1)
|
||||
return cls * conf, xywh * self.normalize # confidence (3780, 80), coordinates (3780, 4)
|
||||
|
||||
|
||||
def export_formats():
|
||||
# YOLOv5 export formats
|
||||
"""Returns a DataFrame of supported YOLOv5 model export formats and their properties."""
|
||||
x = [
|
||||
["PyTorch", "-", ".pt", True, True],
|
||||
["TorchScript", "torchscript", ".torchscript", True, True],
|
||||
|
@ -128,7 +130,7 @@ def export_formats():
|
|||
|
||||
|
||||
def try_export(inner_func):
|
||||
# YOLOv5 export decorator, i..e @try_export
|
||||
"""Decorator @try_export for YOLOv5 model export functions that logs success/failure, time taken, and file size."""
|
||||
inner_args = get_default_args(inner_func)
|
||||
|
||||
def outer_func(*args, **kwargs):
|
||||
|
@ -147,7 +149,9 @@ def try_export(inner_func):
|
|||
|
||||
@try_export
|
||||
def export_torchscript(model, im, file, optimize, prefix=colorstr("TorchScript:")):
|
||||
# YOLOv5 TorchScript model export
|
||||
"""Exports YOLOv5 model to TorchScript format, optionally optimized for mobile, with image shape and stride
|
||||
metadata.
|
||||
"""
|
||||
LOGGER.info(f"\n{prefix} starting export with torch {torch.__version__}...")
|
||||
f = file.with_suffix(".torchscript")
|
||||
|
||||
|
@ -163,7 +167,7 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr("TorchScript:"
|
|||
|
||||
@try_export
|
||||
def export_onnx(model, im, file, opset, dynamic, simplify, prefix=colorstr("ONNX:")):
|
||||
# YOLOv5 ONNX export
|
||||
"""Exports a YOLOv5 model to ONNX format with dynamic axes and optional simplification."""
|
||||
check_requirements("onnx>=1.12.0")
|
||||
import onnx
|
||||
|
||||
|
@ -276,7 +280,9 @@ def export_openvino(file, metadata, half, int8, data, prefix=colorstr("OpenVINO:
|
|||
|
||||
@try_export
|
||||
def export_paddle(model, im, file, metadata, prefix=colorstr("PaddlePaddle:")):
|
||||
# YOLOv5 Paddle export
|
||||
"""Exports a YOLOv5 model to PaddlePaddle format using X2Paddle, saving to `save_dir` and adding a metadata.yaml
|
||||
file.
|
||||
"""
|
||||
check_requirements(("paddlepaddle", "x2paddle"))
|
||||
import x2paddle
|
||||
from x2paddle.convert import pytorch2paddle
|
||||
|
@ -291,7 +297,7 @@ def export_paddle(model, im, file, metadata, prefix=colorstr("PaddlePaddle:")):
|
|||
|
||||
@try_export
|
||||
def export_coreml(model, im, file, int8, half, nms, prefix=colorstr("CoreML:")):
|
||||
# YOLOv5 CoreML export
|
||||
"""Exports YOLOv5 model to CoreML format with optional NMS, INT8, and FP16 support; requires coremltools."""
|
||||
check_requirements("coremltools")
|
||||
import coremltools as ct
|
||||
|
||||
|
@ -316,7 +322,11 @@ def export_coreml(model, im, file, int8, half, nms, prefix=colorstr("CoreML:")):
|
|||
|
||||
@try_export
|
||||
def export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr("TensorRT:")):
|
||||
# YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt
|
||||
"""
|
||||
Exports a YOLOv5 model to TensorRT engine format, requiring GPU and TensorRT>=7.0.0.
|
||||
|
||||
https://developer.nvidia.com/tensorrt
|
||||
"""
|
||||
assert im.device.type != "cpu", "export running on CPU but must be on GPU, i.e. `python export.py --device 0`"
|
||||
try:
|
||||
import tensorrt as trt
|
||||
|
@ -440,7 +450,7 @@ def export_saved_model(
|
|||
|
||||
@try_export
|
||||
def export_pb(keras_model, file, prefix=colorstr("TensorFlow GraphDef:")):
|
||||
# YOLOv5 TensorFlow GraphDef *.pb export https://github.com/leimao/Frozen_Graph_TensorFlow
|
||||
"""Exports YOLOv5 model to TensorFlow GraphDef *.pb format; see https://github.com/leimao/Frozen_Graph_TensorFlow for details."""
|
||||
import tensorflow as tf
|
||||
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2
|
||||
|
||||
|
@ -493,7 +503,11 @@ def export_tflite(
|
|||
|
||||
@try_export
|
||||
def export_edgetpu(file, prefix=colorstr("Edge TPU:")):
|
||||
# YOLOv5 Edge TPU export https://coral.ai/docs/edgetpu/models-intro/
|
||||
"""
|
||||
Exports a YOLOv5 model to Edge TPU compatible TFLite format; requires Linux and Edge TPU compiler.
|
||||
|
||||
https://coral.ai/docs/edgetpu/models-intro/
|
||||
"""
|
||||
cmd = "edgetpu_compiler --version"
|
||||
help_url = "https://coral.ai/docs/edgetpu/compiler/"
|
||||
assert platform.system() == "Linux", f"export only supported on Linux. See {help_url}"
|
||||
|
@ -531,7 +545,7 @@ def export_edgetpu(file, prefix=colorstr("Edge TPU:")):
|
|||
|
||||
@try_export
|
||||
def export_tfjs(file, int8, prefix=colorstr("TensorFlow.js:")):
|
||||
# YOLOv5 TensorFlow.js export
|
||||
"""Exports a YOLOv5 model to TensorFlow.js format, optionally with uint8 quantization."""
|
||||
check_requirements("tensorflowjs")
|
||||
import tensorflowjs as tfjs
|
||||
|
||||
|
@ -568,7 +582,11 @@ def export_tfjs(file, int8, prefix=colorstr("TensorFlow.js:")):
|
|||
|
||||
|
||||
def add_tflite_metadata(file, metadata, num_outputs):
|
||||
# Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata
|
||||
"""
|
||||
Adds TFLite metadata to a model file, supporting multiple outputs, as specified by TensorFlow guidelines.
|
||||
|
||||
https://www.tensorflow.org/lite/models/convert/metadata
|
||||
"""
|
||||
with contextlib.suppress(ImportError):
|
||||
# check_requirements('tflite_support')
|
||||
from tflite_support import flatbuffers
|
||||
|
@ -601,7 +619,9 @@ def add_tflite_metadata(file, metadata, num_outputs):
|
|||
|
||||
|
||||
def pipeline_coreml(model, im, file, names, y, prefix=colorstr("CoreML Pipeline:")):
|
||||
# YOLOv5 CoreML pipeline
|
||||
"""Converts a PyTorch YOLOv5 model to CoreML format with NMS, handling different input/output shapes and saving the
|
||||
model.
|
||||
"""
|
||||
import coremltools as ct
|
||||
from PIL import Image
|
||||
|
||||
|
@ -869,6 +889,7 @@ def run(
|
|||
|
||||
|
||||
def parse_opt(known=False):
|
||||
"""Parses command-line arguments for YOLOv5 model export configurations, returning the parsed options."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="dataset.yaml path")
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model.pt path(s)")
|
||||
|
@ -904,6 +925,7 @@ def parse_opt(known=False):
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes the YOLOv5 model inference or export with specified weights and options."""
|
||||
for opt.weights in opt.weights if isinstance(opt.weights, list) else [opt.weights]:
|
||||
run(**vars(opt))
|
||||
|
||||
|
|
42
hubconf.py
42
hubconf.py
|
@ -84,57 +84,77 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
|
|||
|
||||
|
||||
def custom(path="path/to/model.pt", autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5 custom or local model
|
||||
"""Loads a custom or local YOLOv5 model from a given path with optional autoshaping and device specification."""
|
||||
return _create(path, autoshape=autoshape, verbose=_verbose, device=device)
|
||||
|
||||
|
||||
def yolov5n(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-nano model https://github.com/ultralytics/yolov5
|
||||
"""Instantiates the YOLOv5-nano model with options for pretraining, input channels, class count, autoshaping,
|
||||
verbosity, and device.
|
||||
"""
|
||||
return _create("yolov5n", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5s(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-small model https://github.com/ultralytics/yolov5
|
||||
"""Creates YOLOv5-small model with options for pretraining, input channels, class count, autoshaping, verbosity, and
|
||||
device.
|
||||
"""
|
||||
return _create("yolov5s", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5m(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-medium model https://github.com/ultralytics/yolov5
|
||||
"""Instantiates the YOLOv5-medium model with customizable pretraining, channel count, class count, autoshaping,
|
||||
verbosity, and device.
|
||||
"""
|
||||
return _create("yolov5m", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5l(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-large model https://github.com/ultralytics/yolov5
|
||||
"""Creates YOLOv5-large model with options for pretraining, channels, classes, autoshaping, verbosity, and device
|
||||
selection.
|
||||
"""
|
||||
return _create("yolov5l", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5x(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-xlarge model https://github.com/ultralytics/yolov5
|
||||
"""Instantiates the YOLOv5-xlarge model with customizable pretraining, channel count, class count, autoshaping,
|
||||
verbosity, and device.
|
||||
"""
|
||||
return _create("yolov5x", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5n6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-nano-P6 model https://github.com/ultralytics/yolov5
|
||||
"""Creates YOLOv5-nano-P6 model with options for pretraining, channels, classes, autoshaping, verbosity, and
|
||||
device.
|
||||
"""
|
||||
return _create("yolov5n6", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5s6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-small-P6 model https://github.com/ultralytics/yolov5
|
||||
"""Instantiate YOLOv5-small-P6 model with options for pretraining, input channels, number of classes, autoshaping,
|
||||
verbosity, and device selection.
|
||||
"""
|
||||
return _create("yolov5s6", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5m6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-medium-P6 model https://github.com/ultralytics/yolov5
|
||||
"""Creates YOLOv5-medium-P6 model with options for pretraining, channel count, class count, autoshaping, verbosity,
|
||||
and device.
|
||||
"""
|
||||
return _create("yolov5m6", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5l6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-large-P6 model https://github.com/ultralytics/yolov5
|
||||
"""Instantiates the YOLOv5-large-P6 model with customizable pretraining, channel and class counts, autoshaping,
|
||||
verbosity, and device selection.
|
||||
"""
|
||||
return _create("yolov5l6", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, _verbose=True, device=None):
|
||||
# YOLOv5-xlarge-P6 model https://github.com/ultralytics/yolov5
|
||||
"""Creates YOLOv5-xlarge-P6 model with options for pretraining, channels, classes, autoshaping, verbosity, and
|
||||
device.
|
||||
"""
|
||||
return _create("yolov5x6", pretrained, channels, classes, autoshape, _verbose, device)
|
||||
|
||||
|
||||
|
|
133
models/common.py
133
models/common.py
|
@ -22,7 +22,7 @@ import torch.nn as nn
|
|||
from PIL import Image
|
||||
from torch.cuda import amp
|
||||
|
||||
# Import 'ultralytics' package or install if if missing
|
||||
# Import 'ultralytics' package or install if missing
|
||||
try:
|
||||
import ultralytics
|
||||
|
||||
|
@ -71,15 +71,18 @@ class Conv(nn.Module):
|
|||
default_act = nn.SiLU() # default activation
|
||||
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
|
||||
"""Initializes a standard convolution layer with optional batch normalization and activation."""
|
||||
super().__init__()
|
||||
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
|
||||
self.bn = nn.BatchNorm2d(c2)
|
||||
self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
|
||||
|
||||
def forward(self, x):
|
||||
"""Applies a convolution followed by batch normalization and an activation function to the input tensor `x`."""
|
||||
return self.act(self.bn(self.conv(x)))
|
||||
|
||||
def forward_fuse(self, x):
|
||||
"""Applies a fused convolution and activation function to the input tensor `x`."""
|
||||
return self.act(self.conv(x))
|
||||
|
||||
|
||||
|
@ -98,6 +101,11 @@ class DWConvTranspose2d(nn.ConvTranspose2d):
|
|||
class TransformerLayer(nn.Module):
|
||||
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
|
||||
def __init__(self, c, num_heads):
|
||||
"""
|
||||
Initializes a transformer layer, sans LayerNorm for performance, with multihead attention and linear layers.
|
||||
|
||||
See as described in https://arxiv.org/abs/2010.11929.
|
||||
"""
|
||||
super().__init__()
|
||||
self.q = nn.Linear(c, c, bias=False)
|
||||
self.k = nn.Linear(c, c, bias=False)
|
||||
|
@ -107,6 +115,7 @@ class TransformerLayer(nn.Module):
|
|||
self.fc2 = nn.Linear(c, c, bias=False)
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs forward pass using MultiheadAttention and two linear transformations with residual connections."""
|
||||
x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
|
||||
x = self.fc2(self.fc1(x)) + x
|
||||
return x
|
||||
|
@ -115,6 +124,9 @@ class TransformerLayer(nn.Module):
|
|||
class TransformerBlock(nn.Module):
|
||||
# Vision Transformer https://arxiv.org/abs/2010.11929
|
||||
def __init__(self, c1, c2, num_heads, num_layers):
|
||||
"""Initializes a Transformer block for vision tasks, adapting dimensions if necessary and stacking specified
|
||||
layers.
|
||||
"""
|
||||
super().__init__()
|
||||
self.conv = None
|
||||
if c1 != c2:
|
||||
|
@ -124,6 +136,9 @@ class TransformerBlock(nn.Module):
|
|||
self.c2 = c2
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through an optional convolution, followed by Transformer layers and position embeddings for
|
||||
object detection.
|
||||
"""
|
||||
if self.conv is not None:
|
||||
x = self.conv(x)
|
||||
b, _, w, h = x.shape
|
||||
|
@ -141,6 +156,9 @@ class Bottleneck(nn.Module):
|
|||
self.add = shortcut and c1 == c2
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through two convolutions, optionally adds shortcut if channel dimensions match; input is a
|
||||
tensor.
|
||||
"""
|
||||
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
||||
|
||||
|
||||
|
@ -158,6 +176,9 @@ class BottleneckCSP(nn.Module):
|
|||
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs forward pass by applying layers, activation, and concatenation on input x, returning feature-
|
||||
enhanced output.
|
||||
"""
|
||||
y1 = self.cv3(self.m(self.cv1(x)))
|
||||
y2 = self.cv2(x)
|
||||
return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
|
||||
|
@ -166,7 +187,12 @@ class BottleneckCSP(nn.Module):
|
|||
class CrossConv(nn.Module):
|
||||
# Cross Convolution Downsample
|
||||
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
|
||||
# ch_in, ch_out, kernel, stride, groups, expansion, shortcut
|
||||
"""
|
||||
Initializes CrossConv with downsampling, expanding, and optionally shortcutting; `c1` input, `c2` output
|
||||
channels.
|
||||
|
||||
Inputs are ch_in, ch_out, kernel, stride, groups, expansion, shortcut.
|
||||
"""
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.cv1 = Conv(c1, c_, (1, k), (1, s))
|
||||
|
@ -174,6 +200,7 @@ class CrossConv(nn.Module):
|
|||
self.add = shortcut and c1 == c2
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs feature sampling, expanding, and applies shortcut if channels match; expects `x` input tensor."""
|
||||
return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
|
||||
|
||||
|
||||
|
@ -188,12 +215,16 @@ class C3(nn.Module):
|
|||
self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs forward propagation using concatenated outputs from two convolutions and a Bottleneck sequence."""
|
||||
return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
|
||||
|
||||
|
||||
class C3x(C3):
|
||||
# C3 module with cross-convolutions
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
||||
"""Initializes C3x module with cross-convolutions, extending C3 with customizable channel dimensions, groups,
|
||||
and expansion.
|
||||
"""
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
c_ = int(c2 * e)
|
||||
self.m = nn.Sequential(*(CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)))
|
||||
|
@ -202,6 +233,9 @@ class C3x(C3):
|
|||
class C3TR(C3):
|
||||
# C3 module with TransformerBlock()
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
||||
"""Initializes C3 module with TransformerBlock for enhanced feature extraction, accepts channel sizes, shortcut
|
||||
config, group, and expansion.
|
||||
"""
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
c_ = int(c2 * e)
|
||||
self.m = TransformerBlock(c_, c_, 4, n)
|
||||
|
@ -210,6 +244,9 @@ class C3TR(C3):
|
|||
class C3SPP(C3):
|
||||
# C3 module with SPP()
|
||||
def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
|
||||
"""Initializes a C3 module with SPP layer for advanced spatial feature extraction, given channel sizes, kernel
|
||||
sizes, shortcut, group, and expansion ratio.
|
||||
"""
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
c_ = int(c2 * e)
|
||||
self.m = SPP(c_, c_, k)
|
||||
|
@ -218,6 +255,7 @@ class C3SPP(C3):
|
|||
class C3Ghost(C3):
|
||||
# C3 module with GhostBottleneck()
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
|
||||
"""Initializes YOLOv5's C3 module with Ghost Bottlenecks for efficient feature extraction."""
|
||||
super().__init__(c1, c2, n, shortcut, g, e)
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
|
||||
|
@ -226,6 +264,7 @@ class C3Ghost(C3):
|
|||
class SPP(nn.Module):
|
||||
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
|
||||
def __init__(self, c1, c2, k=(5, 9, 13)):
|
||||
"""Initializes SPP layer with Spatial Pyramid Pooling, ref: https://arxiv.org/abs/1406.4729, args: c1 (input channels), c2 (output channels), k (kernel sizes)."""
|
||||
super().__init__()
|
||||
c_ = c1 // 2 # hidden channels
|
||||
self.cv1 = Conv(c1, c_, 1, 1)
|
||||
|
@ -233,6 +272,9 @@ class SPP(nn.Module):
|
|||
self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
|
||||
|
||||
def forward(self, x):
|
||||
"""Applies convolution and max pooling layers to the input tensor `x`, concatenates results, and returns output
|
||||
tensor.
|
||||
"""
|
||||
x = self.cv1(x)
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
|
||||
|
@ -249,6 +291,7 @@ class SPPF(nn.Module):
|
|||
self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through a series of convolutions and max pooling operations for feature extraction."""
|
||||
x = self.cv1(x)
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore") # suppress torch 1.9.0 max_pool2d() warning
|
||||
|
@ -278,6 +321,7 @@ class GhostConv(nn.Module):
|
|||
self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs forward pass, concatenating outputs of two convolutions on input `x`: shape (B,C,H,W)."""
|
||||
y = self.cv1(x)
|
||||
return torch.cat((y, self.cv2(y)), 1)
|
||||
|
||||
|
@ -297,16 +341,23 @@ class GhostBottleneck(nn.Module):
|
|||
)
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through conv and shortcut layers, returning their summed output."""
|
||||
return self.conv(x) + self.shortcut(x)
|
||||
|
||||
|
||||
class Contract(nn.Module):
|
||||
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
|
||||
def __init__(self, gain=2):
|
||||
"""Initializes a layer to contract spatial dimensions (width-height) into channels, e.g., input shape
|
||||
(1,64,80,80) to (1,256,40,40).
|
||||
"""
|
||||
super().__init__()
|
||||
self.gain = gain
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input tensor to expand channel dimensions by contracting spatial dimensions, yielding output shape
|
||||
`(b, c*s*s, h//s, w//s)`.
|
||||
"""
|
||||
b, c, h, w = x.size() # assert (h / s == 0) and (W / s == 0), 'Indivisible gain'
|
||||
s = self.gain
|
||||
x = x.view(b, c, h // s, s, w // s, s) # x(1,64,40,2,40,2)
|
||||
|
@ -317,10 +368,19 @@ class Contract(nn.Module):
|
|||
class Expand(nn.Module):
|
||||
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
|
||||
def __init__(self, gain=2):
|
||||
"""
|
||||
Initializes the Expand module to increase spatial dimensions by redistributing channels, with an optional gain
|
||||
factor.
|
||||
|
||||
Example: x(1,64,80,80) to x(1,16,160,160).
|
||||
"""
|
||||
super().__init__()
|
||||
self.gain = gain
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input tensor x to expand spatial dimensions by redistributing channels, requiring C / gain^2 ==
|
||||
0.
|
||||
"""
|
||||
b, c, h, w = x.size() # assert C / s ** 2 == 0, 'Indivisible gain'
|
||||
s = self.gain
|
||||
x = x.view(b, s, s, c // s**2, h, w) # x(1,2,2,16,80,80)
|
||||
|
@ -331,17 +391,21 @@ class Expand(nn.Module):
|
|||
class Concat(nn.Module):
|
||||
# Concatenate a list of tensors along dimension
|
||||
def __init__(self, dimension=1):
|
||||
"""Initializes a Concat module to concatenate tensors along a specified dimension."""
|
||||
super().__init__()
|
||||
self.d = dimension
|
||||
|
||||
def forward(self, x):
|
||||
"""Concatenates a list of tensors along a specified dimension; `x` is a list of tensors, `dimension` is an
|
||||
int.
|
||||
"""
|
||||
return torch.cat(x, self.d)
|
||||
|
||||
|
||||
class DetectMultiBackend(nn.Module):
|
||||
# YOLOv5 MultiBackend class for python inference on various backends
|
||||
def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
|
||||
# Usage:
|
||||
"""Initializes DetectMultiBackend with support for various inference backends, including PyTorch and ONNX."""
|
||||
# PyTorch: weights = *.pt
|
||||
# TorchScript: *.torchscript
|
||||
# ONNX Runtime: *.onnx
|
||||
|
@ -462,11 +526,13 @@ class DetectMultiBackend(nn.Module):
|
|||
import tensorflow as tf
|
||||
|
||||
def wrap_frozen_graph(gd, inputs, outputs):
|
||||
"""Wraps a TensorFlow GraphDef for inference, returning a pruned function."""
|
||||
x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped
|
||||
ge = x.graph.as_graph_element
|
||||
return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))
|
||||
|
||||
def gd_outputs(gd):
|
||||
"""Generates a sorted list of graph outputs excluding NoOp nodes and inputs, formatted as '<name>:0'."""
|
||||
name_list, input_list = [], []
|
||||
for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef
|
||||
name_list.append(node.name)
|
||||
|
@ -540,7 +606,7 @@ class DetectMultiBackend(nn.Module):
|
|||
self.__dict__.update(locals()) # assign all variables to self
|
||||
|
||||
def forward(self, im, augment=False, visualize=False):
|
||||
# YOLOv5 MultiBackend inference
|
||||
"""Performs YOLOv5 inference on input images with options for augmentation and visualization."""
|
||||
b, ch, h, w = im.shape # batch, channel, height, width
|
||||
if self.fp16 and im.dtype != torch.float16:
|
||||
im = im.half() # to FP16
|
||||
|
@ -622,10 +688,11 @@ class DetectMultiBackend(nn.Module):
|
|||
return self.from_numpy(y)
|
||||
|
||||
def from_numpy(self, x):
|
||||
"""Converts a NumPy array to a torch tensor, maintaining device compatibility."""
|
||||
return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x
|
||||
|
||||
def warmup(self, imgsz=(1, 3, 640, 640)):
|
||||
# Warmup model by running inference once
|
||||
"""Performs a single inference warmup to initialize model weights, accepting an `imgsz` tuple for image size."""
|
||||
warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
|
||||
if any(warmup_types) and (self.device.type != "cpu" or self.triton):
|
||||
im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input
|
||||
|
@ -634,7 +701,11 @@ class DetectMultiBackend(nn.Module):
|
|||
|
||||
@staticmethod
|
||||
def _model_type(p="path/to/model.pt"):
|
||||
# Return model type from model path, i.e. path='path/to/model.onnx' -> type=onnx
|
||||
"""
|
||||
Determines model type from file path or URL, supporting various export formats.
|
||||
|
||||
Example: path='path/to/model.onnx' -> type=onnx
|
||||
"""
|
||||
# types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
|
||||
from export import export_formats
|
||||
from utils.downloads import is_url
|
||||
|
@ -650,7 +721,7 @@ class DetectMultiBackend(nn.Module):
|
|||
|
||||
@staticmethod
|
||||
def _load_metadata(f=Path("path/to/meta.yaml")):
|
||||
# Load metadata from meta.yaml if it exists
|
||||
"""Loads metadata from a YAML file, returning strides and names if the file exists, otherwise `None`."""
|
||||
if f.exists():
|
||||
d = yaml_load(f)
|
||||
return d["stride"], d["names"] # assign stride, names
|
||||
|
@ -668,6 +739,7 @@ class AutoShape(nn.Module):
|
|||
amp = False # Automatic Mixed Precision (AMP) inference
|
||||
|
||||
def __init__(self, model, verbose=True):
|
||||
"""Initializes YOLOv5 model for inference, setting up attributes and preparing model for evaluation."""
|
||||
super().__init__()
|
||||
if verbose:
|
||||
LOGGER.info("Adding AutoShape... ")
|
||||
|
@ -681,7 +753,11 @@ class AutoShape(nn.Module):
|
|||
m.export = True # do not output loss values
|
||||
|
||||
def _apply(self, fn):
|
||||
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
|
||||
"""
|
||||
Applies to(), cpu(), cuda(), half() etc.
|
||||
|
||||
to model tensors excluding parameters or registered buffers.
|
||||
"""
|
||||
self = super()._apply(fn)
|
||||
if self.pt:
|
||||
m = self.model.model.model[-1] if self.dmb else self.model.model[-1] # Detect()
|
||||
|
@ -693,7 +769,12 @@ class AutoShape(nn.Module):
|
|||
|
||||
@smart_inference_mode()
|
||||
def forward(self, ims, size=640, augment=False, profile=False):
|
||||
# Inference from various sources. For size(height=640, width=1280), RGB images example inputs are:
|
||||
"""
|
||||
Performs inference on inputs with optional augment & profiling.
|
||||
|
||||
Supports various formats including file, URI, OpenCV, PIL, numpy, torch.
|
||||
"""
|
||||
# For size(height=640, width=1280), RGB images example inputs are:
|
||||
# file: ims = 'data/images/zidane.jpg' # str or PosixPath
|
||||
# URI: = 'https://ultralytics.com/images/zidane.jpg'
|
||||
# OpenCV: = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(640,1280,3)
|
||||
|
@ -761,6 +842,7 @@ class AutoShape(nn.Module):
|
|||
class Detections:
|
||||
# YOLOv5 detections class for inference results
|
||||
def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
|
||||
"""Initializes the YOLOv5 Detections class with image info, predictions, filenames, timing and normalization."""
|
||||
super().__init__()
|
||||
d = pred[0].device # device
|
||||
gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims] # normalizations
|
||||
|
@ -778,6 +860,7 @@ class Detections:
|
|||
self.s = tuple(shape) # inference BCHW shape
|
||||
|
||||
def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path("")):
|
||||
"""Executes model predictions, displaying and/or saving outputs with optional crops and labels."""
|
||||
s, crops = "", []
|
||||
for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
|
||||
s += f"\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} " # string
|
||||
|
@ -832,22 +915,42 @@ class Detections:
|
|||
|
||||
@TryExcept("Showing images is not supported in this environment")
|
||||
def show(self, labels=True):
|
||||
"""
|
||||
Displays detection results with optional labels.
|
||||
|
||||
Usage: show(labels=True)
|
||||
"""
|
||||
self._run(show=True, labels=labels) # show results
|
||||
|
||||
def save(self, labels=True, save_dir="runs/detect/exp", exist_ok=False):
|
||||
"""
|
||||
Saves detection results with optional labels to a specified directory.
|
||||
|
||||
Usage: save(labels=True, save_dir='runs/detect/exp', exist_ok=False)
|
||||
"""
|
||||
save_dir = increment_path(save_dir, exist_ok, mkdir=True) # increment save_dir
|
||||
self._run(save=True, labels=labels, save_dir=save_dir) # save results
|
||||
|
||||
def crop(self, save=True, save_dir="runs/detect/exp", exist_ok=False):
|
||||
"""
|
||||
Crops detection results, optionally saves them to a directory.
|
||||
|
||||
Args: save (bool), save_dir (str), exist_ok (bool).
|
||||
"""
|
||||
save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
|
||||
return self._run(crop=True, save=save, save_dir=save_dir) # crop results
|
||||
|
||||
def render(self, labels=True):
|
||||
"""Renders detection results with optional labels on images; args: labels (bool) indicating label inclusion."""
|
||||
self._run(render=True, labels=labels) # render results
|
||||
return self.ims
|
||||
|
||||
def pandas(self):
|
||||
# return detections as pandas DataFrames, i.e. print(results.pandas().xyxy[0])
|
||||
"""
|
||||
Returns detections as pandas DataFrames for various box formats (xyxy, xyxyn, xywh, xywhn).
|
||||
|
||||
Example: print(results.pandas().xyxy[0]).
|
||||
"""
|
||||
new = copy(self) # return copy
|
||||
ca = "xmin", "ymin", "xmax", "ymax", "confidence", "class", "name" # xyxy columns
|
||||
cb = "xcenter", "ycenter", "width", "height", "confidence", "class", "name" # xywh columns
|
||||
|
@ -857,7 +960,11 @@ class Detections:
|
|||
return new
|
||||
|
||||
def tolist(self):
|
||||
# return a list of Detections objects, i.e. 'for result in results.tolist():'
|
||||
"""
|
||||
Converts a Detections object into a list of individual detection results for iteration.
|
||||
|
||||
Example: for result in results.tolist():
|
||||
"""
|
||||
r = range(self.n) # iterable
|
||||
return [
|
||||
Detections(
|
||||
|
@ -872,6 +979,7 @@ class Detections:
|
|||
]
|
||||
|
||||
def print(self):
|
||||
"""Logs the string representation of the current object's state via the LOGGER."""
|
||||
LOGGER.info(self.__str__())
|
||||
|
||||
def __len__(self): # override len(results)
|
||||
|
@ -881,6 +989,7 @@ class Detections:
|
|||
return self._run(pprint=True) # print results
|
||||
|
||||
def __repr__(self):
|
||||
"""Returns a string representation of the YOLOv5 object, including its class and formatted results."""
|
||||
return f"YOLOv5 {self.__class__} instance\n" + self.__str__()
|
||||
|
||||
|
||||
|
@ -894,6 +1003,7 @@ class Proto(nn.Module):
|
|||
self.cv3 = Conv(c_, c2)
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs a forward pass using convolutional layers and upsampling on input tensor `x`."""
|
||||
return self.cv3(self.cv2(self.upsample(self.cv1(x))))
|
||||
|
||||
|
||||
|
@ -910,6 +1020,7 @@ class Classify(nn.Module):
|
|||
self.linear = nn.Linear(c_, c2) # to x(b,c2)
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through conv, pool, drop, and linear layers; supports list concatenation input."""
|
||||
if isinstance(x, list):
|
||||
x = torch.cat(x, 1)
|
||||
return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
|
||||
|
|
|
@ -19,6 +19,7 @@ class Sum(nn.Module):
|
|||
self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True) # layer weights
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through a customizable weighted sum of `n` inputs, optionally applying learned weights."""
|
||||
y = x[0] # no weight
|
||||
if self.weight:
|
||||
w = torch.sigmoid(self.w) * 2
|
||||
|
@ -53,15 +54,21 @@ class MixConv2d(nn.Module):
|
|||
self.act = nn.SiLU()
|
||||
|
||||
def forward(self, x):
|
||||
"""Performs forward pass by applying SiLU activation on batch-normalized concatenated convolutional layer
|
||||
outputs.
|
||||
"""
|
||||
return self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
|
||||
|
||||
|
||||
class Ensemble(nn.ModuleList):
|
||||
# Ensemble of models
|
||||
"""Ensemble of models."""
|
||||
|
||||
def __init__(self):
|
||||
"""Initializes an ensemble of models to be used for aggregated predictions."""
|
||||
super().__init__()
|
||||
|
||||
def forward(self, x, augment=False, profile=False, visualize=False):
|
||||
"""Performs forward pass aggregating outputs from an ensemble of models.."""
|
||||
y = [module(x, augment, profile, visualize)[0] for module in self]
|
||||
# y = torch.stack(y).max(0)[0] # max ensemble
|
||||
# y = torch.stack(y).mean(0) # mean ensemble
|
||||
|
@ -70,7 +77,11 @@ class Ensemble(nn.ModuleList):
|
|||
|
||||
|
||||
def attempt_load(weights, device=None, inplace=True, fuse=True):
|
||||
# Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
|
||||
"""
|
||||
Loads and fuses an ensemble or single YOLOv5 model from weights, handling device placement and model adjustments.
|
||||
|
||||
Example inputs: weights=[a,b,c] or a single model weights=[a] or weights=a.
|
||||
"""
|
||||
from models.yolo import Detect, Model
|
||||
|
||||
model = Ensemble()
|
||||
|
|
114
models/tf.py
114
models/tf.py
|
@ -51,6 +51,7 @@ from utils.general import LOGGER, make_divisible, print_args
|
|||
class TFBN(keras.layers.Layer):
|
||||
# TensorFlow BatchNormalization wrapper
|
||||
def __init__(self, w=None):
|
||||
"""Initializes a TensorFlow BatchNormalization layer with optional pretrained weights."""
|
||||
super().__init__()
|
||||
self.bn = keras.layers.BatchNormalization(
|
||||
beta_initializer=keras.initializers.Constant(w.bias.numpy()),
|
||||
|
@ -61,12 +62,19 @@ class TFBN(keras.layers.Layer):
|
|||
)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Applies batch normalization to the inputs."""
|
||||
return self.bn(inputs)
|
||||
|
||||
|
||||
class TFPad(keras.layers.Layer):
|
||||
# Pad inputs in spatial dimensions 1 and 2
|
||||
def __init__(self, pad):
|
||||
"""
|
||||
Initializes a padding layer for spatial dimensions 1 and 2 with specified padding, supporting both int and tuple
|
||||
inputs.
|
||||
|
||||
Inputs are
|
||||
"""
|
||||
super().__init__()
|
||||
if isinstance(pad, int):
|
||||
self.pad = tf.constant([[0, 0], [pad, pad], [pad, pad], [0, 0]])
|
||||
|
@ -74,13 +82,19 @@ class TFPad(keras.layers.Layer):
|
|||
self.pad = tf.constant([[0, 0], [pad[0], pad[0]], [pad[1], pad[1]], [0, 0]])
|
||||
|
||||
def call(self, inputs):
|
||||
"""Pads input tensor with zeros using specified padding, suitable for int and tuple pad dimensions."""
|
||||
return tf.pad(inputs, self.pad, mode="constant", constant_values=0)
|
||||
|
||||
|
||||
class TFConv(keras.layers.Layer):
|
||||
# Standard convolution
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
|
||||
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
||||
"""
|
||||
Initializes a standard convolution layer with optional batch normalization and activation; supports only
|
||||
group=1.
|
||||
|
||||
Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
|
||||
"""
|
||||
super().__init__()
|
||||
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
|
||||
# TensorFlow convolution padding is inconsistent with PyTorch (e.g. k=3 s=2 'SAME' padding)
|
||||
|
@ -99,13 +113,19 @@ class TFConv(keras.layers.Layer):
|
|||
self.act = activations(w.act) if act else tf.identity
|
||||
|
||||
def call(self, inputs):
|
||||
"""Applies convolution, batch normalization, and activation function to input tensors."""
|
||||
return self.act(self.bn(self.conv(inputs)))
|
||||
|
||||
|
||||
class TFDWConv(keras.layers.Layer):
|
||||
# Depthwise convolution
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, act=True, w=None):
|
||||
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
||||
"""
|
||||
Initializes a depthwise convolution layer with optional batch normalization and activation for TensorFlow
|
||||
models.
|
||||
|
||||
Input are ch_in, ch_out, weights, kernel, stride, padding, groups.
|
||||
"""
|
||||
super().__init__()
|
||||
assert c2 % c1 == 0, f"TFDWConv() output={c2} must be a multiple of input={c1} channels"
|
||||
conv = keras.layers.DepthwiseConv2D(
|
||||
|
@ -122,13 +142,18 @@ class TFDWConv(keras.layers.Layer):
|
|||
self.act = activations(w.act) if act else tf.identity
|
||||
|
||||
def call(self, inputs):
|
||||
"""Applies convolution, batch normalization, and activation function to input tensors."""
|
||||
return self.act(self.bn(self.conv(inputs)))
|
||||
|
||||
|
||||
class TFDWConvTranspose2d(keras.layers.Layer):
|
||||
# Depthwise ConvTranspose2d
|
||||
def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0, w=None):
|
||||
# ch_in, ch_out, weights, kernel, stride, padding, groups
|
||||
"""
|
||||
Initializes depthwise ConvTranspose2D layer with specific channel, kernel, stride, and padding settings.
|
||||
|
||||
Inputs are ch_in, ch_out, weights, kernel, stride, padding, groups.
|
||||
"""
|
||||
super().__init__()
|
||||
assert c1 == c2, f"TFDWConv() output={c2} must be equal to input={c1} channels"
|
||||
assert k == 4 and p1 == 1, "TFDWConv() only valid for k=4 and p1=1"
|
||||
|
@ -149,13 +174,19 @@ class TFDWConvTranspose2d(keras.layers.Layer):
|
|||
]
|
||||
|
||||
def call(self, inputs):
|
||||
"""Processes input through parallel convolutions and concatenates results, trimming border pixels."""
|
||||
return tf.concat([m(x) for m, x in zip(self.conv, tf.split(inputs, self.c1, 3))], 3)[:, 1:-1, 1:-1]
|
||||
|
||||
|
||||
class TFFocus(keras.layers.Layer):
|
||||
# Focus wh information into c-space
|
||||
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
|
||||
# ch_in, ch_out, kernel, stride, padding, groups
|
||||
"""
|
||||
Initializes TFFocus layer to focus width and height information into channel space with custom convolution
|
||||
parameters.
|
||||
|
||||
Inputs are ch_in, ch_out, kernel, stride, padding, groups.
|
||||
"""
|
||||
super().__init__()
|
||||
self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
|
||||
|
||||
|
@ -175,12 +206,16 @@ class TFBottleneck(keras.layers.Layer):
|
|||
self.add = shortcut and c1 == c2
|
||||
|
||||
def call(self, inputs):
|
||||
"""Performs forward pass; if shortcut is True & input/output channels match, adds input to the convolution
|
||||
result.
|
||||
"""
|
||||
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
|
||||
|
||||
|
||||
class TFCrossConv(keras.layers.Layer):
|
||||
# Cross Convolution
|
||||
def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False, w=None):
|
||||
"""Initializes cross convolution layer with optional expansion, grouping, and shortcut addition capabilities."""
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.cv1 = TFConv(c1, c_, (1, k), (1, s), w=w.cv1)
|
||||
|
@ -188,12 +223,16 @@ class TFCrossConv(keras.layers.Layer):
|
|||
self.add = shortcut and c1 == c2
|
||||
|
||||
def call(self, inputs):
|
||||
"""Passes input through two convolutions optionally adding the input if channel dimensions match."""
|
||||
return inputs + self.cv2(self.cv1(inputs)) if self.add else self.cv2(self.cv1(inputs))
|
||||
|
||||
|
||||
class TFConv2d(keras.layers.Layer):
|
||||
# Substitution for PyTorch nn.Conv2D
|
||||
def __init__(self, c1, c2, k, s=1, g=1, bias=True, w=None):
|
||||
"""Initializes a TensorFlow 2D convolution layer, mimicking PyTorch's nn.Conv2D functionality for given filter
|
||||
sizes and stride.
|
||||
"""
|
||||
super().__init__()
|
||||
assert g == 1, "TF v2.2 Conv2D does not support 'groups' argument"
|
||||
self.conv = keras.layers.Conv2D(
|
||||
|
@ -207,13 +246,19 @@ class TFConv2d(keras.layers.Layer):
|
|||
)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Applies a convolution operation to the inputs and returns the result."""
|
||||
return self.conv(inputs)
|
||||
|
||||
|
||||
class TFBottleneckCSP(keras.layers.Layer):
|
||||
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
|
||||
# ch_in, ch_out, number, shortcut, groups, expansion
|
||||
"""
|
||||
Initializes CSP bottleneck layer with specified channel sizes, count, shortcut option, groups, and expansion
|
||||
ratio.
|
||||
|
||||
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
|
||||
"""
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
|
||||
|
@ -225,6 +270,9 @@ class TFBottleneckCSP(keras.layers.Layer):
|
|||
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
|
||||
|
||||
def call(self, inputs):
|
||||
"""Processes input through the model layers, concatenates, normalizes, activates, and reduces the output
|
||||
dimensions.
|
||||
"""
|
||||
y1 = self.cv3(self.m(self.cv1(inputs)))
|
||||
y2 = self.cv2(inputs)
|
||||
return self.cv4(self.act(self.bn(tf.concat((y1, y2), axis=3))))
|
||||
|
@ -233,7 +281,11 @@ class TFBottleneckCSP(keras.layers.Layer):
|
|||
class TFC3(keras.layers.Layer):
|
||||
# CSP Bottleneck with 3 convolutions
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
|
||||
# ch_in, ch_out, number, shortcut, groups, expansion
|
||||
"""
|
||||
Initializes CSP Bottleneck with 3 convolutions, supporting optional shortcuts and group convolutions.
|
||||
|
||||
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
|
||||
"""
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
|
||||
|
@ -242,13 +294,22 @@ class TFC3(keras.layers.Layer):
|
|||
self.m = keras.Sequential([TFBottleneck(c_, c_, shortcut, g, e=1.0, w=w.m[j]) for j in range(n)])
|
||||
|
||||
def call(self, inputs):
|
||||
"""
|
||||
Processes input through a sequence of transformations for object detection (YOLOv5).
|
||||
|
||||
See https://github.com/ultralytics/yolov5.
|
||||
"""
|
||||
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
|
||||
|
||||
|
||||
class TFC3x(keras.layers.Layer):
|
||||
# 3 module with cross-convolutions
|
||||
def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, w=None):
|
||||
# ch_in, ch_out, number, shortcut, groups, expansion
|
||||
"""
|
||||
Initializes layer with cross-convolutions for enhanced feature extraction in object detection models.
|
||||
|
||||
Inputs are ch_in, ch_out, number, shortcut, groups, expansion.
|
||||
"""
|
||||
super().__init__()
|
||||
c_ = int(c2 * e) # hidden channels
|
||||
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
|
||||
|
@ -259,12 +320,14 @@ class TFC3x(keras.layers.Layer):
|
|||
)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Processes input through cascaded convolutions and merges features, returning the final tensor output."""
|
||||
return self.cv3(tf.concat((self.m(self.cv1(inputs)), self.cv2(inputs)), axis=3))
|
||||
|
||||
|
||||
class TFSPP(keras.layers.Layer):
|
||||
# Spatial pyramid pooling layer used in YOLOv3-SPP
|
||||
def __init__(self, c1, c2, k=(5, 9, 13), w=None):
|
||||
"""Initializes a YOLOv3-SPP layer with specific input/output channels and kernel sizes for pooling."""
|
||||
super().__init__()
|
||||
c_ = c1 // 2 # hidden channels
|
||||
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
|
||||
|
@ -272,6 +335,7 @@ class TFSPP(keras.layers.Layer):
|
|||
self.m = [keras.layers.MaxPool2D(pool_size=x, strides=1, padding="SAME") for x in k]
|
||||
|
||||
def call(self, inputs):
|
||||
"""Processes input through two TFConv layers and concatenates with max-pooled outputs at intermediate stage."""
|
||||
x = self.cv1(inputs)
|
||||
return self.cv2(tf.concat([x] + [m(x) for m in self.m], 3))
|
||||
|
||||
|
@ -279,6 +343,9 @@ class TFSPP(keras.layers.Layer):
|
|||
class TFSPPF(keras.layers.Layer):
|
||||
# Spatial pyramid pooling-Fast layer
|
||||
def __init__(self, c1, c2, k=5, w=None):
|
||||
"""Initializes a fast spatial pyramid pooling layer with customizable in/out channels, kernel size, and
|
||||
weights.
|
||||
"""
|
||||
super().__init__()
|
||||
c_ = c1 // 2 # hidden channels
|
||||
self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
|
||||
|
@ -286,6 +353,9 @@ class TFSPPF(keras.layers.Layer):
|
|||
self.m = keras.layers.MaxPool2D(pool_size=k, strides=1, padding="SAME")
|
||||
|
||||
def call(self, inputs):
|
||||
"""Executes the model's forward pass, concatenating input features with three max-pooled versions before final
|
||||
convolution.
|
||||
"""
|
||||
x = self.cv1(inputs)
|
||||
y1 = self.m(x)
|
||||
y2 = self.m(y1)
|
||||
|
@ -312,6 +382,7 @@ class TFDetect(keras.layers.Layer):
|
|||
self.grid[i] = self._make_grid(nx, ny)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Performs forward pass through the model layers to predict object bounding boxes and classifications."""
|
||||
z = [] # inference output
|
||||
x = []
|
||||
for i in range(self.nl):
|
||||
|
@ -336,7 +407,7 @@ class TFDetect(keras.layers.Layer):
|
|||
|
||||
@staticmethod
|
||||
def _make_grid(nx=20, ny=20):
|
||||
# yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
|
||||
"""Generates a 2D grid of coordinates in (x, y) format with shape [1, 1, ny*nx, 2]."""
|
||||
# return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
|
||||
xv, yv = tf.meshgrid(tf.range(nx), tf.range(ny))
|
||||
return tf.cast(tf.reshape(tf.stack([xv, yv], 2), [1, 1, ny * nx, 2]), dtype=tf.float32)
|
||||
|
@ -345,6 +416,9 @@ class TFDetect(keras.layers.Layer):
|
|||
class TFSegment(TFDetect):
|
||||
# YOLOv5 Segment head for segmentation models
|
||||
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), imgsz=(640, 640), w=None):
|
||||
"""Initializes YOLOv5 Segment head with specified channel depths, anchors, and input size for segmentation
|
||||
models.
|
||||
"""
|
||||
super().__init__(nc, anchors, ch, imgsz, w)
|
||||
self.nm = nm # number of masks
|
||||
self.npr = npr # number of protos
|
||||
|
@ -354,6 +428,7 @@ class TFSegment(TFDetect):
|
|||
self.detect = TFDetect.call
|
||||
|
||||
def call(self, x):
|
||||
"""Applies detection and proto layers on input, returning detections and optionally protos if training."""
|
||||
p = self.proto(x[0])
|
||||
# p = TFUpsample(None, scale_factor=4, mode='nearest')(self.proto(x[0])) # (optional) full-size protos
|
||||
p = tf.transpose(p, [0, 3, 1, 2]) # from shape(1,160,160,32) to shape(1,32,160,160)
|
||||
|
@ -363,6 +438,9 @@ class TFSegment(TFDetect):
|
|||
|
||||
class TFProto(keras.layers.Layer):
|
||||
def __init__(self, c1, c_=256, c2=32, w=None):
|
||||
"""Initializes TFProto layer with convolutional and upsampling layers for feature extraction and
|
||||
transformation.
|
||||
"""
|
||||
super().__init__()
|
||||
self.cv1 = TFConv(c1, c_, k=3, w=w.cv1)
|
||||
self.upsample = TFUpsample(None, scale_factor=2, mode="nearest")
|
||||
|
@ -370,6 +448,7 @@ class TFProto(keras.layers.Layer):
|
|||
self.cv3 = TFConv(c_, c2, w=w.cv3)
|
||||
|
||||
def call(self, inputs):
|
||||
"""Performs forward pass through the model, applying convolutions and upscaling on input tensor."""
|
||||
return self.cv3(self.cv2(self.upsample(self.cv1(inputs))))
|
||||
|
||||
|
||||
|
@ -385,17 +464,20 @@ class TFUpsample(keras.layers.Layer):
|
|||
# size=(x.shape[1] * 2, x.shape[2] * 2))
|
||||
|
||||
def call(self, inputs):
|
||||
"""Applies upsample operation to inputs using nearest neighbor interpolation."""
|
||||
return self.upsample(inputs)
|
||||
|
||||
|
||||
class TFConcat(keras.layers.Layer):
|
||||
# TF version of torch.concat()
|
||||
def __init__(self, dimension=1, w=None):
|
||||
"""Initializes a TensorFlow layer for NCHW to NHWC concatenation, requiring dimension=1."""
|
||||
super().__init__()
|
||||
assert dimension == 1, "convert only NCHW to NHWC concat"
|
||||
self.d = 3
|
||||
|
||||
def call(self, inputs):
|
||||
"""Concatenates a list of tensors along the last dimension, used for NCHW to NHWC conversion."""
|
||||
return tf.concat(inputs, self.d)
|
||||
|
||||
|
||||
|
@ -539,7 +621,9 @@ class TFModel:
|
|||
|
||||
@staticmethod
|
||||
def _xywh2xyxy(xywh):
|
||||
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
||||
"""Converts bounding box format from [x, y, w, h] to [x1, y1, x2, y2], where xy1=top-left and xy2=bottom-
|
||||
right.
|
||||
"""
|
||||
x, y, w, h = tf.split(xywh, num_or_size_splits=4, axis=-1)
|
||||
return tf.concat([x - w / 2, y - h / 2, x + w / 2, y + h / 2], axis=-1)
|
||||
|
||||
|
@ -547,7 +631,7 @@ class TFModel:
|
|||
class AgnosticNMS(keras.layers.Layer):
|
||||
# TF Agnostic NMS
|
||||
def call(self, input, topk_all, iou_thres, conf_thres):
|
||||
# wrap map_fn to avoid TypeSpec related error https://stackoverflow.com/a/65809989/3036450
|
||||
"""Performs agnostic NMS on input tensors using given thresholds and top-K selection."""
|
||||
return tf.map_fn(
|
||||
lambda x: self._nms(x, topk_all, iou_thres, conf_thres),
|
||||
input,
|
||||
|
@ -589,7 +673,7 @@ class AgnosticNMS(keras.layers.Layer):
|
|||
|
||||
|
||||
def activations(act=nn.SiLU):
|
||||
# Returns TF activation from input PyTorch activation
|
||||
"""Converts PyTorch activations to TensorFlow equivalents, supporting LeakyReLU, Hardswish, and SiLU/Swish."""
|
||||
if isinstance(act, nn.LeakyReLU):
|
||||
return lambda x: keras.activations.relu(x, alpha=0.1)
|
||||
elif isinstance(act, nn.Hardswish):
|
||||
|
@ -601,7 +685,9 @@ def activations(act=nn.SiLU):
|
|||
|
||||
|
||||
def representative_dataset_gen(dataset, ncalib=100):
|
||||
# Representative dataset generator for use with converter.representative_dataset, returns a generator of np arrays
|
||||
"""Generates a representative dataset for calibration by yielding transformed numpy arrays from the input
|
||||
dataset.
|
||||
"""
|
||||
for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
|
||||
im = np.transpose(img, [1, 2, 0])
|
||||
im = np.expand_dims(im, axis=0).astype(np.float32)
|
||||
|
@ -637,6 +723,9 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses and returns command-line options for model inference, including weights path, image size, batch size, and
|
||||
dynamic batching.
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="weights path")
|
||||
parser.add_argument("--imgsz", "--img", "--img-size", nargs="+", type=int, default=[640], help="inference size h,w")
|
||||
|
@ -649,6 +738,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes the YOLOv5 model run function with parsed command line options."""
|
||||
run(**vars(opt))
|
||||
|
||||
|
||||
|
|
|
@ -88,6 +88,7 @@ class Detect(nn.Module):
|
|||
self.inplace = inplace # use inplace ops (e.g. slice assignment)
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`."""
|
||||
z = [] # inference output
|
||||
for i in range(self.nl):
|
||||
x[i] = self.m[i](x[i]) # conv
|
||||
|
@ -113,6 +114,7 @@ class Detect(nn.Module):
|
|||
return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x)
|
||||
|
||||
def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")):
|
||||
"""Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10."""
|
||||
d = self.anchors[i].device
|
||||
t = self.anchors[i].dtype
|
||||
shape = 1, self.na, ny, nx, 2 # grid shape
|
||||
|
@ -126,6 +128,7 @@ class Detect(nn.Module):
|
|||
class Segment(Detect):
|
||||
# YOLOv5 Segment head for segmentation models
|
||||
def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True):
|
||||
"""Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments."""
|
||||
super().__init__(nc, anchors, ch, inplace)
|
||||
self.nm = nm # number of masks
|
||||
self.npr = npr # number of protos
|
||||
|
@ -135,17 +138,25 @@ class Segment(Detect):
|
|||
self.detect = Detect.forward
|
||||
|
||||
def forward(self, x):
|
||||
"""Processes input through the network, returning detections and prototypes; adjusts output based on
|
||||
training/export mode.
|
||||
"""
|
||||
p = self.proto(x[0])
|
||||
x = self.detect(self, x)
|
||||
return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1])
|
||||
|
||||
|
||||
class BaseModel(nn.Module):
|
||||
# YOLOv5 base model
|
||||
"""YOLOv5 base model."""
|
||||
|
||||
def forward(self, x, profile=False, visualize=False):
|
||||
"""Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and
|
||||
visualization.
|
||||
"""
|
||||
return self._forward_once(x, profile, visualize) # single-scale inference, train
|
||||
|
||||
def _forward_once(self, x, profile=False, visualize=False):
|
||||
"""Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options."""
|
||||
y, dt = [], [] # outputs
|
||||
for m in self.model:
|
||||
if m.f != -1: # if not from previous layer
|
||||
|
@ -159,6 +170,7 @@ class BaseModel(nn.Module):
|
|||
return x
|
||||
|
||||
def _profile_one_layer(self, m, x, dt):
|
||||
"""Profiles a single layer's performance by computing GFLOPs, execution time, and parameters."""
|
||||
c = m == self.model[-1] # is final layer, copy input as inplace fix
|
||||
o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs
|
||||
t = time_sync()
|
||||
|
@ -185,7 +197,9 @@ class BaseModel(nn.Module):
|
|||
model_info(self, verbose, img_size)
|
||||
|
||||
def _apply(self, fn):
|
||||
# Apply to(), cpu(), cuda(), half() to model tensors that are not parameters or registered buffers
|
||||
"""Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered
|
||||
buffers.
|
||||
"""
|
||||
self = super()._apply(fn)
|
||||
m = self.model[-1] # Detect()
|
||||
if isinstance(m, (Detect, Segment)):
|
||||
|
@ -239,11 +253,13 @@ class DetectionModel(BaseModel):
|
|||
LOGGER.info("")
|
||||
|
||||
def forward(self, x, augment=False, profile=False, visualize=False):
|
||||
"""Performs single-scale or augmented inference and may include profiling or visualization."""
|
||||
if augment:
|
||||
return self._forward_augment(x) # augmented inference, None
|
||||
return self._forward_once(x, profile, visualize) # single-scale inference, train
|
||||
|
||||
def _forward_augment(self, x):
|
||||
"""Performs augmented inference across different scales and flips, returning combined detections."""
|
||||
img_size = x.shape[-2:] # height, width
|
||||
s = [1, 0.83, 0.67] # scales
|
||||
f = [None, 3, None] # flips (2-ud, 3-lr)
|
||||
|
@ -258,7 +274,7 @@ class DetectionModel(BaseModel):
|
|||
return torch.cat(y, 1), None # augmented inference, train
|
||||
|
||||
def _descale_pred(self, p, flips, scale, img_size):
|
||||
# de-scale predictions following augmented inference (inverse operation)
|
||||
"""De-scales predictions from augmented inference, adjusting for flips and image size."""
|
||||
if self.inplace:
|
||||
p[..., :4] /= scale # de-scale
|
||||
if flips == 2:
|
||||
|
@ -275,7 +291,9 @@ class DetectionModel(BaseModel):
|
|||
return p
|
||||
|
||||
def _clip_augmented(self, y):
|
||||
# Clip YOLOv5 augmented inference tails
|
||||
"""Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and
|
||||
layer counts.
|
||||
"""
|
||||
nl = self.model[-1].nl # number of detection layers (P3-P5)
|
||||
g = sum(4**x for x in range(nl)) # grid points
|
||||
e = 1 # exclude layer count
|
||||
|
@ -304,6 +322,7 @@ Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibilit
|
|||
class SegmentationModel(DetectionModel):
|
||||
# YOLOv5 segmentation model
|
||||
def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
|
||||
"""Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for channels, nc (int) for num classes, anchors (list)."""
|
||||
super().__init__(cfg, ch, nc, anchors)
|
||||
|
||||
|
||||
|
@ -314,7 +333,9 @@ class ClassificationModel(BaseModel):
|
|||
self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
|
||||
|
||||
def _from_detection_model(self, model, nc=1000, cutoff=10):
|
||||
# Create a YOLOv5 classification model from a YOLOv5 detection model
|
||||
"""Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification
|
||||
layer.
|
||||
"""
|
||||
if isinstance(model, DetectMultiBackend):
|
||||
model = model.model # unwrap DetectMultiBackend
|
||||
model.model = model.model[:cutoff] # backbone
|
||||
|
@ -329,7 +350,7 @@ class ClassificationModel(BaseModel):
|
|||
self.nc = nc
|
||||
|
||||
def _from_yaml(self, cfg):
|
||||
# Create a YOLOv5 classification model from a *.yaml file
|
||||
"""Creates a YOLOv5 classification model from a specified *.yaml configuration file."""
|
||||
self.model = None
|
||||
|
||||
|
||||
|
|
|
@ -257,6 +257,9 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses command-line options for YOLOv5 inference including model paths, data sources, inference settings, and
|
||||
output preferences.
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
|
||||
parser.add_argument("--source", type=str, default=ROOT / "data/images", help="file/dir/URL/glob/screen/0(webcam)")
|
||||
|
@ -293,6 +296,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes YOLOv5 model inference with given options, checking for requirements before launching."""
|
||||
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
|
||||
run(**vars(opt))
|
||||
|
||||
|
|
|
@ -532,6 +532,11 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio
|
|||
|
||||
|
||||
def parse_opt(known=False):
|
||||
"""
|
||||
Parses command line arguments for training configurations, returning parsed arguments.
|
||||
|
||||
Supports both known and unknown args.
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s-seg.pt", help="initial weights path")
|
||||
parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
|
||||
|
@ -576,7 +581,7 @@ def parse_opt(known=False):
|
|||
|
||||
|
||||
def main(opt, callbacks=Callbacks()):
|
||||
# Checks
|
||||
"""Initializes training or evolution of YOLOv5 models based on provided configuration and options."""
|
||||
if RANK in {-1, 0}:
|
||||
print_args(vars(opt))
|
||||
check_git_status()
|
||||
|
@ -733,7 +738,11 @@ def main(opt, callbacks=Callbacks()):
|
|||
|
||||
|
||||
def run(**kwargs):
|
||||
# Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
|
||||
"""
|
||||
Executes YOLOv5 training with given parameters, altering options programmatically; returns updated options.
|
||||
|
||||
Example: mport train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
|
||||
"""
|
||||
opt = parse_opt(True)
|
||||
for k, v in kwargs.items():
|
||||
setattr(opt, k, v)
|
||||
|
|
|
@ -71,7 +71,9 @@ from utils.torch_utils import de_parallel, select_device, smart_inference_mode
|
|||
|
||||
|
||||
def save_one_txt(predn, save_conf, shape, file):
|
||||
# Save one txt result
|
||||
"""Saves detection results in txt format; includes class, xywh (normalized), optionally confidence if `save_conf` is
|
||||
True.
|
||||
"""
|
||||
gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh
|
||||
for *xyxy, conf, cls in predn.tolist():
|
||||
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
|
||||
|
@ -81,7 +83,11 @@ def save_one_txt(predn, save_conf, shape, file):
|
|||
|
||||
|
||||
def save_one_json(predn, jdict, path, class_map, pred_masks):
|
||||
# Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
|
||||
"""
|
||||
Saves a JSON file with detection results including bounding boxes, category IDs, scores, and segmentation masks.
|
||||
|
||||
Example JSON result: {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}.
|
||||
"""
|
||||
from pycocotools.mask import encode
|
||||
|
||||
def single_encode(x):
|
||||
|
@ -437,6 +443,9 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses command line arguments for configuring YOLOv5 options like dataset path, weights, batch size, and
|
||||
inference settings.
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--data", type=str, default=ROOT / "data/coco128-seg.yaml", help="dataset.yaml path")
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s-seg.pt", help="model path(s)")
|
||||
|
@ -469,6 +478,7 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes YOLOv5 tasks including training, validation, testing, speed, and study with configurable options."""
|
||||
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
|
||||
|
||||
if opt.task in ("train", "val", "test"): # run normally
|
||||
|
|
10
train.py
10
train.py
|
@ -505,6 +505,7 @@ def train(hyp, opt, device, callbacks): # hyp is path/to/hyp.yaml or hyp dictio
|
|||
|
||||
|
||||
def parse_opt(known=False):
|
||||
"""Parses command-line arguments for YOLOv5 training, validation, and testing."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="initial weights path")
|
||||
parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
|
||||
|
@ -559,7 +560,7 @@ def parse_opt(known=False):
|
|||
|
||||
|
||||
def main(opt, callbacks=Callbacks()):
|
||||
# Checks
|
||||
"""Runs training or hyperparameter evolution with specified options and optional callbacks."""
|
||||
if RANK in {-1, 0}:
|
||||
print_args(vars(opt))
|
||||
check_git_status()
|
||||
|
@ -815,6 +816,7 @@ def main(opt, callbacks=Callbacks()):
|
|||
|
||||
|
||||
def generate_individual(input_ranges, individual_length):
|
||||
"""Generates a list of random values within specified input ranges for each gene in the individual."""
|
||||
individual = []
|
||||
for i in range(individual_length):
|
||||
lower_bound, upper_bound = input_ranges[i]
|
||||
|
@ -823,7 +825,11 @@ def generate_individual(input_ranges, individual_length):
|
|||
|
||||
|
||||
def run(**kwargs):
|
||||
# Usage: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
|
||||
"""
|
||||
Executes YOLOv5 training with given options, overriding with any kwargs provided.
|
||||
|
||||
Example: import train; train.run(data='coco128.yaml', imgsz=320, weights='yolov5m.pt')
|
||||
"""
|
||||
opt = parse_opt(True)
|
||||
for k, v in kwargs.items():
|
||||
setattr(opt, k, v)
|
||||
|
|
|
@ -1,85 +1,95 @@
|
|||
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
|
||||
"""utils/initialization."""
|
||||
|
||||
import contextlib
|
||||
import platform
|
||||
import threading
|
||||
|
||||
|
||||
def emojis(str=""):
|
||||
# Return platform-dependent emoji-safe version of string
|
||||
return str.encode().decode("ascii", "ignore") if platform.system() == "Windows" else str
|
||||
|
||||
|
||||
class TryExcept(contextlib.ContextDecorator):
|
||||
# YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
|
||||
def __init__(self, msg=""):
|
||||
self.msg = msg
|
||||
|
||||
def __enter__(self):
|
||||
pass
|
||||
|
||||
def __exit__(self, exc_type, value, traceback):
|
||||
if value:
|
||||
print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
|
||||
return True
|
||||
|
||||
|
||||
def threaded(func):
|
||||
# Multi-threads a target function and returns thread. Usage: @threaded decorator
|
||||
def wrapper(*args, **kwargs):
|
||||
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
|
||||
thread.start()
|
||||
return thread
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def join_threads(verbose=False):
|
||||
# Join all daemon threads, i.e. atexit.register(lambda: join_threads())
|
||||
main_thread = threading.current_thread()
|
||||
for t in threading.enumerate():
|
||||
if t is not main_thread:
|
||||
if verbose:
|
||||
print(f"Joining thread {t.name}")
|
||||
t.join()
|
||||
|
||||
|
||||
def notebook_init(verbose=True):
|
||||
# Check system software and hardware
|
||||
print("Checking setup...")
|
||||
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from ultralytics.utils.checks import check_requirements
|
||||
|
||||
from utils.general import check_font, is_colab
|
||||
from utils.torch_utils import select_device # imports
|
||||
|
||||
check_font()
|
||||
|
||||
import psutil
|
||||
|
||||
if check_requirements("wandb", install=False):
|
||||
os.system("pip uninstall -y wandb") # eliminate unexpected account creation prompt with infinite hang
|
||||
if is_colab():
|
||||
shutil.rmtree("/content/sample_data", ignore_errors=True) # remove colab /sample_data directory
|
||||
|
||||
# System info
|
||||
display = None
|
||||
if verbose:
|
||||
gb = 1 << 30 # bytes to GiB (1024 ** 3)
|
||||
ram = psutil.virtual_memory().total
|
||||
total, used, free = shutil.disk_usage("/")
|
||||
with contextlib.suppress(Exception): # clear display if ipython is installed
|
||||
from IPython import display
|
||||
|
||||
display.clear_output()
|
||||
s = f"({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)"
|
||||
else:
|
||||
s = ""
|
||||
|
||||
select_device(newline=False)
|
||||
print(emojis(f"Setup complete ✅ {s}"))
|
||||
return display
|
||||
# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
|
||||
"""utils/initialization."""
|
||||
|
||||
import contextlib
|
||||
import platform
|
||||
import threading
|
||||
|
||||
|
||||
def emojis(str=""):
|
||||
"""Returns an emoji-safe version of a string, stripped of emojis on Windows platforms."""
|
||||
return str.encode().decode("ascii", "ignore") if platform.system() == "Windows" else str
|
||||
|
||||
|
||||
class TryExcept(contextlib.ContextDecorator):
|
||||
# YOLOv5 TryExcept class. Usage: @TryExcept() decorator or 'with TryExcept():' context manager
|
||||
def __init__(self, msg=""):
|
||||
"""Initializes TryExcept with an optional message, used as a decorator or context manager for error handling."""
|
||||
self.msg = msg
|
||||
|
||||
def __enter__(self):
|
||||
"""Enter the runtime context related to this object for error handling with an optional message."""
|
||||
pass
|
||||
|
||||
def __exit__(self, exc_type, value, traceback):
|
||||
"""Context manager exit method that prints an error message with emojis if an exception occurred, always returns
|
||||
True.
|
||||
"""
|
||||
if value:
|
||||
print(emojis(f"{self.msg}{': ' if self.msg else ''}{value}"))
|
||||
return True
|
||||
|
||||
|
||||
def threaded(func):
|
||||
"""Decorator @threaded to run a function in a separate thread, returning the thread instance."""
|
||||
|
||||
def wrapper(*args, **kwargs):
|
||||
thread = threading.Thread(target=func, args=args, kwargs=kwargs, daemon=True)
|
||||
thread.start()
|
||||
return thread
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
def join_threads(verbose=False):
|
||||
"""
|
||||
Joins all daemon threads, optionally printing their names if verbose is True.
|
||||
|
||||
Example: atexit.register(lambda: join_threads())
|
||||
"""
|
||||
main_thread = threading.current_thread()
|
||||
for t in threading.enumerate():
|
||||
if t is not main_thread:
|
||||
if verbose:
|
||||
print(f"Joining thread {t.name}")
|
||||
t.join()
|
||||
|
||||
|
||||
def notebook_init(verbose=True):
|
||||
"""Initializes notebook environment by checking requirements, cleaning up, and displaying system info."""
|
||||
print("Checking setup...")
|
||||
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from ultralytics.utils.checks import check_requirements
|
||||
|
||||
from utils.general import check_font, is_colab
|
||||
from utils.torch_utils import select_device # imports
|
||||
|
||||
check_font()
|
||||
|
||||
import psutil
|
||||
|
||||
if check_requirements("wandb", install=False):
|
||||
os.system("pip uninstall -y wandb") # eliminate unexpected account creation prompt with infinite hang
|
||||
if is_colab():
|
||||
shutil.rmtree("/content/sample_data", ignore_errors=True) # remove colab /sample_data directory
|
||||
|
||||
# System info
|
||||
display = None
|
||||
if verbose:
|
||||
gb = 1 << 30 # bytes to GiB (1024 ** 3)
|
||||
ram = psutil.virtual_memory().total
|
||||
total, used, free = shutil.disk_usage("/")
|
||||
with contextlib.suppress(Exception): # clear display if ipython is installed
|
||||
from IPython import display
|
||||
|
||||
display.clear_output()
|
||||
s = f"({os.cpu_count()} CPUs, {ram / gb:.1f} GB RAM, {(total - free) / gb:.1f}/{total / gb:.1f} GB disk)"
|
||||
else:
|
||||
s = ""
|
||||
|
||||
select_device(newline=False)
|
||||
print(emojis(f"Setup complete ✅ {s}"))
|
||||
return display
|
||||
|
|
|
@ -7,43 +7,54 @@ import torch.nn.functional as F
|
|||
|
||||
|
||||
class SiLU(nn.Module):
|
||||
# SiLU activation https://arxiv.org/pdf/1606.08415.pdf
|
||||
@staticmethod
|
||||
def forward(x):
|
||||
"""
|
||||
Applies the Sigmoid-weighted Linear Unit (SiLU) activation function.
|
||||
|
||||
https://arxiv.org/pdf/1606.08415.pdf.
|
||||
"""
|
||||
return x * torch.sigmoid(x)
|
||||
|
||||
|
||||
class Hardswish(nn.Module):
|
||||
# Hard-SiLU activation
|
||||
@staticmethod
|
||||
def forward(x):
|
||||
# return x * F.hardsigmoid(x) # for TorchScript and CoreML
|
||||
"""
|
||||
Applies the Hardswish activation function, compatible with TorchScript, CoreML, and ONNX.
|
||||
|
||||
Equivalent to x * F.hardsigmoid(x)
|
||||
"""
|
||||
return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0 # for TorchScript, CoreML and ONNX
|
||||
|
||||
|
||||
class Mish(nn.Module):
|
||||
# Mish activation https://github.com/digantamisra98/Mish
|
||||
"""Mish activation https://github.com/digantamisra98/Mish."""
|
||||
|
||||
@staticmethod
|
||||
def forward(x):
|
||||
"""Applies the Mish activation function, a smooth alternative to ReLU."""
|
||||
return x * F.softplus(x).tanh()
|
||||
|
||||
|
||||
class MemoryEfficientMish(nn.Module):
|
||||
# Mish activation memory-efficient
|
||||
class F(torch.autograd.Function):
|
||||
@staticmethod
|
||||
def forward(ctx, x):
|
||||
"""Applies the Mish activation function, a smooth ReLU alternative, to the input tensor `x`."""
|
||||
ctx.save_for_backward(x)
|
||||
return x.mul(torch.tanh(F.softplus(x))) # x * tanh(ln(1 + exp(x)))
|
||||
|
||||
@staticmethod
|
||||
def backward(ctx, grad_output):
|
||||
"""Computes the gradient of the Mish activation function with respect to input `x`."""
|
||||
x = ctx.saved_tensors[0]
|
||||
sx = torch.sigmoid(x)
|
||||
fx = F.softplus(x).tanh()
|
||||
return grad_output * (fx + x * sx * (1 - fx * fx))
|
||||
|
||||
def forward(self, x):
|
||||
"""Applies the Mish activation function to the input tensor `x`."""
|
||||
return self.F.apply(x)
|
||||
|
||||
|
||||
|
@ -55,30 +66,41 @@ class FReLU(nn.Module):
|
|||
self.bn = nn.BatchNorm2d(c1)
|
||||
|
||||
def forward(self, x):
|
||||
"""
|
||||
Applies FReLU activation with max operation between input and BN-convolved input.
|
||||
|
||||
https://arxiv.org/abs/2007.11824
|
||||
"""
|
||||
return torch.max(x, self.bn(self.conv(x)))
|
||||
|
||||
|
||||
class AconC(nn.Module):
|
||||
r"""ACON activation (activate or not)
|
||||
"""
|
||||
ACON activation (activate or not) function.
|
||||
|
||||
AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
|
||||
according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
|
||||
See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf.
|
||||
"""
|
||||
|
||||
def __init__(self, c1):
|
||||
"""Initializes AconC with learnable parameters p1, p2, and beta for channel-wise activation control."""
|
||||
super().__init__()
|
||||
self.p1 = nn.Parameter(torch.randn(1, c1, 1, 1))
|
||||
self.p2 = nn.Parameter(torch.randn(1, c1, 1, 1))
|
||||
self.beta = nn.Parameter(torch.ones(1, c1, 1, 1))
|
||||
|
||||
def forward(self, x):
|
||||
"""Applies AconC activation function with learnable parameters for channel-wise control on input tensor x."""
|
||||
dpx = (self.p1 - self.p2) * x
|
||||
return dpx * torch.sigmoid(self.beta * dpx) + self.p2 * x
|
||||
|
||||
|
||||
class MetaAconC(nn.Module):
|
||||
r"""ACON activation (activate or not)
|
||||
MetaAconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is generated by a small network
|
||||
according to "Activate or Not: Learning Customized Activation" <https://arxiv.org/pdf/2009.04759.pdf>.
|
||||
"""
|
||||
ACON activation (activate or not) function.
|
||||
|
||||
AconC: (p1*x-p2*x) * sigmoid(beta*(p1*x-p2*x)) + p2*x, beta is a learnable parameter
|
||||
See "Activate or Not: Learning Customized Activation" https://arxiv.org/pdf/2009.04759.pdf.
|
||||
"""
|
||||
|
||||
def __init__(self, c1, k=1, s=1, r=16): # ch_in, kernel, stride, r
|
||||
|
@ -92,6 +114,7 @@ class MetaAconC(nn.Module):
|
|||
# self.bn2 = nn.BatchNorm2d(c1)
|
||||
|
||||
def forward(self, x):
|
||||
"""Applies a forward pass transforming input `x` using learnable parameters and sigmoid activation."""
|
||||
y = x.mean(dim=2, keepdims=True).mean(dim=3, keepdims=True)
|
||||
# batch-size 1 bug/instabilities https://github.com/ultralytics/yolov5/issues/2891
|
||||
# beta = torch.sigmoid(self.bn2(self.fc2(self.bn1(self.fc1(y))))) # bug/unstable
|
||||
|
|
|
@ -20,6 +20,7 @@ IMAGENET_STD = 0.229, 0.224, 0.225 # RGB standard deviation
|
|||
class Albumentations:
|
||||
# YOLOv5 Albumentations class (optional, only used if package is installed)
|
||||
def __init__(self, size=640):
|
||||
"""Initializes Albumentations class for optional data augmentation in YOLOv5 with specified input size."""
|
||||
self.transform = None
|
||||
prefix = colorstr("albumentations: ")
|
||||
try:
|
||||
|
@ -46,6 +47,7 @@ class Albumentations:
|
|||
LOGGER.info(f"{prefix}{e}")
|
||||
|
||||
def __call__(self, im, labels, p=1.0):
|
||||
"""Applies transformations to an image and labels with probability `p`, returning updated image and labels."""
|
||||
if self.transform and random.random() < p:
|
||||
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
|
||||
im, labels = new["image"], np.array([[c, *b] for c, b in zip(new["class_labels"], new["bboxes"])])
|
||||
|
@ -53,19 +55,23 @@ class Albumentations:
|
|||
|
||||
|
||||
def normalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD, inplace=False):
|
||||
# Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = (x - mean) / std
|
||||
"""
|
||||
Applies ImageNet normalization to RGB images in BCHW format, modifying them in-place if specified.
|
||||
|
||||
Example: y = (x - mean) / std
|
||||
"""
|
||||
return TF.normalize(x, mean, std, inplace=inplace)
|
||||
|
||||
|
||||
def denormalize(x, mean=IMAGENET_MEAN, std=IMAGENET_STD):
|
||||
# Denormalize RGB images x per ImageNet stats in BCHW format, i.e. = x * std + mean
|
||||
"""Reverses ImageNet normalization for BCHW format RGB images by applying `x = x * std + mean`."""
|
||||
for i in range(3):
|
||||
x[:, i] = x[:, i] * std[i] + mean[i]
|
||||
return x
|
||||
|
||||
|
||||
def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
|
||||
# HSV color-space augmentation
|
||||
"""Applies HSV color-space augmentation to an image with random gains for hue, saturation, and value."""
|
||||
if hgain or sgain or vgain:
|
||||
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
|
||||
hue, sat, val = cv2.split(cv2.cvtColor(im, cv2.COLOR_BGR2HSV))
|
||||
|
@ -81,7 +87,7 @@ def augment_hsv(im, hgain=0.5, sgain=0.5, vgain=0.5):
|
|||
|
||||
|
||||
def hist_equalize(im, clahe=True, bgr=False):
|
||||
# Equalize histogram on BGR image 'im' with im.shape(n,m,3) and range 0-255
|
||||
"""Equalizes image histogram, with optional CLAHE, for BGR or RGB image with shape (n,m,3) and range 0-255."""
|
||||
yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
|
||||
if clahe:
|
||||
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
|
||||
|
@ -92,7 +98,11 @@ def hist_equalize(im, clahe=True, bgr=False):
|
|||
|
||||
|
||||
def replicate(im, labels):
|
||||
# Replicate labels
|
||||
"""
|
||||
Replicates half of the smallest object labels in an image for data augmentation.
|
||||
|
||||
Returns augmented image and labels.
|
||||
"""
|
||||
h, w = im.shape[:2]
|
||||
boxes = labels[:, 1:].astype(int)
|
||||
x1, y1, x2, y2 = boxes.T
|
||||
|
@ -109,7 +119,7 @@ def replicate(im, labels):
|
|||
|
||||
|
||||
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
|
||||
# Resize and pad image while meeting stride-multiple constraints
|
||||
"""Resizes and pads image to new_shape with stride-multiple constraints, returns resized image, ratio, padding."""
|
||||
shape = im.shape[:2] # current shape [height, width]
|
||||
if isinstance(new_shape, int):
|
||||
new_shape = (new_shape, new_shape)
|
||||
|
@ -232,7 +242,11 @@ def random_perspective(
|
|||
|
||||
|
||||
def copy_paste(im, labels, segments, p=0.5):
|
||||
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
|
||||
"""
|
||||
Applies Copy-Paste augmentation by flipping and merging segments and labels on an image.
|
||||
|
||||
Details at https://arxiv.org/abs/2012.07177.
|
||||
"""
|
||||
n = len(segments)
|
||||
if p and n:
|
||||
h, w, c = im.shape # height, width, channels
|
||||
|
@ -254,7 +268,11 @@ def copy_paste(im, labels, segments, p=0.5):
|
|||
|
||||
|
||||
def cutout(im, labels, p=0.5):
|
||||
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
|
||||
"""
|
||||
Applies cutout augmentation to an image with optional label adjustment, using random masks of varying sizes.
|
||||
|
||||
Details at https://arxiv.org/abs/1708.04552.
|
||||
"""
|
||||
if random.random() < p:
|
||||
h, w = im.shape[:2]
|
||||
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
|
||||
|
@ -281,7 +299,11 @@ def cutout(im, labels, p=0.5):
|
|||
|
||||
|
||||
def mixup(im, labels, im2, labels2):
|
||||
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
|
||||
"""
|
||||
Applies MixUp augmentation by blending images and labels.
|
||||
|
||||
See https://arxiv.org/pdf/1710.09412.pdf for details.
|
||||
"""
|
||||
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
|
||||
im = (im * r + im2 * (1 - r)).astype(np.uint8)
|
||||
labels = np.concatenate((labels, labels2), 0)
|
||||
|
@ -341,7 +363,7 @@ def classify_albumentations(
|
|||
|
||||
|
||||
def classify_transforms(size=224):
|
||||
# Transforms to apply if albumentations not installed
|
||||
"""Applies a series of transformations including center crop, ToTensor, and normalization for classification."""
|
||||
assert isinstance(size, int), f"ERROR: classify_transforms size {size} must be integer, not (list, tuple)"
|
||||
# T.Compose([T.ToTensor(), T.Resize(size), T.CenterCrop(size), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
|
||||
return T.Compose([CenterCrop(size), ToTensor(), T.Normalize(IMAGENET_MEAN, IMAGENET_STD)])
|
||||
|
@ -350,6 +372,9 @@ def classify_transforms(size=224):
|
|||
class LetterBox:
|
||||
# YOLOv5 LetterBox class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
|
||||
def __init__(self, size=(640, 640), auto=False, stride=32):
|
||||
"""Initializes a LetterBox object for YOLOv5 image preprocessing with optional auto sizing and stride
|
||||
adjustment.
|
||||
"""
|
||||
super().__init__()
|
||||
self.h, self.w = (size, size) if isinstance(size, int) else size
|
||||
self.auto = auto # pass max size integer, automatically solve for short side using stride
|
||||
|
@ -369,6 +394,7 @@ class LetterBox:
|
|||
class CenterCrop:
|
||||
# YOLOv5 CenterCrop class for image preprocessing, i.e. T.Compose([CenterCrop(size), ToTensor()])
|
||||
def __init__(self, size=640):
|
||||
"""Initializes CenterCrop for image preprocessing, accepting single int or tuple for size, defaults to 640."""
|
||||
super().__init__()
|
||||
self.h, self.w = (size, size) if isinstance(size, int) else size
|
||||
|
||||
|
@ -382,6 +408,7 @@ class CenterCrop:
|
|||
class ToTensor:
|
||||
# YOLOv5 ToTensor class for image preprocessing, i.e. T.Compose([LetterBox(size), ToTensor()])
|
||||
def __init__(self, half=False):
|
||||
"""Initializes ToTensor for YOLOv5 image preprocessing, with optional half precision (half=True for FP16)."""
|
||||
super().__init__()
|
||||
self.half = half
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ PREFIX = colorstr("AutoAnchor: ")
|
|||
|
||||
|
||||
def check_anchor_order(m):
|
||||
# Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
|
||||
"""Checks and corrects anchor order against stride in YOLOv5 Detect() module if necessary."""
|
||||
a = m.anchors.prod(-1).mean(-1).view(-1) # mean anchor area per output layer
|
||||
da = a[-1] - a[0] # delta a
|
||||
ds = m.stride[-1] - m.stride[0] # delta s
|
||||
|
@ -26,7 +26,7 @@ def check_anchor_order(m):
|
|||
|
||||
@TryExcept(f"{PREFIX}ERROR")
|
||||
def check_anchors(dataset, model, thr=4.0, imgsz=640):
|
||||
# Check anchor fit to data, recompute if necessary
|
||||
"""Evaluates anchor fit to dataset and adjusts if necessary, supporting customizable threshold and image size."""
|
||||
m = model.module.model[-1] if hasattr(model, "module") else model.model[-1] # Detect()
|
||||
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
|
||||
scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
|
||||
|
|
|
@ -11,13 +11,13 @@ from utils.torch_utils import profile
|
|||
|
||||
|
||||
def check_train_batch_size(model, imgsz=640, amp=True):
|
||||
# Check YOLOv5 training batch size
|
||||
"""Checks and computes optimal training batch size for YOLOv5 model, given image size and AMP setting."""
|
||||
with torch.cuda.amp.autocast(amp):
|
||||
return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
|
||||
|
||||
|
||||
def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
|
||||
# Automatically estimate best YOLOv5 batch size to use `fraction` of available CUDA memory
|
||||
"""Estimates optimal YOLOv5 batch size using `fraction` of CUDA memory."""
|
||||
# Usage:
|
||||
# import torch
|
||||
# from utils.autobatch import autobatch
|
||||
|
|
|
@ -8,7 +8,7 @@ class Callbacks:
|
|||
"""Handles all registered callbacks for YOLOv5 Hooks."""
|
||||
|
||||
def __init__(self):
|
||||
# Define the available callbacks
|
||||
"""Initializes a Callbacks object to manage registered YOLOv5 training event hooks."""
|
||||
self._callbacks = {
|
||||
"on_pretrain_routine_start": [],
|
||||
"on_pretrain_routine_end": [],
|
||||
|
|
|
@ -73,7 +73,7 @@ for orientation in ExifTags.TAGS.keys():
|
|||
|
||||
|
||||
def get_hash(paths):
|
||||
# Returns a single hash value of a list of paths (files or dirs)
|
||||
"""Generates a single SHA256 hash for a list of file or directory paths by combining their sizes and paths."""
|
||||
size = sum(os.path.getsize(p) for p in paths if os.path.exists(p)) # sizes
|
||||
h = hashlib.sha256(str(size).encode()) # hash sizes
|
||||
h.update("".join(paths).encode()) # hash paths
|
||||
|
@ -81,7 +81,7 @@ def get_hash(paths):
|
|||
|
||||
|
||||
def exif_size(img):
|
||||
# Returns exif-corrected PIL size
|
||||
"""Returns corrected PIL image size (width, height) considering EXIF orientation."""
|
||||
s = img.size # (width, height)
|
||||
with contextlib.suppress(Exception):
|
||||
rotation = dict(img._getexif().items())[orientation]
|
||||
|
@ -118,7 +118,11 @@ def exif_transpose(image):
|
|||
|
||||
|
||||
def seed_worker(worker_id):
|
||||
# Set dataloader worker seed https://pytorch.org/docs/stable/notes/randomness.html#dataloader
|
||||
"""
|
||||
Sets the seed for a dataloader worker to ensure reproducibility, based on PyTorch's randomness notes.
|
||||
|
||||
See https://pytorch.org/docs/stable/notes/randomness.html#dataloader.
|
||||
"""
|
||||
worker_seed = torch.initial_seed() % 2**32
|
||||
np.random.seed(worker_seed)
|
||||
random.seed(worker_seed)
|
||||
|
@ -128,7 +132,7 @@ def seed_worker(worker_id):
|
|||
# https://github.com/pytorch/pytorch/blob/master/torch/utils/data/distributed.py
|
||||
class SmartDistributedSampler(distributed.DistributedSampler):
|
||||
def __iter__(self):
|
||||
# deterministically shuffle based on epoch and seed
|
||||
"""Yields indices for distributed data sampling, shuffled deterministically based on epoch and seed."""
|
||||
g = torch.Generator()
|
||||
g.manual_seed(self.seed + self.epoch)
|
||||
|
||||
|
@ -218,14 +222,19 @@ class InfiniteDataLoader(dataloader.DataLoader):
|
|||
"""
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""Initializes an InfiniteDataLoader that reuses workers with standard DataLoader syntax, augmenting with a
|
||||
repeating sampler.
|
||||
"""
|
||||
super().__init__(*args, **kwargs)
|
||||
object.__setattr__(self, "batch_sampler", _RepeatSampler(self.batch_sampler))
|
||||
self.iterator = super().__iter__()
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the length of the batch sampler's sampler in the InfiniteDataLoader."""
|
||||
return len(self.batch_sampler.sampler)
|
||||
|
||||
def __iter__(self):
|
||||
"""Yields batches of data indefinitely in a loop by resetting the sampler when exhausted."""
|
||||
for _ in range(len(self)):
|
||||
yield next(self.iterator)
|
||||
|
||||
|
@ -239,9 +248,11 @@ class _RepeatSampler:
|
|||
"""
|
||||
|
||||
def __init__(self, sampler):
|
||||
"""Initializes a perpetual sampler wrapping a provided `Sampler` instance for endless data iteration."""
|
||||
self.sampler = sampler
|
||||
|
||||
def __iter__(self):
|
||||
"""Returns an infinite iterator over the dataset by repeatedly yielding from the given sampler."""
|
||||
while True:
|
||||
yield from iter(self.sampler)
|
||||
|
||||
|
@ -249,7 +260,12 @@ class _RepeatSampler:
|
|||
class LoadScreenshots:
|
||||
# YOLOv5 screenshot dataloader, i.e. `python detect.py --source "screen 0 100 100 512 256"`
|
||||
def __init__(self, source, img_size=640, stride=32, auto=True, transforms=None):
|
||||
# source = [screen_number left top width height] (pixels)
|
||||
"""
|
||||
Initializes a screenshot dataloader for YOLOv5 with specified source region, image size, stride, auto, and
|
||||
transforms.
|
||||
|
||||
Source = [screen_number left top width height] (pixels)
|
||||
"""
|
||||
check_requirements("mss")
|
||||
import mss
|
||||
|
||||
|
@ -278,10 +294,13 @@ class LoadScreenshots:
|
|||
self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}
|
||||
|
||||
def __iter__(self):
|
||||
"""Iterates over itself, enabling use in loops and iterable contexts."""
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
# mss screen capture: get raw pixels from the screen as np array
|
||||
"""Captures and returns the next screen frame as a BGR numpy array, cropping to only the first three channels
|
||||
from BGRA.
|
||||
"""
|
||||
im0 = np.array(self.sct.grab(self.monitor))[:, :, :3] # [:, :, :3] BGRA to BGR
|
||||
s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
|
||||
|
||||
|
@ -296,8 +315,10 @@ class LoadScreenshots:
|
|||
|
||||
|
||||
class LoadImages:
|
||||
# YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`
|
||||
"""YOLOv5 image/video dataloader, i.e. `python detect.py --source image.jpg/vid.mp4`"""
|
||||
|
||||
def __init__(self, path, img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
|
||||
"""Initializes YOLOv5 loader for images/videos, supporting glob patterns, directories, and lists of paths."""
|
||||
if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
|
||||
path = Path(path).read_text().rsplit()
|
||||
files = []
|
||||
|
@ -335,10 +356,12 @@ class LoadImages:
|
|||
)
|
||||
|
||||
def __iter__(self):
|
||||
"""Initializes iterator by resetting count and returns the iterator object itself."""
|
||||
self.count = 0
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
"""Advances to the next file in the dataset, raising StopIteration if at the end."""
|
||||
if self.count == self.nf:
|
||||
raise StopIteration
|
||||
path = self.files[self.count]
|
||||
|
@ -379,7 +402,9 @@ class LoadImages:
|
|||
return path, im, im0, self.cap, s
|
||||
|
||||
def _new_video(self, path):
|
||||
# Create a new video capture object
|
||||
"""Initializes a new video capture object with path, frame count adjusted by stride, and orientation
|
||||
metadata.
|
||||
"""
|
||||
self.frame = 0
|
||||
self.cap = cv2.VideoCapture(path)
|
||||
self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)
|
||||
|
@ -387,7 +412,7 @@ class LoadImages:
|
|||
# self.cap.set(cv2.CAP_PROP_ORIENTATION_AUTO, 0) # disable https://github.com/ultralytics/yolov5/issues/8493
|
||||
|
||||
def _cv2_rotate(self, im):
|
||||
# Rotate a cv2 video manually
|
||||
"""Rotates a cv2 image based on its orientation; supports 0, 90, and 180 degrees rotations."""
|
||||
if self.orientation == 0:
|
||||
return cv2.rotate(im, cv2.ROTATE_90_CLOCKWISE)
|
||||
elif self.orientation == 180:
|
||||
|
@ -397,12 +422,16 @@ class LoadImages:
|
|||
return im
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the number of files in the dataset."""
|
||||
return self.nf # number of files
|
||||
|
||||
|
||||
class LoadStreams:
|
||||
# YOLOv5 streamloader, i.e. `python detect.py --source 'rtsp://example.com/media.mp4' # RTSP, RTMP, HTTP streams`
|
||||
def __init__(self, sources="file.streams", img_size=640, stride=32, auto=True, transforms=None, vid_stride=1):
|
||||
"""Initializes a stream loader for processing video streams with YOLOv5, supporting various sources including
|
||||
YouTube.
|
||||
"""
|
||||
torch.backends.cudnn.benchmark = True # faster for fixed-size inference
|
||||
self.mode = "stream"
|
||||
self.img_size = img_size
|
||||
|
@ -448,7 +477,7 @@ class LoadStreams:
|
|||
LOGGER.warning("WARNING ⚠️ Stream shapes differ. For optimal performance supply similarly-shaped streams.")
|
||||
|
||||
def update(self, i, cap, stream):
|
||||
# Read stream `i` frames in daemon thread
|
||||
"""Reads frames from stream `i`, updating imgs array; handles stream reopening on signal loss."""
|
||||
n, f = 0, self.frames[i] # frame number, frame array
|
||||
while cap.isOpened() and n < f:
|
||||
n += 1
|
||||
|
@ -464,10 +493,14 @@ class LoadStreams:
|
|||
time.sleep(0.0) # wait time
|
||||
|
||||
def __iter__(self):
|
||||
"""Resets and returns the iterator for iterating over video frames or images in a dataset."""
|
||||
self.count = -1
|
||||
return self
|
||||
|
||||
def __next__(self):
|
||||
"""Iterates over video frames or images, halting on thread stop or 'q' key press, raising `StopIteration` when
|
||||
done.
|
||||
"""
|
||||
self.count += 1
|
||||
if not all(x.is_alive() for x in self.threads) or cv2.waitKey(1) == ord("q"): # q to quit
|
||||
cv2.destroyAllWindows()
|
||||
|
@ -484,11 +517,14 @@ class LoadStreams:
|
|||
return self.sources, im, im0, None, ""
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the number of sources in the dataset, supporting up to 32 streams at 30 FPS over 30 years."""
|
||||
return len(self.sources) # 1E12 frames = 32 streams at 30 FPS for 30 years
|
||||
|
||||
|
||||
def img2label_paths(img_paths):
|
||||
# Define label paths as a function of image paths
|
||||
"""Generates label file paths from corresponding image file paths by replacing `/images/` with `/labels/` and
|
||||
extension with `.txt`.
|
||||
"""
|
||||
sa, sb = f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}" # /images/, /labels/ substrings
|
||||
return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths]
|
||||
|
||||
|
@ -657,7 +693,7 @@ class LoadImagesAndLabels(Dataset):
|
|||
pbar.close()
|
||||
|
||||
def check_cache_ram(self, safety_margin=0.1, prefix=""):
|
||||
# Check image caching requirements vs available memory
|
||||
"""Checks if available RAM is sufficient for caching images, adjusting for a safety margin."""
|
||||
b, gb = 0, 1 << 30 # bytes of cached images, bytes per gigabytes
|
||||
n = min(self.n, 30) # extrapolate from 30 random images
|
||||
for _ in range(n):
|
||||
|
@ -676,7 +712,7 @@ class LoadImagesAndLabels(Dataset):
|
|||
return cache
|
||||
|
||||
def cache_labels(self, path=Path("./labels.cache"), prefix=""):
|
||||
# Cache dataset labels, check images and read shapes
|
||||
"""Caches dataset labels, verifies images, reads shapes, and tracks dataset integrity."""
|
||||
x = {} # dict
|
||||
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
|
||||
desc = f"{prefix}Scanning {path.parent / path.stem}..."
|
||||
|
@ -716,6 +752,7 @@ class LoadImagesAndLabels(Dataset):
|
|||
return x
|
||||
|
||||
def __len__(self):
|
||||
"""Returns the number of images in the dataset."""
|
||||
return len(self.im_files)
|
||||
|
||||
# def __iter__(self):
|
||||
|
@ -725,6 +762,7 @@ class LoadImagesAndLabels(Dataset):
|
|||
# return self
|
||||
|
||||
def __getitem__(self, index):
|
||||
"""Fetches the dataset item at the given index, considering linear, shuffled, or weighted sampling."""
|
||||
index = self.indices[index] # linear, shuffled, or image_weights
|
||||
|
||||
hyp = self.hyp
|
||||
|
@ -801,7 +839,11 @@ class LoadImagesAndLabels(Dataset):
|
|||
return torch.from_numpy(img), labels_out, self.im_files[index], shapes
|
||||
|
||||
def load_image(self, i):
|
||||
# Loads 1 image from dataset index 'i', returns (im, original hw, resized hw)
|
||||
"""
|
||||
Loads an image by index, returning the image, its original dimensions, and resized dimensions.
|
||||
|
||||
Returns (im, original hw, resized hw)
|
||||
"""
|
||||
im, f, fn = (
|
||||
self.ims[i],
|
||||
self.im_files[i],
|
||||
|
@ -822,13 +864,13 @@ class LoadImagesAndLabels(Dataset):
|
|||
return self.ims[i], self.im_hw0[i], self.im_hw[i] # im, hw_original, hw_resized
|
||||
|
||||
def cache_images_to_disk(self, i):
|
||||
# Saves an image as an *.npy file for faster loading
|
||||
"""Saves an image to disk as an *.npy file for quicker loading, identified by index `i`."""
|
||||
f = self.npy_files[i]
|
||||
if not f.exists():
|
||||
np.save(f.as_posix(), cv2.imread(self.im_files[i]))
|
||||
|
||||
def load_mosaic(self, index):
|
||||
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
|
||||
"""Loads a 4-image mosaic for YOLOv5, combining 1 selected and 3 random images, with labels and segments."""
|
||||
labels4, segments4 = [], []
|
||||
s = self.img_size
|
||||
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
|
||||
|
@ -888,7 +930,9 @@ class LoadImagesAndLabels(Dataset):
|
|||
return img4, labels4
|
||||
|
||||
def load_mosaic9(self, index):
|
||||
# YOLOv5 9-mosaic loader. Loads 1 image + 8 random images into a 9-image mosaic
|
||||
"""Loads 1 image + 8 random images into a 9-image mosaic for augmented YOLOv5 training, returning labels and
|
||||
segments.
|
||||
"""
|
||||
labels9, segments9 = [], []
|
||||
s = self.img_size
|
||||
indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
|
||||
|
@ -968,6 +1012,7 @@ class LoadImagesAndLabels(Dataset):
|
|||
|
||||
@staticmethod
|
||||
def collate_fn(batch):
|
||||
"""Batches images, labels, paths, and shapes, assigning unique indices to targets in merged label tensor."""
|
||||
im, label, path, shapes = zip(*batch) # transposed
|
||||
for i, lb in enumerate(label):
|
||||
lb[:, 0] = i # add target image index for build_targets()
|
||||
|
@ -975,6 +1020,7 @@ class LoadImagesAndLabels(Dataset):
|
|||
|
||||
@staticmethod
|
||||
def collate_fn4(batch):
|
||||
"""Bundles a batch's data by quartering the number of shapes and paths, preparing it for model input."""
|
||||
im, label, path, shapes = zip(*batch) # transposed
|
||||
n = len(shapes) // 4
|
||||
im4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
|
||||
|
@ -1003,7 +1049,9 @@ class LoadImagesAndLabels(Dataset):
|
|||
|
||||
# Ancillary functions --------------------------------------------------------------------------------------------------
|
||||
def flatten_recursive(path=DATASETS_DIR / "coco128"):
|
||||
# Flatten a recursive directory by bringing all files to top level
|
||||
"""Flattens a directory by copying all files from subdirectories to a new top-level directory, preserving
|
||||
filenames.
|
||||
"""
|
||||
new_path = Path(f"{str(path)}_flat")
|
||||
if os.path.exists(new_path):
|
||||
shutil.rmtree(new_path) # delete output folder
|
||||
|
@ -1073,7 +1121,7 @@ def autosplit(path=DATASETS_DIR / "coco128/images", weights=(0.9, 0.1, 0.0), ann
|
|||
|
||||
|
||||
def verify_image_label(args):
|
||||
# Verify one image-label pair
|
||||
"""Verifies a single image-label pair, ensuring image format, size, and legal label values."""
|
||||
im_file, lb_file, prefix = args
|
||||
nm, nf, ne, nc, msg, segments = 0, 0, 0, 0, "", [] # number (missing, found, empty, corrupt), message, segments
|
||||
try:
|
||||
|
@ -1141,7 +1189,9 @@ class HUBDatasetStats:
|
|||
"""
|
||||
|
||||
def __init__(self, path="coco128.yaml", autodownload=False):
|
||||
# Initialize class
|
||||
"""Initializes HUBDatasetStats with optional auto-download for datasets, given a path to dataset YAML or ZIP
|
||||
file.
|
||||
"""
|
||||
zipped, data_dir, yaml_path = self._unzip(Path(path))
|
||||
try:
|
||||
with open(check_yaml(yaml_path), errors="ignore") as f:
|
||||
|
@ -1160,7 +1210,9 @@ class HUBDatasetStats:
|
|||
|
||||
@staticmethod
|
||||
def _find_yaml(dir):
|
||||
# Return data.yaml file
|
||||
"""Finds and returns the path to a single '.yaml' file in the specified directory, preferring files that match
|
||||
the directory name.
|
||||
"""
|
||||
files = list(dir.glob("*.yaml")) or list(dir.rglob("*.yaml")) # try root level first and then recursive
|
||||
assert files, f"No *.yaml file found in {dir}"
|
||||
if len(files) > 1:
|
||||
|
@ -1170,7 +1222,7 @@ class HUBDatasetStats:
|
|||
return files[0]
|
||||
|
||||
def _unzip(self, path):
|
||||
# Unzip data.zip
|
||||
"""Unzips a .zip file at 'path', returning success status, unzipped directory, and path to YAML file within."""
|
||||
if not str(path).endswith(".zip"): # path is data.yaml
|
||||
return False, None, path
|
||||
assert Path(path).is_file(), f"Error unzipping {path}, file not found"
|
||||
|
@ -1180,7 +1232,7 @@ class HUBDatasetStats:
|
|||
return True, str(dir), self._find_yaml(dir) # zipped, data_dir, yaml_path
|
||||
|
||||
def _hub_ops(self, f, max_dim=1920):
|
||||
# HUB ops for 1 image 'f': resize and save at reduced quality in /dataset-hub for web/app viewing
|
||||
"""Resizes and saves an image at reduced quality for web/app viewing, supporting both PIL and OpenCV."""
|
||||
f_new = self.im_dir / Path(f).name # dataset-hub image filename
|
||||
try: # use PIL
|
||||
im = Image.open(f)
|
||||
|
@ -1198,7 +1250,8 @@ class HUBDatasetStats:
|
|||
cv2.imwrite(str(f_new), im)
|
||||
|
||||
def get_json(self, save=False, verbose=False):
|
||||
# Return dataset JSON for Ultralytics HUB
|
||||
"""Generates dataset JSON for Ultralytics HUB, optionally saves or prints it; save=bool, verbose=bool."""
|
||||
|
||||
def _round(labels):
|
||||
# Update labels to integer class and 6 decimal place floats
|
||||
return [[int(c), *(round(x, 4) for x in points)] for c, *points in labels]
|
||||
|
@ -1235,7 +1288,9 @@ class HUBDatasetStats:
|
|||
return self.stats
|
||||
|
||||
def process_images(self):
|
||||
# Compress images for Ultralytics HUB
|
||||
"""Compresses images for Ultralytics HUB across 'train', 'val', 'test' splits and saves to specified
|
||||
directory.
|
||||
"""
|
||||
for split in "train", "val", "test":
|
||||
if self.data.get(split) is None:
|
||||
continue
|
||||
|
@ -1259,6 +1314,9 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
|
|||
"""
|
||||
|
||||
def __init__(self, root, augment, imgsz, cache=False):
|
||||
"""Initializes YOLOv5 Classification Dataset with optional caching, augmentations, and transforms for image
|
||||
classification.
|
||||
"""
|
||||
super().__init__(root=root)
|
||||
self.torch_transforms = classify_transforms(imgsz)
|
||||
self.album_transforms = classify_albumentations(augment, imgsz) if augment else None
|
||||
|
@ -1267,6 +1325,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
|
|||
self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples] # file, index, npy, im
|
||||
|
||||
def __getitem__(self, i):
|
||||
"""Fetches and transforms an image sample by index, supporting RAM/disk caching and Augmentations."""
|
||||
f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image
|
||||
if self.cache_ram and im is None:
|
||||
im = self.samples[i][3] = cv2.imread(f)
|
||||
|
|
|
@ -11,7 +11,7 @@ import torch
|
|||
|
||||
|
||||
def is_url(url, check=True):
|
||||
# Check if string is URL and check if URL exists
|
||||
"""Determines if a string is a URL and optionally checks its existence online, returning a boolean."""
|
||||
try:
|
||||
url = str(url)
|
||||
result = urllib.parse.urlparse(url)
|
||||
|
@ -22,13 +22,17 @@ def is_url(url, check=True):
|
|||
|
||||
|
||||
def gsutil_getsize(url=""):
|
||||
# gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
|
||||
"""
|
||||
Returns the size in bytes of a file at a Google Cloud Storage URL using `gsutil du`.
|
||||
|
||||
Returns 0 if the command fails or output is empty.
|
||||
"""
|
||||
output = subprocess.check_output(["gsutil", "du", url], shell=True, encoding="utf-8")
|
||||
return int(output.split()[0]) if output else 0
|
||||
|
||||
|
||||
def url_getsize(url="https://ultralytics.com/images/bus.jpg"):
|
||||
# Return downloadable file size in bytes
|
||||
"""Returns the size in bytes of a downloadable file at a given URL; defaults to -1 if not found."""
|
||||
response = requests.head(url, allow_redirects=True)
|
||||
return int(response.headers.get("content-length", -1))
|
||||
|
||||
|
@ -54,7 +58,11 @@ def curl_download(url, filename, *, silent: bool = False) -> bool:
|
|||
|
||||
|
||||
def safe_download(file, url, url2=None, min_bytes=1e0, error_msg=""):
|
||||
# Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
|
||||
"""
|
||||
Downloads a file from a URL (or alternate URL) to a specified path if file is above a minimum size.
|
||||
|
||||
Removes incomplete downloads.
|
||||
"""
|
||||
from utils.general import LOGGER
|
||||
|
||||
file = Path(file)
|
||||
|
@ -78,7 +86,9 @@ def safe_download(file, url, url2=None, min_bytes=1e0, error_msg=""):
|
|||
|
||||
|
||||
def attempt_download(file, repo="ultralytics/yolov5", release="v7.0"):
|
||||
# Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v7.0', etc.
|
||||
"""Downloads a file from GitHub release assets or via direct URL if not found locally, supporting backup
|
||||
versions.
|
||||
"""
|
||||
from utils.general import LOGGER
|
||||
|
||||
def github_assets(repository, version="latest"):
|
||||
|
|
|
@ -16,6 +16,9 @@ DETECTION_URL = "/v1/object-detection/<model>"
|
|||
|
||||
@app.route(DETECTION_URL, methods=["POST"])
|
||||
def predict(model):
|
||||
"""Predict and return object detections in JSON format given an image and model name via a Flask REST API POST
|
||||
request.
|
||||
"""
|
||||
if request.method != "POST":
|
||||
return
|
||||
|
||||
|
|
161
utils/general.py
161
utils/general.py
|
@ -71,18 +71,18 @@ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" # suppress verbose TF compiler warning
|
|||
|
||||
|
||||
def is_ascii(s=""):
|
||||
# Is string composed of all ASCII (no UTF) characters? (note str().isascii() introduced in python 3.7)
|
||||
"""Checks if input string `s` contains only ASCII characters; returns `True` if so, otherwise `False`."""
|
||||
s = str(s) # convert list, tuple, None, etc. to str
|
||||
return len(s.encode().decode("ascii", "ignore")) == len(s)
|
||||
|
||||
|
||||
def is_chinese(s="人工智能"):
|
||||
# Is string composed of any Chinese characters?
|
||||
"""Determines if a string `s` contains any Chinese characters; returns `True` if so, otherwise `False`."""
|
||||
return bool(re.search("[\u4e00-\u9fff]", str(s)))
|
||||
|
||||
|
||||
def is_colab():
|
||||
# Is environment a Google Colab instance?
|
||||
"""Checks if the current environment is a Google Colab instance; returns `True` for Colab, otherwise `False`."""
|
||||
return "google.colab" in sys.modules
|
||||
|
||||
|
||||
|
@ -101,7 +101,7 @@ def is_jupyter():
|
|||
|
||||
|
||||
def is_kaggle():
|
||||
# Is environment a Kaggle Notebook?
|
||||
"""Checks if the current environment is a Kaggle Notebook by validating environment variables."""
|
||||
return os.environ.get("PWD") == "/kaggle/working" and os.environ.get("KAGGLE_URL_BASE") == "https://www.kaggle.com"
|
||||
|
||||
|
||||
|
@ -117,7 +117,7 @@ def is_docker() -> bool:
|
|||
|
||||
|
||||
def is_writeable(dir, test=False):
|
||||
# Return True if directory has write permissions, test opening a file with write permissions if test=True
|
||||
"""Checks if a directory is writable, optionally testing by creating a temporary file if `test=True`."""
|
||||
if not test:
|
||||
return os.access(dir, os.W_OK) # possible issues on Windows
|
||||
file = Path(dir) / "tmp.txt"
|
||||
|
@ -134,7 +134,7 @@ LOGGING_NAME = "yolov5"
|
|||
|
||||
|
||||
def set_logging(name=LOGGING_NAME, verbose=True):
|
||||
# sets up logging for the given name
|
||||
"""Configures logging with specified verbosity; `name` sets the logger's name, `verbose` controls logging level."""
|
||||
rank = int(os.getenv("RANK", -1)) # rank in world for Multi-GPU trainings
|
||||
level = logging.INFO if verbose and rank in {-1, 0} else logging.ERROR
|
||||
logging.config.dictConfig(
|
||||
|
@ -168,7 +168,9 @@ if platform.system() == "Windows":
|
|||
|
||||
|
||||
def user_config_dir(dir="Ultralytics", env_var="YOLOV5_CONFIG_DIR"):
|
||||
# Return path of user configuration directory. Prefer environment variable if exists. Make dir if required.
|
||||
"""Returns user configuration directory path, preferring environment variable `YOLOV5_CONFIG_DIR` if set, else OS-
|
||||
specific.
|
||||
"""
|
||||
env = os.getenv(env_var)
|
||||
if env:
|
||||
path = Path(env) # use environment variable
|
||||
|
@ -186,19 +188,23 @@ CONFIG_DIR = user_config_dir() # Ultralytics settings dir
|
|||
class Profile(contextlib.ContextDecorator):
|
||||
# YOLOv5 Profile class. Usage: @Profile() decorator or 'with Profile():' context manager
|
||||
def __init__(self, t=0.0, device: torch.device = None):
|
||||
"""Initializes a profiling context for YOLOv5 with optional timing threshold and device specification."""
|
||||
self.t = t
|
||||
self.device = device
|
||||
self.cuda = bool(device and str(device).startswith("cuda"))
|
||||
|
||||
def __enter__(self):
|
||||
"""Initializes timing at the start of a profiling context block for performance measurement."""
|
||||
self.start = self.time()
|
||||
return self
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
"""Concludes timing, updating duration for profiling upon exiting a context block."""
|
||||
self.dt = self.time() - self.start # delta-time
|
||||
self.t += self.dt # accumulate dt
|
||||
|
||||
def time(self):
|
||||
"""Measures and returns the current time, synchronizing CUDA operations if `cuda` is True."""
|
||||
if self.cuda:
|
||||
torch.cuda.synchronize(self.device)
|
||||
return time.time()
|
||||
|
@ -207,19 +213,23 @@ class Profile(contextlib.ContextDecorator):
|
|||
class Timeout(contextlib.ContextDecorator):
|
||||
# YOLOv5 Timeout class. Usage: @Timeout(seconds) decorator or 'with Timeout(seconds):' context manager
|
||||
def __init__(self, seconds, *, timeout_msg="", suppress_timeout_errors=True):
|
||||
"""Initializes a timeout context/decorator with defined seconds, optional message, and error suppression."""
|
||||
self.seconds = int(seconds)
|
||||
self.timeout_message = timeout_msg
|
||||
self.suppress = bool(suppress_timeout_errors)
|
||||
|
||||
def _timeout_handler(self, signum, frame):
|
||||
"""Raises a TimeoutError with a custom message when a timeout event occurs."""
|
||||
raise TimeoutError(self.timeout_message)
|
||||
|
||||
def __enter__(self):
|
||||
"""Initializes timeout mechanism on non-Windows platforms, starting a countdown to raise TimeoutError."""
|
||||
if platform.system() != "Windows": # not supported on Windows
|
||||
signal.signal(signal.SIGALRM, self._timeout_handler) # Set handler for SIGALRM
|
||||
signal.alarm(self.seconds) # start countdown for SIGALRM to be raised
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Disables active alarm on non-Windows systems and optionally suppresses TimeoutError if set."""
|
||||
if platform.system() != "Windows":
|
||||
signal.alarm(0) # Cancel SIGALRM if it's scheduled
|
||||
if self.suppress and exc_type is TimeoutError: # Suppress TimeoutError
|
||||
|
@ -229,23 +239,26 @@ class Timeout(contextlib.ContextDecorator):
|
|||
class WorkingDirectory(contextlib.ContextDecorator):
|
||||
# Usage: @WorkingDirectory(dir) decorator or 'with WorkingDirectory(dir):' context manager
|
||||
def __init__(self, new_dir):
|
||||
"""Initializes a context manager/decorator to temporarily change the working directory."""
|
||||
self.dir = new_dir # new dir
|
||||
self.cwd = Path.cwd().resolve() # current dir
|
||||
|
||||
def __enter__(self):
|
||||
"""Temporarily changes the working directory within a 'with' statement context."""
|
||||
os.chdir(self.dir)
|
||||
|
||||
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||
"""Restores the original working directory upon exiting a 'with' statement context."""
|
||||
os.chdir(self.cwd)
|
||||
|
||||
|
||||
def methods(instance):
|
||||
# Get class/instance methods
|
||||
"""Returns list of method names for a class/instance excluding dunder methods."""
|
||||
return [f for f in dir(instance) if callable(getattr(instance, f)) and not f.startswith("__")]
|
||||
|
||||
|
||||
def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
|
||||
# Print function arguments (optional args dict)
|
||||
"""Logs the arguments of the calling function, with options to include the filename and function name."""
|
||||
x = inspect.currentframe().f_back # previous frame
|
||||
file, _, func, _, _ = inspect.getframeinfo(x)
|
||||
if args is None: # get args automatically
|
||||
|
@ -260,7 +273,11 @@ def print_args(args: Optional[dict] = None, show_file=True, show_func=False):
|
|||
|
||||
|
||||
def init_seeds(seed=0, deterministic=False):
|
||||
# Initialize random number generator (RNG) seeds https://pytorch.org/docs/stable/notes/randomness.html
|
||||
"""
|
||||
Initializes RNG seeds and sets deterministic options if specified.
|
||||
|
||||
See https://pytorch.org/docs/stable/notes/randomness.html
|
||||
"""
|
||||
random.seed(seed)
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
|
@ -275,36 +292,38 @@ def init_seeds(seed=0, deterministic=False):
|
|||
|
||||
|
||||
def intersect_dicts(da, db, exclude=()):
|
||||
# Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
|
||||
"""Returns intersection of `da` and `db` dicts with matching keys and shapes, excluding `exclude` keys; uses `da`
|
||||
values.
|
||||
"""
|
||||
return {k: v for k, v in da.items() if k in db and all(x not in k for x in exclude) and v.shape == db[k].shape}
|
||||
|
||||
|
||||
def get_default_args(func):
|
||||
# Get func() default arguments
|
||||
"""Returns a dict of `func` default arguments by inspecting its signature."""
|
||||
signature = inspect.signature(func)
|
||||
return {k: v.default for k, v in signature.parameters.items() if v.default is not inspect.Parameter.empty}
|
||||
|
||||
|
||||
def get_latest_run(search_dir="."):
|
||||
# Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
|
||||
"""Returns the path to the most recent 'last.pt' file in /runs to resume from, searches in `search_dir`."""
|
||||
last_list = glob.glob(f"{search_dir}/**/last*.pt", recursive=True)
|
||||
return max(last_list, key=os.path.getctime) if last_list else ""
|
||||
|
||||
|
||||
def file_age(path=__file__):
|
||||
# Return days since last file update
|
||||
"""Calculates and returns the age of a file in days based on its last modification time."""
|
||||
dt = datetime.now() - datetime.fromtimestamp(Path(path).stat().st_mtime) # delta
|
||||
return dt.days # + dt.seconds / 86400 # fractional days
|
||||
|
||||
|
||||
def file_date(path=__file__):
|
||||
# Return human-readable file modification date, i.e. '2021-3-26'
|
||||
"""Returns a human-readable file modification date in 'YYYY-M-D' format, given a file path."""
|
||||
t = datetime.fromtimestamp(Path(path).stat().st_mtime)
|
||||
return f"{t.year}-{t.month}-{t.day}"
|
||||
|
||||
|
||||
def file_size(path):
|
||||
# Return file/dir size (MB)
|
||||
"""Returns file or directory size in megabytes (MB) for a given path, where directories are recursively summed."""
|
||||
mb = 1 << 20 # bytes to MiB (1024 ** 2)
|
||||
path = Path(path)
|
||||
if path.is_file():
|
||||
|
@ -316,7 +335,9 @@ def file_size(path):
|
|||
|
||||
|
||||
def check_online():
|
||||
# Check internet connectivity
|
||||
"""Checks internet connectivity by attempting to create a connection to "1.1.1.1" on port 443, retries once if the
|
||||
first attempt fails.
|
||||
"""
|
||||
import socket
|
||||
|
||||
def run_once():
|
||||
|
@ -342,7 +363,9 @@ def git_describe(path=ROOT): # path must be a directory
|
|||
@TryExcept()
|
||||
@WorkingDirectory(ROOT)
|
||||
def check_git_status(repo="ultralytics/yolov5", branch="master"):
|
||||
# YOLOv5 status check, recommend 'git pull' if code is out of date
|
||||
"""Checks if YOLOv5 code is up-to-date with the repository, advising 'git pull' if behind; errors return informative
|
||||
messages.
|
||||
"""
|
||||
url = f"https://github.com/{repo}"
|
||||
msg = f", for updates see {url}"
|
||||
s = colorstr("github: ") # string
|
||||
|
@ -369,7 +392,7 @@ def check_git_status(repo="ultralytics/yolov5", branch="master"):
|
|||
|
||||
@WorkingDirectory(ROOT)
|
||||
def check_git_info(path="."):
|
||||
# YOLOv5 git info check, return {remote, branch, commit}
|
||||
"""Checks YOLOv5 git info, returning a dict with remote URL, branch name, and commit hash."""
|
||||
check_requirements("gitpython")
|
||||
import git
|
||||
|
||||
|
@ -387,12 +410,12 @@ def check_git_info(path="."):
|
|||
|
||||
|
||||
def check_python(minimum="3.8.0"):
|
||||
# Check current python version vs. required python version
|
||||
"""Checks if current Python version meets the minimum required version, exits if not."""
|
||||
check_version(platform.python_version(), minimum, name="Python ", hard=True)
|
||||
|
||||
|
||||
def check_version(current="0.0.0", minimum="0.0.0", name="version ", pinned=False, hard=False, verbose=False):
|
||||
# Check version vs. required version
|
||||
"""Checks if the current version meets the minimum required version, exits or warns based on parameters."""
|
||||
current, minimum = (pkg.parse_version(x) for x in (current, minimum))
|
||||
result = (current == minimum) if pinned else (current >= minimum) # bool
|
||||
s = f"WARNING ⚠️ {name}{minimum} is required by YOLOv5, but {name}{current} is currently installed" # string
|
||||
|
@ -404,7 +427,7 @@ def check_version(current="0.0.0", minimum="0.0.0", name="version ", pinned=Fals
|
|||
|
||||
|
||||
def check_img_size(imgsz, s=32, floor=0):
|
||||
# Verify image size is a multiple of stride s in each dimension
|
||||
"""Adjusts image size to be divisible by stride `s`, supports int or list/tuple input, returns adjusted size."""
|
||||
if isinstance(imgsz, int): # integer i.e. img_size=640
|
||||
new_size = max(make_divisible(imgsz, int(s)), floor)
|
||||
else: # list i.e. img_size=[640, 480]
|
||||
|
@ -416,7 +439,7 @@ def check_img_size(imgsz, s=32, floor=0):
|
|||
|
||||
|
||||
def check_imshow(warn=False):
|
||||
# Check if environment supports image displays
|
||||
"""Checks environment support for image display; warns on failure if `warn=True`."""
|
||||
try:
|
||||
assert not is_jupyter()
|
||||
assert not is_docker()
|
||||
|
@ -432,7 +455,7 @@ def check_imshow(warn=False):
|
|||
|
||||
|
||||
def check_suffix(file="yolov5s.pt", suffix=(".pt",), msg=""):
|
||||
# Check file(s) for acceptable suffix
|
||||
"""Validates if a file or files have an acceptable suffix, raising an error if not."""
|
||||
if file and suffix:
|
||||
if isinstance(suffix, str):
|
||||
suffix = [suffix]
|
||||
|
@ -443,12 +466,12 @@ def check_suffix(file="yolov5s.pt", suffix=(".pt",), msg=""):
|
|||
|
||||
|
||||
def check_yaml(file, suffix=(".yaml", ".yml")):
|
||||
# Search/download YAML file (if necessary) and return path, checking suffix
|
||||
"""Searches/downloads a YAML file, verifies its suffix (.yaml or .yml), and returns the file path."""
|
||||
return check_file(file, suffix)
|
||||
|
||||
|
||||
def check_file(file, suffix=""):
|
||||
# Search/download file (if necessary) and return path
|
||||
"""Searches/downloads a file, checks its suffix (if provided), and returns the file path."""
|
||||
check_suffix(file, suffix) # optional
|
||||
file = str(file) # convert to str()
|
||||
if os.path.isfile(file) or not file: # exists
|
||||
|
@ -478,7 +501,7 @@ def check_file(file, suffix=""):
|
|||
|
||||
|
||||
def check_font(font=FONT, progress=False):
|
||||
# Download font to CONFIG_DIR if necessary
|
||||
"""Ensures specified font exists or downloads it from Ultralytics assets, optionally displaying progress."""
|
||||
font = Path(font)
|
||||
file = CONFIG_DIR / font.name
|
||||
if not font.exists() and not file.exists():
|
||||
|
@ -488,7 +511,7 @@ def check_font(font=FONT, progress=False):
|
|||
|
||||
|
||||
def check_dataset(data, autodownload=True):
|
||||
# Download, check and/or unzip dataset if not found locally
|
||||
"""Validates and/or auto-downloads a dataset, returning its configuration as a dictionary."""
|
||||
|
||||
# Download (optional)
|
||||
extract_dir = ""
|
||||
|
@ -554,7 +577,7 @@ def check_dataset(data, autodownload=True):
|
|||
|
||||
|
||||
def check_amp(model):
|
||||
# Check PyTorch Automatic Mixed Precision (AMP) functionality. Return True on correct operation
|
||||
"""Checks PyTorch AMP functionality for a model, returns True if AMP operates correctly, otherwise False."""
|
||||
from models.common import AutoShape, DetectMultiBackend
|
||||
|
||||
def amp_allclose(model, im):
|
||||
|
@ -582,19 +605,23 @@ def check_amp(model):
|
|||
|
||||
|
||||
def yaml_load(file="data.yaml"):
|
||||
# Single-line safe yaml loading
|
||||
"""Safely loads and returns the contents of a YAML file specified by `file` argument."""
|
||||
with open(file, errors="ignore") as f:
|
||||
return yaml.safe_load(f)
|
||||
|
||||
|
||||
def yaml_save(file="data.yaml", data={}):
|
||||
# Single-line safe yaml saving
|
||||
"""Safely saves `data` to a YAML file specified by `file`, converting `Path` objects to strings; `data` is a
|
||||
dictionary.
|
||||
"""
|
||||
with open(file, "w") as f:
|
||||
yaml.safe_dump({k: str(v) if isinstance(v, Path) else v for k, v in data.items()}, f, sort_keys=False)
|
||||
|
||||
|
||||
def unzip_file(file, path=None, exclude=(".DS_Store", "__MACOSX")):
|
||||
# Unzip a *.zip file to path/, excluding files containing strings in exclude list
|
||||
"""Unzips `file` to `path` (default: file's parent), excluding filenames containing any in `exclude` (`.DS_Store`,
|
||||
`__MACOSX`).
|
||||
"""
|
||||
if path is None:
|
||||
path = Path(file).parent # default path
|
||||
with ZipFile(file) as zipObj:
|
||||
|
@ -604,13 +631,18 @@ def unzip_file(file, path=None, exclude=(".DS_Store", "__MACOSX")):
|
|||
|
||||
|
||||
def url2file(url):
|
||||
# Convert URL to filename, i.e. https://url.com/file.txt?auth -> file.txt
|
||||
"""
|
||||
Converts a URL string to a valid filename by stripping protocol, domain, and any query parameters.
|
||||
|
||||
Example https://url.com/file.txt?auth -> file.txt
|
||||
"""
|
||||
url = str(Path(url)).replace(":/", "://") # Pathlib turns :// -> :/
|
||||
return Path(urllib.parse.unquote(url)).name.split("?")[0] # '%2F' to '/', split https://url.com/file.txt?auth
|
||||
|
||||
|
||||
def download(url, dir=".", unzip=True, delete=True, curl=False, threads=1, retry=3):
|
||||
# Multithreaded file download and unzip function, used in data.yaml for autodownload
|
||||
"""Downloads and optionally unzips files concurrently, supporting retries and curl fallback."""
|
||||
|
||||
def download_one(url, dir):
|
||||
# Download 1 file
|
||||
success = True
|
||||
|
@ -656,24 +688,34 @@ def download(url, dir=".", unzip=True, delete=True, curl=False, threads=1, retry
|
|||
|
||||
|
||||
def make_divisible(x, divisor):
|
||||
# Returns nearest x divisible by divisor
|
||||
"""Adjusts `x` to be divisible by `divisor`, returning the nearest greater or equal value."""
|
||||
if isinstance(divisor, torch.Tensor):
|
||||
divisor = int(divisor.max()) # to int
|
||||
return math.ceil(x / divisor) * divisor
|
||||
|
||||
|
||||
def clean_str(s):
|
||||
# Cleans a string by replacing special characters with underscore _
|
||||
"""Cleans a string by replacing special characters with underscore, e.g., `clean_str('#example!')` returns
|
||||
'_example_'.
|
||||
"""
|
||||
return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)
|
||||
|
||||
|
||||
def one_cycle(y1=0.0, y2=1.0, steps=100):
|
||||
# lambda function for sinusoidal ramp from y1 to y2 https://arxiv.org/pdf/1812.01187.pdf
|
||||
"""
|
||||
Generates a lambda for a sinusoidal ramp from y1 to y2 over 'steps'.
|
||||
|
||||
See https://arxiv.org/pdf/1812.01187.pdf for details.
|
||||
"""
|
||||
return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
|
||||
|
||||
|
||||
def colorstr(*input):
|
||||
# Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
|
||||
"""
|
||||
Colors a string using ANSI escape codes, e.g., colorstr('blue', 'hello world').
|
||||
|
||||
See https://en.wikipedia.org/wiki/ANSI_escape_code.
|
||||
"""
|
||||
*args, string = input if len(input) > 1 else ("blue", "bold", input[0]) # color arguments, string
|
||||
colors = {
|
||||
"black": "\033[30m", # basic colors
|
||||
|
@ -700,7 +742,7 @@ def colorstr(*input):
|
|||
|
||||
|
||||
def labels_to_class_weights(labels, nc=80):
|
||||
# Get class weights (inverse frequency) from training labels
|
||||
"""Calculates class weights from labels to handle class imbalance in training; input shape: (n, 5)."""
|
||||
if labels[0] is None: # no labels loaded
|
||||
return torch.Tensor()
|
||||
|
||||
|
@ -719,7 +761,7 @@ def labels_to_class_weights(labels, nc=80):
|
|||
|
||||
|
||||
def labels_to_image_weights(labels, nc=80, class_weights=np.ones(80)):
|
||||
# Produces image weights based on class_weights and image contents
|
||||
"""Calculates image weights from labels using class weights for weighted sampling."""
|
||||
# Usage: index = random.choices(range(n), weights=image_weights, k=1) # weighted image sample
|
||||
class_counts = np.array([np.bincount(x[:, 0].astype(int), minlength=nc) for x in labels])
|
||||
return (class_weights.reshape(1, nc) * class_counts).sum(1)
|
||||
|
@ -816,7 +858,7 @@ def coco80_to_coco91_class(): # converts 80-index (val2014) to 91-index (paper)
|
|||
|
||||
|
||||
def xyxy2xywh(x):
|
||||
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right
|
||||
"""Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] where xy1=top-left, xy2=bottom-right."""
|
||||
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
||||
y[..., 0] = (x[..., 0] + x[..., 2]) / 2 # x center
|
||||
y[..., 1] = (x[..., 1] + x[..., 3]) / 2 # y center
|
||||
|
@ -826,7 +868,7 @@ def xyxy2xywh(x):
|
|||
|
||||
|
||||
def xywh2xyxy(x):
|
||||
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
||||
"""Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right."""
|
||||
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
||||
y[..., 0] = x[..., 0] - x[..., 2] / 2 # top left x
|
||||
y[..., 1] = x[..., 1] - x[..., 3] / 2 # top left y
|
||||
|
@ -836,7 +878,7 @@ def xywh2xyxy(x):
|
|||
|
||||
|
||||
def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
|
||||
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
|
||||
"""Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right."""
|
||||
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
||||
y[..., 0] = w * (x[..., 0] - x[..., 2] / 2) + padw # top left x
|
||||
y[..., 1] = h * (x[..., 1] - x[..., 3] / 2) + padh # top left y
|
||||
|
@ -846,7 +888,7 @@ def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
|
|||
|
||||
|
||||
def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
|
||||
# Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right
|
||||
"""Convert nx4 boxes from [x1, y1, x2, y2] to [x, y, w, h] normalized where xy1=top-left, xy2=bottom-right."""
|
||||
if clip:
|
||||
clip_boxes(x, (h - eps, w - eps)) # warning: inplace clip
|
||||
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
||||
|
@ -858,7 +900,7 @@ def xyxy2xywhn(x, w=640, h=640, clip=False, eps=0.0):
|
|||
|
||||
|
||||
def xyn2xy(x, w=640, h=640, padw=0, padh=0):
|
||||
# Convert normalized segments into pixel segments, shape (n,2)
|
||||
"""Convert normalized segments into pixel segments, shape (n,2)."""
|
||||
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
|
||||
y[..., 0] = w * x[..., 0] + padw # top left x
|
||||
y[..., 1] = h * x[..., 1] + padh # top left y
|
||||
|
@ -866,7 +908,7 @@ def xyn2xy(x, w=640, h=640, padw=0, padh=0):
|
|||
|
||||
|
||||
def segment2box(segment, width=640, height=640):
|
||||
# Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
|
||||
"""Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)."""
|
||||
x, y = segment.T # segment xy
|
||||
inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
|
||||
(
|
||||
|
@ -877,7 +919,7 @@ def segment2box(segment, width=640, height=640):
|
|||
|
||||
|
||||
def segments2boxes(segments):
|
||||
# Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
|
||||
"""Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)."""
|
||||
boxes = []
|
||||
for s in segments:
|
||||
x, y = s.T # segment xy
|
||||
|
@ -886,7 +928,7 @@ def segments2boxes(segments):
|
|||
|
||||
|
||||
def resample_segments(segments, n=1000):
|
||||
# Up-sample an (n,2) segment
|
||||
"""Resamples an (n,2) segment to a fixed number of points for consistent representation."""
|
||||
for i, s in enumerate(segments):
|
||||
s = np.concatenate((s, s[0:1, :]), axis=0)
|
||||
x = np.linspace(0, len(s) - 1, n)
|
||||
|
@ -896,7 +938,7 @@ def resample_segments(segments, n=1000):
|
|||
|
||||
|
||||
def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
|
||||
# Rescale boxes (xyxy) from img1_shape to img0_shape
|
||||
"""Rescales (xyxy) bounding boxes from img1_shape to img0_shape, optionally using provided `ratio_pad`."""
|
||||
if ratio_pad is None: # calculate from img0_shape
|
||||
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
||||
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
|
||||
|
@ -912,7 +954,7 @@ def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None):
|
|||
|
||||
|
||||
def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
|
||||
# Rescale coords (xyxy) from img1_shape to img0_shape
|
||||
"""Rescales segment coordinates from img1_shape to img0_shape, optionally normalizing them with custom padding."""
|
||||
if ratio_pad is None: # calculate from img0_shape
|
||||
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
|
||||
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
|
||||
|
@ -931,7 +973,7 @@ def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=F
|
|||
|
||||
|
||||
def clip_boxes(boxes, shape):
|
||||
# Clip boxes (xyxy) to image shape (height, width)
|
||||
"""Clips bounding box coordinates (xyxy) to fit within the specified image shape (height, width)."""
|
||||
if isinstance(boxes, torch.Tensor): # faster individually
|
||||
boxes[..., 0].clamp_(0, shape[1]) # x1
|
||||
boxes[..., 1].clamp_(0, shape[0]) # y1
|
||||
|
@ -943,7 +985,7 @@ def clip_boxes(boxes, shape):
|
|||
|
||||
|
||||
def clip_segments(segments, shape):
|
||||
# Clip segments (xy1,xy2,...) to image shape (height, width)
|
||||
"""Clips segment coordinates (xy1, xy2, ...) to an image's boundaries given its shape (height, width)."""
|
||||
if isinstance(segments, torch.Tensor): # faster individually
|
||||
segments[:, 0].clamp_(0, shape[1]) # x
|
||||
segments[:, 1].clamp_(0, shape[0]) # y
|
||||
|
@ -1083,6 +1125,7 @@ def strip_optimizer(f="best.pt", s=""): # from utils.general import *; strip_op
|
|||
|
||||
|
||||
def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr("evolve: ")):
|
||||
"""Logs evolution results and saves to CSV and YAML in `save_dir`, optionally syncs with `bucket`."""
|
||||
evolve_csv = save_dir / "evolve.csv"
|
||||
evolve_yaml = save_dir / "hyp_evolve.yaml"
|
||||
keys = tuple(keys) + tuple(hyp.keys()) # [results + hyps]
|
||||
|
@ -1137,7 +1180,7 @@ def print_mutation(keys, results, hyp, save_dir, bucket, prefix=colorstr("evolve
|
|||
|
||||
|
||||
def apply_classifier(x, model, img, im0):
|
||||
# Apply a second stage classifier to YOLO outputs
|
||||
"""Applies second-stage classifier to YOLO outputs, filtering detections by class match."""
|
||||
# Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
|
||||
im0 = [im0] if isinstance(im0, np.ndarray) else im0
|
||||
for i, d in enumerate(x): # per image
|
||||
|
@ -1172,7 +1215,12 @@ def apply_classifier(x, model, img, im0):
|
|||
|
||||
|
||||
def increment_path(path, exist_ok=False, sep="", mkdir=False):
|
||||
# Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
|
||||
"""
|
||||
Generates an incremented file or directory path if it exists, with optional mkdir; args: path, exist_ok=False,
|
||||
sep="", mkdir=False.
|
||||
|
||||
Example: runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc
|
||||
"""
|
||||
path = Path(path) # os-agnostic
|
||||
if path.exists() and not exist_ok:
|
||||
path, suffix = (path.with_suffix(""), path.suffix) if path.is_file() else (path, "")
|
||||
|
@ -1202,10 +1250,14 @@ imshow_ = cv2.imshow # copy to avoid recursion errors
|
|||
|
||||
|
||||
def imread(filename, flags=cv2.IMREAD_COLOR):
|
||||
"""Reads an image from a file and returns it as a numpy array, using OpenCV's imdecode to support multilanguage
|
||||
paths.
|
||||
"""
|
||||
return cv2.imdecode(np.fromfile(filename, np.uint8), flags)
|
||||
|
||||
|
||||
def imwrite(filename, img):
|
||||
"""Writes an image to a file, returns True on success and False on failure, supports multilanguage paths."""
|
||||
try:
|
||||
cv2.imencode(Path(filename).suffix, img)[1].tofile(filename)
|
||||
return True
|
||||
|
@ -1214,6 +1266,7 @@ def imwrite(filename, img):
|
|||
|
||||
|
||||
def imshow(path, im):
|
||||
"""Displays an image using Unicode path, requires encoded path and image matrix as input."""
|
||||
imshow_(path.encode("unicode_escape").decode(), im)
|
||||
|
||||
|
||||
|
|
|
@ -73,6 +73,7 @@ def _json_default(value):
|
|||
class Loggers:
|
||||
# YOLOv5 Loggers class
|
||||
def __init__(self, save_dir=None, weights=None, opt=None, hyp=None, logger=None, include=LOGGERS):
|
||||
"""Initializes loggers for YOLOv5 training and validation metrics, paths, and options."""
|
||||
self.save_dir = save_dir
|
||||
self.weights = weights
|
||||
self.opt = opt
|
||||
|
@ -150,7 +151,7 @@ class Loggers:
|
|||
|
||||
@property
|
||||
def remote_dataset(self):
|
||||
# Get data_dict if custom dataset artifact link is provided
|
||||
"""Fetches dataset dictionary from remote logging services like ClearML, Weights & Biases, or Comet ML."""
|
||||
data_dict = None
|
||||
if self.clearml:
|
||||
data_dict = self.clearml.data_dict
|
||||
|
@ -162,15 +163,17 @@ class Loggers:
|
|||
return data_dict
|
||||
|
||||
def on_train_start(self):
|
||||
"""Initializes the training process for Comet ML logger if it's configured."""
|
||||
if self.comet_logger:
|
||||
self.comet_logger.on_train_start()
|
||||
|
||||
def on_pretrain_routine_start(self):
|
||||
"""Invokes pre-training routine start hook for Comet ML logger if available."""
|
||||
if self.comet_logger:
|
||||
self.comet_logger.on_pretrain_routine_start()
|
||||
|
||||
def on_pretrain_routine_end(self, labels, names):
|
||||
# Callback runs on pre-train routine end
|
||||
"""Callback that runs at the end of pre-training routine, logging label plots if enabled."""
|
||||
if self.plots:
|
||||
plot_labels(labels, names, self.save_dir)
|
||||
paths = self.save_dir.glob("*labels*.jpg") # training labels
|
||||
|
@ -183,6 +186,7 @@ class Loggers:
|
|||
self.clearml.log_plot(title=path.stem, plot_path=path)
|
||||
|
||||
def on_train_batch_end(self, model, ni, imgs, targets, paths, vals):
|
||||
"""Logs training batch end events, plots images, and updates external loggers with batch-end data."""
|
||||
log_dict = dict(zip(self.keys[:3], vals))
|
||||
# Callback runs on train batch end
|
||||
# ni: number integrated batches (since train start)
|
||||
|
@ -203,7 +207,7 @@ class Loggers:
|
|||
self.comet_logger.on_train_batch_end(log_dict, step=ni)
|
||||
|
||||
def on_train_epoch_end(self, epoch):
|
||||
# Callback runs on train epoch end
|
||||
"""Callback that updates the current epoch in Weights & Biases at the end of a training epoch."""
|
||||
if self.wandb:
|
||||
self.wandb.current_epoch = epoch + 1
|
||||
|
||||
|
@ -211,22 +215,24 @@ class Loggers:
|
|||
self.comet_logger.on_train_epoch_end(epoch)
|
||||
|
||||
def on_val_start(self):
|
||||
"""Callback that signals the start of a validation phase to the Comet logger."""
|
||||
if self.comet_logger:
|
||||
self.comet_logger.on_val_start()
|
||||
|
||||
def on_val_image_end(self, pred, predn, path, names, im):
|
||||
# Callback runs on val image end
|
||||
"""Callback that logs a validation image and its predictions to WandB or ClearML."""
|
||||
if self.wandb:
|
||||
self.wandb.val_one_image(pred, predn, path, names, im)
|
||||
if self.clearml:
|
||||
self.clearml.log_image_with_boxes(path, pred, names, im)
|
||||
|
||||
def on_val_batch_end(self, batch_i, im, targets, paths, shapes, out):
|
||||
"""Logs validation batch results to Comet ML during training at the end of each validation batch."""
|
||||
if self.comet_logger:
|
||||
self.comet_logger.on_val_batch_end(batch_i, im, targets, paths, shapes, out)
|
||||
|
||||
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
|
||||
# Callback runs on val end
|
||||
"""Logs validation results to WandB or ClearML at the end of the validation process."""
|
||||
if self.wandb or self.clearml:
|
||||
files = sorted(self.save_dir.glob("val*.jpg"))
|
||||
if self.wandb:
|
||||
|
@ -238,7 +244,7 @@ class Loggers:
|
|||
self.comet_logger.on_val_end(nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix)
|
||||
|
||||
def on_fit_epoch_end(self, vals, epoch, best_fitness, fi):
|
||||
# Callback runs at the end of each fit (train+val) epoch
|
||||
"""Callback that logs metrics and saves them to CSV or NDJSON at the end of each fit (train+val) epoch."""
|
||||
x = dict(zip(self.keys, vals))
|
||||
if self.csv:
|
||||
file = self.save_dir / "results.csv"
|
||||
|
@ -277,7 +283,7 @@ class Loggers:
|
|||
self.comet_logger.on_fit_epoch_end(x, epoch=epoch)
|
||||
|
||||
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
|
||||
# Callback runs on model save event
|
||||
"""Callback that handles model saving events, logging to Weights & Biases or ClearML if enabled."""
|
||||
if (epoch + 1) % self.opt.save_period == 0 and not final_epoch and self.opt.save_period != -1:
|
||||
if self.wandb:
|
||||
self.wandb.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
|
||||
|
@ -290,7 +296,7 @@ class Loggers:
|
|||
self.comet_logger.on_model_save(last, epoch, final_epoch, best_fitness, fi)
|
||||
|
||||
def on_train_end(self, last, best, epoch, results):
|
||||
# Callback runs on training end, i.e. saving best model
|
||||
"""Callback that runs at the end of training to save plots and log results."""
|
||||
if self.plots:
|
||||
plot_results(file=self.save_dir / "results.csv") # save results.png
|
||||
files = ["results.png", "confusion_matrix.png", *(f"{x}_curve.png" for x in ("F1", "PR", "P", "R"))]
|
||||
|
@ -326,7 +332,7 @@ class Loggers:
|
|||
self.comet_logger.on_train_end(files, self.save_dir, last, best, epoch, final_results)
|
||||
|
||||
def on_params_update(self, params: dict):
|
||||
# Update hyperparams or configs of the experiment
|
||||
"""Updates experiment hyperparameters or configurations in WandB, Comet, or ClearML."""
|
||||
if self.wandb:
|
||||
self.wandb.wandb_run.config.update(params, allow_val_change=True)
|
||||
if self.comet_logger:
|
||||
|
@ -346,7 +352,7 @@ class GenericLogger:
|
|||
"""
|
||||
|
||||
def __init__(self, opt, console_logger, include=("tb", "wandb", "clearml")):
|
||||
# init default loggers
|
||||
"""Initializes a generic logger with optional TensorBoard, W&B, and ClearML support."""
|
||||
self.save_dir = Path(opt.save_dir)
|
||||
self.include = include
|
||||
self.console_logger = console_logger
|
||||
|
@ -381,7 +387,7 @@ class GenericLogger:
|
|||
self.clearml = None
|
||||
|
||||
def log_metrics(self, metrics, epoch):
|
||||
# Log metrics dictionary to all loggers
|
||||
"""Logs metrics to CSV, TensorBoard, W&B, and ClearML; `metrics` is a dict, `epoch` is an int."""
|
||||
if self.csv:
|
||||
keys, vals = list(metrics.keys()), list(metrics.values())
|
||||
n = len(metrics) + 1 # number of cols
|
||||
|
@ -400,7 +406,7 @@ class GenericLogger:
|
|||
self.clearml.log_scalars(metrics, epoch)
|
||||
|
||||
def log_images(self, files, name="Images", epoch=0):
|
||||
# Log images to all loggers
|
||||
"""Logs images to all loggers with optional naming and epoch specification."""
|
||||
files = [Path(f) for f in (files if isinstance(files, (tuple, list)) else [files])] # to Path
|
||||
files = [f for f in files if f.exists()] # filter by exists
|
||||
|
||||
|
@ -418,11 +424,12 @@ class GenericLogger:
|
|||
self.clearml.log_debug_samples(files, title=name)
|
||||
|
||||
def log_graph(self, model, imgsz=(640, 640)):
|
||||
# Log model graph to all loggers
|
||||
"""Logs model graph to all configured loggers with specified input image size."""
|
||||
if self.tb:
|
||||
log_tensorboard_graph(self.tb, model, imgsz)
|
||||
|
||||
def log_model(self, model_path, epoch=0, metadata=None):
|
||||
"""Logs the model to all configured loggers with optional epoch and metadata."""
|
||||
if metadata is None:
|
||||
metadata = {}
|
||||
# Log model to all loggers
|
||||
|
@ -434,7 +441,7 @@ class GenericLogger:
|
|||
self.clearml.log_model(model_path=model_path, model_name=model_path.stem)
|
||||
|
||||
def update_params(self, params):
|
||||
# Update the parameters logged
|
||||
"""Updates logged parameters in WandB and/or ClearML if enabled."""
|
||||
if self.wandb:
|
||||
wandb.run.config.update(params, allow_val_change=True)
|
||||
if self.clearml:
|
||||
|
@ -442,7 +449,7 @@ class GenericLogger:
|
|||
|
||||
|
||||
def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
|
||||
# Log model graph to TensorBoard
|
||||
"""Logs the model graph to TensorBoard with specified image size and model."""
|
||||
try:
|
||||
p = next(model.parameters()) # for device, type
|
||||
imgsz = (imgsz, imgsz) if isinstance(imgsz, int) else imgsz # expand
|
||||
|
@ -455,7 +462,7 @@ def log_tensorboard_graph(tb, model, imgsz=(640, 640)):
|
|||
|
||||
|
||||
def web_project_name(project):
|
||||
# Convert local project name to web project name
|
||||
"""Converts a local project name to a standardized web project name with optional suffixes."""
|
||||
if not project.startswith("runs/train"):
|
||||
return project
|
||||
suffix = "-Classify" if project.endswith("-cls") else "-Segment" if project.endswith("-seg") else ""
|
||||
|
|
|
@ -165,6 +165,7 @@ class CometLogger:
|
|||
self.experiment.log_other("optimizer_parameters", json.dumps(self.hyp))
|
||||
|
||||
def _get_experiment(self, mode, experiment_id=None):
|
||||
"""Returns a new or existing Comet.ml experiment based on mode and optional experiment_id."""
|
||||
if mode == "offline":
|
||||
return (
|
||||
comet_ml.ExistingOfflineExperiment(
|
||||
|
@ -197,21 +198,27 @@ class CometLogger:
|
|||
return
|
||||
|
||||
def log_metrics(self, log_dict, **kwargs):
|
||||
"""Logs metrics to the current experiment, accepting a dictionary of metric names and values."""
|
||||
self.experiment.log_metrics(log_dict, **kwargs)
|
||||
|
||||
def log_parameters(self, log_dict, **kwargs):
|
||||
"""Logs parameters to the current experiment, accepting a dictionary of parameter names and values."""
|
||||
self.experiment.log_parameters(log_dict, **kwargs)
|
||||
|
||||
def log_asset(self, asset_path, **kwargs):
|
||||
"""Logs a file or directory as an asset to the current experiment."""
|
||||
self.experiment.log_asset(asset_path, **kwargs)
|
||||
|
||||
def log_asset_data(self, asset, **kwargs):
|
||||
"""Logs in-memory data as an asset to the current experiment, with optional kwargs."""
|
||||
self.experiment.log_asset_data(asset, **kwargs)
|
||||
|
||||
def log_image(self, img, **kwargs):
|
||||
"""Logs an image to the current experiment with optional kwargs."""
|
||||
self.experiment.log_image(img, **kwargs)
|
||||
|
||||
def log_model(self, path, opt, epoch, fitness_score, best_model=False):
|
||||
"""Logs model checkpoint to experiment with path, options, epoch, fitness, and best model flag."""
|
||||
if not self.save_model:
|
||||
return
|
||||
|
||||
|
@ -235,6 +242,7 @@ class CometLogger:
|
|||
)
|
||||
|
||||
def check_dataset(self, data_file):
|
||||
"""Validates the dataset configuration by loading the YAML file specified in `data_file`."""
|
||||
with open(data_file) as f:
|
||||
data_config = yaml.safe_load(f)
|
||||
|
||||
|
@ -247,6 +255,7 @@ class CometLogger:
|
|||
return check_dataset(data_file)
|
||||
|
||||
def log_predictions(self, image, labelsn, path, shape, predn):
|
||||
"""Logs predictions with IOU filtering, given image, labels, path, shape, and predictions."""
|
||||
if self.logged_images_count >= self.max_images:
|
||||
return
|
||||
detections = predn[predn[:, 4] > self.conf_thres]
|
||||
|
@ -287,6 +296,7 @@ class CometLogger:
|
|||
return
|
||||
|
||||
def preprocess_prediction(self, image, labels, shape, pred):
|
||||
"""Processes prediction data, resizing labels and adding dataset metadata."""
|
||||
nl, _ = labels.shape[0], pred.shape[0]
|
||||
|
||||
# Predictions
|
||||
|
@ -306,6 +316,7 @@ class CometLogger:
|
|||
return predn, labelsn
|
||||
|
||||
def add_assets_to_artifact(self, artifact, path, asset_path, split):
|
||||
"""Adds image and label assets to a wandb artifact given dataset split and paths."""
|
||||
img_paths = sorted(glob.glob(f"{asset_path}/*"))
|
||||
label_paths = img2label_paths(img_paths)
|
||||
|
||||
|
@ -331,6 +342,7 @@ class CometLogger:
|
|||
return artifact
|
||||
|
||||
def upload_dataset_artifact(self):
|
||||
"""Uploads a YOLOv5 dataset as an artifact to the Comet.ml platform."""
|
||||
dataset_name = self.data_dict.get("dataset_name", "yolov5-dataset")
|
||||
path = str((ROOT / Path(self.data_dict["path"])).resolve())
|
||||
|
||||
|
@ -355,6 +367,7 @@ class CometLogger:
|
|||
return
|
||||
|
||||
def download_dataset_artifact(self, artifact_path):
|
||||
"""Downloads a dataset artifact to a specified directory using the experiment's logged artifact."""
|
||||
logged_artifact = self.experiment.get_artifact(artifact_path)
|
||||
artifact_save_dir = str(Path(self.opt.save_dir) / logged_artifact.name)
|
||||
logged_artifact.download(artifact_save_dir)
|
||||
|
@ -374,6 +387,7 @@ class CometLogger:
|
|||
return self.update_data_paths(data_dict)
|
||||
|
||||
def update_data_paths(self, data_dict):
|
||||
"""Updates data paths in the dataset dictionary, defaulting 'path' to an empty string if not present."""
|
||||
path = data_dict.get("path", "")
|
||||
|
||||
for split in ["train", "val", "test"]:
|
||||
|
@ -386,6 +400,7 @@ class CometLogger:
|
|||
return data_dict
|
||||
|
||||
def on_pretrain_routine_end(self, paths):
|
||||
"""Called at the end of pretraining routine to handle paths if training is not being resumed."""
|
||||
if self.opt.resume:
|
||||
return
|
||||
|
||||
|
@ -398,20 +413,25 @@ class CometLogger:
|
|||
return
|
||||
|
||||
def on_train_start(self):
|
||||
"""Logs hyperparameters at the start of training."""
|
||||
self.log_parameters(self.hyp)
|
||||
|
||||
def on_train_epoch_start(self):
|
||||
"""Called at the start of each training epoch."""
|
||||
return
|
||||
|
||||
def on_train_epoch_end(self, epoch):
|
||||
"""Updates the current epoch in the experiment tracking at the end of each epoch."""
|
||||
self.experiment.curr_epoch = epoch
|
||||
|
||||
return
|
||||
|
||||
def on_train_batch_start(self):
|
||||
"""Called at the start of each training batch."""
|
||||
return
|
||||
|
||||
def on_train_batch_end(self, log_dict, step):
|
||||
"""Callback function that updates and logs metrics at the end of each training batch if conditions are met."""
|
||||
self.experiment.curr_step = step
|
||||
if self.log_batch_metrics and (step % self.comet_log_batch_interval == 0):
|
||||
self.log_metrics(log_dict, step=step)
|
||||
|
@ -419,6 +439,7 @@ class CometLogger:
|
|||
return
|
||||
|
||||
def on_train_end(self, files, save_dir, last, best, epoch, results):
|
||||
"""Logs metadata and optionally saves model files at the end of training."""
|
||||
if self.comet_log_predictions:
|
||||
curr_epoch = self.experiment.curr_epoch
|
||||
self.experiment.log_asset_data(self.metadata_dict, "image-metadata.json", epoch=curr_epoch)
|
||||
|
@ -446,12 +467,15 @@ class CometLogger:
|
|||
self.finish_run()
|
||||
|
||||
def on_val_start(self):
|
||||
"""Called at the start of validation, currently a placeholder with no functionality."""
|
||||
return
|
||||
|
||||
def on_val_batch_start(self):
|
||||
"""Placeholder called at the start of a validation batch with no current functionality."""
|
||||
return
|
||||
|
||||
def on_val_batch_end(self, batch_i, images, targets, paths, shapes, outputs):
|
||||
"""Callback executed at the end of a validation batch, conditionally logs predictions to Comet ML."""
|
||||
if not (self.comet_log_predictions and ((batch_i + 1) % self.comet_log_prediction_interval == 0)):
|
||||
return
|
||||
|
||||
|
@ -470,6 +494,7 @@ class CometLogger:
|
|||
return
|
||||
|
||||
def on_val_end(self, nt, tp, fp, p, r, f1, ap, ap50, ap_class, confusion_matrix):
|
||||
"""Logs per-class metrics to Comet.ml after validation if enabled and more than one class exists."""
|
||||
if self.comet_log_per_class_metrics and self.num_classes > 1:
|
||||
for i, c in enumerate(ap_class):
|
||||
class_name = self.class_names[c]
|
||||
|
@ -504,14 +529,18 @@ class CometLogger:
|
|||
)
|
||||
|
||||
def on_fit_epoch_end(self, result, epoch):
|
||||
"""Logs metrics at the end of each training epoch."""
|
||||
self.log_metrics(result, epoch=epoch)
|
||||
|
||||
def on_model_save(self, last, epoch, final_epoch, best_fitness, fi):
|
||||
"""Callback to save model checkpoints periodically if conditions are met."""
|
||||
if ((epoch + 1) % self.opt.save_period == 0 and not final_epoch) and self.opt.save_period != -1:
|
||||
self.log_model(last.parent, self.opt, epoch, fi, best_model=best_fitness == fi)
|
||||
|
||||
def on_params_update(self, params):
|
||||
"""Logs updated parameters during training."""
|
||||
self.log_parameters(params)
|
||||
|
||||
def finish_run(self):
|
||||
"""Ends the current experiment and logs its completion."""
|
||||
self.experiment.end()
|
||||
|
|
|
@ -17,6 +17,7 @@ COMET_DEFAULT_CHECKPOINT_FILENAME = os.getenv("COMET_DEFAULT_CHECKPOINT_FILENAME
|
|||
|
||||
|
||||
def download_model_checkpoint(opt, experiment):
|
||||
"""Downloads YOLOv5 model checkpoint from Comet ML experiment, updating `opt.weights` with download path."""
|
||||
model_dir = f"{opt.project}/{experiment.name}"
|
||||
os.makedirs(model_dir, exist_ok=True)
|
||||
|
||||
|
|
|
@ -25,6 +25,9 @@ COMET_PROJECT_NAME = config.get_string(os.getenv("COMET_PROJECT_NAME"), "comet.p
|
|||
|
||||
|
||||
def get_args(known=False):
|
||||
"""Parses command-line arguments for YOLOv5 training, supporting configuration of weights, data paths,
|
||||
hyperparameters, and more.
|
||||
"""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="initial weights path")
|
||||
parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
|
||||
|
@ -83,6 +86,7 @@ def get_args(known=False):
|
|||
|
||||
|
||||
def run(parameters, opt):
|
||||
"""Executes YOLOv5 training with given hyperparameters and options, setting up device and training directories."""
|
||||
hyp_dict = {k: v for k, v in parameters.items() if k not in ["epochs", "batch_size"]}
|
||||
|
||||
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok or opt.evolve))
|
||||
|
|
|
@ -152,6 +152,7 @@ class WandbLogger:
|
|||
LOGGER.info(f"Saving model artifact on epoch {epoch + 1}")
|
||||
|
||||
def val_one_image(self, pred, predn, path, names, im):
|
||||
"""Evaluates model prediction for a single image, returning metrics and visualizations."""
|
||||
pass
|
||||
|
||||
def log(self, log_dict):
|
||||
|
|
|
@ -16,11 +16,17 @@ def smooth_BCE(eps=0.1): # https://github.com/ultralytics/yolov3/issues/238#iss
|
|||
class BCEBlurWithLogitsLoss(nn.Module):
|
||||
# BCEwithLogitLoss() with reduced missing label effects.
|
||||
def __init__(self, alpha=0.05):
|
||||
"""Initializes a modified BCEWithLogitsLoss with reduced missing label effects, taking optional alpha smoothing
|
||||
parameter.
|
||||
"""
|
||||
super().__init__()
|
||||
self.loss_fcn = nn.BCEWithLogitsLoss(reduction="none") # must be nn.BCEWithLogitsLoss()
|
||||
self.alpha = alpha
|
||||
|
||||
def forward(self, pred, true):
|
||||
"""Computes modified BCE loss for YOLOv5 with reduced missing label effects, taking pred and true tensors,
|
||||
returns mean loss.
|
||||
"""
|
||||
loss = self.loss_fcn(pred, true)
|
||||
pred = torch.sigmoid(pred) # prob from logits
|
||||
dx = pred - true # reduce only missing label effects
|
||||
|
@ -33,6 +39,9 @@ class BCEBlurWithLogitsLoss(nn.Module):
|
|||
class FocalLoss(nn.Module):
|
||||
# Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
|
||||
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
|
||||
"""Initializes FocalLoss with specified loss function, gamma, and alpha values; modifies loss reduction to
|
||||
'none'.
|
||||
"""
|
||||
super().__init__()
|
||||
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
|
||||
self.gamma = gamma
|
||||
|
@ -41,6 +50,7 @@ class FocalLoss(nn.Module):
|
|||
self.loss_fcn.reduction = "none" # required to apply FL to each element
|
||||
|
||||
def forward(self, pred, true):
|
||||
"""Calculates the focal loss between predicted and true labels using a modified BCEWithLogitsLoss."""
|
||||
loss = self.loss_fcn(pred, true)
|
||||
# p_t = torch.exp(-loss)
|
||||
# loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
|
||||
|
@ -63,6 +73,7 @@ class FocalLoss(nn.Module):
|
|||
class QFocalLoss(nn.Module):
|
||||
# Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
|
||||
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
|
||||
"""Initializes Quality Focal Loss with given loss function, gamma, alpha; modifies reduction to 'none'."""
|
||||
super().__init__()
|
||||
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
|
||||
self.gamma = gamma
|
||||
|
@ -71,6 +82,9 @@ class QFocalLoss(nn.Module):
|
|||
self.loss_fcn.reduction = "none" # required to apply FL to each element
|
||||
|
||||
def forward(self, pred, true):
|
||||
"""Computes the focal loss between `pred` and `true` using BCEWithLogitsLoss, adjusting for imbalance with
|
||||
`gamma` and `alpha`.
|
||||
"""
|
||||
loss = self.loss_fcn(pred, true)
|
||||
|
||||
pred_prob = torch.sigmoid(pred) # prob from logits
|
||||
|
@ -91,6 +105,7 @@ class ComputeLoss:
|
|||
|
||||
# Compute losses
|
||||
def __init__(self, model, autobalance=False):
|
||||
"""Initializes ComputeLoss with model and autobalance option, autobalances losses if True."""
|
||||
device = next(model.parameters()).device # get model device
|
||||
h = model.hyp # hyperparameters
|
||||
|
||||
|
@ -173,7 +188,9 @@ class ComputeLoss:
|
|||
return (lbox + lobj + lcls) * bs, torch.cat((lbox, lobj, lcls)).detach()
|
||||
|
||||
def build_targets(self, p, targets):
|
||||
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
|
||||
"""Prepares model targets from input targets (image,class,x,y,w,h) for loss computation, returning class, box,
|
||||
indices, and anchors.
|
||||
"""
|
||||
na, nt = self.na, targets.shape[0] # number of anchors, targets
|
||||
tcls, tbox, indices, anch = [], [], [], []
|
||||
gain = torch.ones(7, device=self.device) # normalized to gridspace gain
|
||||
|
|
|
@ -13,13 +13,13 @@ from utils import TryExcept, threaded
|
|||
|
||||
|
||||
def fitness(x):
|
||||
# Model fitness as a weighted combination of metrics
|
||||
"""Calculates fitness of a model using weighted sum of metrics P, R, mAP@0.5, mAP@0.5:0.95."""
|
||||
w = [0.0, 0.0, 0.1, 0.9] # weights for [P, R, mAP@0.5, mAP@0.5:0.95]
|
||||
return (x[:, :4] * w).sum(1)
|
||||
|
||||
|
||||
def smooth(y, f=0.05):
|
||||
# Box filter of fraction f
|
||||
"""Applies box filter smoothing to array `y` with fraction `f`, yielding a smoothed array."""
|
||||
nf = round(len(y) * f * 2) // 2 + 1 # number of filter elements (must be odd)
|
||||
p = np.ones(nf // 2) # ones padding
|
||||
yp = np.concatenate((p * y[0], y, p * y[-1]), 0) # y padded
|
||||
|
@ -126,6 +126,7 @@ def compute_ap(recall, precision):
|
|||
class ConfusionMatrix:
|
||||
# Updated version of https://github.com/kaanakan/object_detection_confusion_matrix
|
||||
def __init__(self, nc, conf=0.25, iou_thres=0.45):
|
||||
"""Initializes ConfusionMatrix with given number of classes, confidence, and IoU threshold."""
|
||||
self.matrix = np.zeros((nc + 1, nc + 1))
|
||||
self.nc = nc # number of classes
|
||||
self.conf = conf
|
||||
|
@ -179,6 +180,9 @@ class ConfusionMatrix:
|
|||
self.matrix[dc, self.nc] += 1 # predicted background
|
||||
|
||||
def tp_fp(self):
|
||||
"""Calculates true positives (tp) and false positives (fp) excluding the background class from the confusion
|
||||
matrix.
|
||||
"""
|
||||
tp = self.matrix.diagonal() # true positives
|
||||
fp = self.matrix.sum(1) - tp # false positives
|
||||
# fn = self.matrix.sum(0) - tp # false negatives (missed detections)
|
||||
|
@ -186,6 +190,7 @@ class ConfusionMatrix:
|
|||
|
||||
@TryExcept("WARNING ⚠️ ConfusionMatrix plot failure")
|
||||
def plot(self, normalize=True, save_dir="", names=()):
|
||||
"""Plots confusion matrix using seaborn, optional normalization; can save plot to specified directory."""
|
||||
import seaborn as sn
|
||||
|
||||
array = self.matrix / ((self.matrix.sum(0).reshape(1, -1) + 1e-9) if normalize else 1) # normalize columns
|
||||
|
@ -217,12 +222,17 @@ class ConfusionMatrix:
|
|||
plt.close(fig)
|
||||
|
||||
def print(self):
|
||||
"""Prints the confusion matrix row-wise, with each class and its predictions separated by spaces."""
|
||||
for i in range(self.nc + 1):
|
||||
print(" ".join(map(str, self.matrix[i])))
|
||||
|
||||
|
||||
def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7):
|
||||
# Returns Intersection over Union (IoU) of box1(1,4) to box2(n,4)
|
||||
"""
|
||||
Calculates IoU, GIoU, DIoU, or CIoU between two boxes, supporting xywh/xyxy formats.
|
||||
|
||||
Input shapes are box1(1,4) to box2(n,4).
|
||||
"""
|
||||
|
||||
# Get the coordinates of bounding boxes
|
||||
if xywh: # transform from xywh to xyxy
|
||||
|
@ -312,7 +322,9 @@ def bbox_ioa(box1, box2, eps=1e-7):
|
|||
|
||||
|
||||
def wh_iou(wh1, wh2, eps=1e-7):
|
||||
# Returns the nxm IoU matrix. wh1 is nx2, wh2 is mx2
|
||||
"""Calculates the Intersection over Union (IoU) for two sets of widths and heights; `wh1` and `wh2` should be nx2
|
||||
and mx2 tensors.
|
||||
"""
|
||||
wh1 = wh1[:, None] # [N,1,2]
|
||||
wh2 = wh2[None] # [1,M,2]
|
||||
inter = torch.min(wh1, wh2).prod(2) # [N,M]
|
||||
|
@ -324,7 +336,9 @@ def wh_iou(wh1, wh2, eps=1e-7):
|
|||
|
||||
@threaded
|
||||
def plot_pr_curve(px, py, ap, save_dir=Path("pr_curve.png"), names=()):
|
||||
# Precision-recall curve
|
||||
"""Plots precision-recall curve, optionally per class, saving to `save_dir`; `px`, `py` are lists, `ap` is Nx2
|
||||
array, `names` optional.
|
||||
"""
|
||||
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
|
||||
py = np.stack(py, axis=1)
|
||||
|
||||
|
@ -347,7 +361,7 @@ def plot_pr_curve(px, py, ap, save_dir=Path("pr_curve.png"), names=()):
|
|||
|
||||
@threaded
|
||||
def plot_mc_curve(px, py, save_dir=Path("mc_curve.png"), names=(), xlabel="Confidence", ylabel="Metric"):
|
||||
# Metric-confidence curve
|
||||
"""Plots a metric-confidence curve for model predictions, supporting per-class visualization and smoothing."""
|
||||
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
|
||||
|
||||
if 0 < len(names) < 21: # display per-class legend if < 21 classes
|
||||
|
|
|
@ -31,7 +31,11 @@ matplotlib.use("Agg") # for writing to files only
|
|||
class Colors:
|
||||
# Ultralytics color palette https://ultralytics.com/
|
||||
def __init__(self):
|
||||
# hex = matplotlib.colors.TABLEAU_COLORS.values()
|
||||
"""
|
||||
Initializes the Colors class with a palette derived from Ultralytics color scheme, converting hex codes to RGB.
|
||||
|
||||
Colors derived from `hex = matplotlib.colors.TABLEAU_COLORS.values()`.
|
||||
"""
|
||||
hexs = (
|
||||
"FF3838",
|
||||
"FF9D97",
|
||||
|
@ -58,6 +62,7 @@ class Colors:
|
|||
self.n = len(self.palette)
|
||||
|
||||
def __call__(self, i, bgr=False):
|
||||
"""Returns color from palette by index `i`, in BGR format if `bgr=True`, else RGB; `i` is an integer index."""
|
||||
c = self.palette[int(i) % self.n]
|
||||
return (c[2], c[1], c[0]) if bgr else c
|
||||
|
||||
|
@ -100,7 +105,11 @@ def feature_visualization(x, module_type, stage, n=32, save_dir=Path("runs/detec
|
|||
|
||||
|
||||
def hist2d(x, y, n=100):
|
||||
# 2d histogram used in labels.png and evolve.png
|
||||
"""
|
||||
Generates a logarithmic 2D histogram, useful for visualizing label or evolution distributions.
|
||||
|
||||
Used in used in labels.png and evolve.png.
|
||||
"""
|
||||
xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
|
||||
hist, xedges, yedges = np.histogram2d(x, y, (xedges, yedges))
|
||||
xidx = np.clip(np.digitize(x, xedges) - 1, 0, hist.shape[0] - 1)
|
||||
|
@ -109,6 +118,7 @@ def hist2d(x, y, n=100):
|
|||
|
||||
|
||||
def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
|
||||
"""Applies a low-pass Butterworth filter to `data` with specified `cutoff`, `fs`, and `order`."""
|
||||
from scipy.signal import butter, filtfilt
|
||||
|
||||
# https://stackoverflow.com/questions/28536191/how-to-filter-smooth-with-scipy-numpy
|
||||
|
@ -122,7 +132,9 @@ def butter_lowpass_filtfilt(data, cutoff=1500, fs=50000, order=5):
|
|||
|
||||
|
||||
def output_to_target(output, max_det=300):
|
||||
# Convert model output to target format [batch_id, class_id, x, y, w, h, conf] for plotting
|
||||
"""Converts YOLOv5 model output to [batch_id, class_id, x, y, w, h, conf] format for plotting, limiting detections
|
||||
to `max_det`.
|
||||
"""
|
||||
targets = []
|
||||
for i, o in enumerate(output):
|
||||
box, conf, cls = o[:max_det, :6].cpu().split((4, 1, 1), 1)
|
||||
|
@ -133,7 +145,7 @@ def output_to_target(output, max_det=300):
|
|||
|
||||
@threaded
|
||||
def plot_images(images, targets, paths=None, fname="images.jpg", names=None):
|
||||
# Plot image grid with labels
|
||||
"""Plots an image grid with labels from YOLOv5 predictions or targets, saving to `fname`."""
|
||||
if isinstance(images, torch.Tensor):
|
||||
images = images.cpu().float().numpy()
|
||||
if isinstance(targets, torch.Tensor):
|
||||
|
@ -197,7 +209,7 @@ def plot_images(images, targets, paths=None, fname="images.jpg", names=None):
|
|||
|
||||
|
||||
def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=""):
|
||||
# Plot LR simulating training for full epochs
|
||||
"""Plots learning rate schedule for given optimizer and scheduler, saving plot to `save_dir`."""
|
||||
optimizer, scheduler = copy(optimizer), copy(scheduler) # do not modify originals
|
||||
y = []
|
||||
for _ in range(epochs):
|
||||
|
@ -295,7 +307,7 @@ def plot_val_study(file="", dir="", x=None): # from utils.plots import *; plot_
|
|||
|
||||
@TryExcept() # known issue https://github.com/ultralytics/yolov5/issues/5395
|
||||
def plot_labels(labels, names=(), save_dir=Path("")):
|
||||
# plot dataset labels
|
||||
"""Plots dataset labels, saving correlogram and label images, handles classes, and visualizes bounding boxes."""
|
||||
LOGGER.info(f"Plotting labels to {save_dir / 'labels.jpg'}... ")
|
||||
c, b = labels[:, 0], labels[:, 1:].transpose() # classes, boxes
|
||||
nc = int(c.max() + 1) # number of classes
|
||||
|
@ -340,7 +352,7 @@ def plot_labels(labels, names=(), save_dir=Path("")):
|
|||
|
||||
|
||||
def imshow_cls(im, labels=None, pred=None, names=None, nmax=25, verbose=False, f=Path("images.jpg")):
|
||||
# Show classification image grid with labels (optional) and predictions (optional)
|
||||
"""Displays a grid of images with optional labels and predictions, saving to a file."""
|
||||
from utils.augmentations import denormalize
|
||||
|
||||
names = names or [f"class{i}" for i in range(1000)]
|
||||
|
@ -397,7 +409,11 @@ def plot_evolve(evolve_csv="path/to/evolve.csv"): # from utils.plots import *;
|
|||
|
||||
|
||||
def plot_results(file="path/to/results.csv", dir=""):
|
||||
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
|
||||
"""
|
||||
Plots training results from a 'results.csv' file; accepts file path and directory as arguments.
|
||||
|
||||
Example: from utils.plots import *; plot_results('path/to/results.csv')
|
||||
"""
|
||||
save_dir = Path(file).parent if file else Path(dir)
|
||||
fig, ax = plt.subplots(2, 5, figsize=(12, 6), tight_layout=True)
|
||||
ax = ax.ravel()
|
||||
|
@ -424,7 +440,11 @@ def plot_results(file="path/to/results.csv", dir=""):
|
|||
|
||||
|
||||
def profile_idetection(start=0, stop=0, labels=(), save_dir=""):
|
||||
# Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
|
||||
"""
|
||||
Plots per-image iDetection logs, comparing metrics like storage and performance over time.
|
||||
|
||||
Example: from utils.plots import *; profile_idetection()
|
||||
"""
|
||||
ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
|
||||
s = ["Images", "Free Storage (GB)", "RAM Usage (GB)", "Battery", "dt_raw (ms)", "dt_smooth (ms)", "real-world FPS"]
|
||||
files = list(Path(save_dir).glob("frames*.txt"))
|
||||
|
@ -455,7 +475,9 @@ def profile_idetection(start=0, stop=0, labels=(), save_dir=""):
|
|||
|
||||
|
||||
def save_one_box(xyxy, im, file=Path("im.jpg"), gain=1.02, pad=10, square=False, BGR=False, save=True):
|
||||
# Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
|
||||
"""Crops and saves an image from bounding box `xyxy`, applied with `gain` and `pad`, optionally squares and adjusts
|
||||
for BGR.
|
||||
"""
|
||||
xyxy = torch.tensor(xyxy).view(-1, 4)
|
||||
b = xyxy2xywh(xyxy) # boxes
|
||||
if square:
|
||||
|
|
|
@ -12,7 +12,11 @@ from ..general import resample_segments, segment2box
|
|||
|
||||
|
||||
def mixup(im, labels, segments, im2, labels2, segments2):
|
||||
# Applies MixUp augmentation https://arxiv.org/pdf/1710.09412.pdf
|
||||
"""
|
||||
Applies MixUp augmentation blending two images, labels, and segments with a random ratio.
|
||||
|
||||
See https://arxiv.org/pdf/1710.09412.pdf
|
||||
"""
|
||||
r = np.random.beta(32.0, 32.0) # mixup ratio, alpha=beta=32.0
|
||||
im = (im * r + im2 * (1 - r)).astype(np.uint8)
|
||||
labels = np.concatenate((labels, labels2), 0)
|
||||
|
|
|
@ -123,6 +123,7 @@ class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
|
|||
self.overlap = overlap
|
||||
|
||||
def __getitem__(self, index):
|
||||
"""Returns a transformed item from the dataset at the specified index, handling indexing and image weighting."""
|
||||
index = self.indices[index] # linear, shuffled, or image_weights
|
||||
|
||||
hyp = self.hyp
|
||||
|
@ -230,7 +231,7 @@ class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
|
|||
return (torch.from_numpy(img), labels_out, self.im_files[index], shapes, masks)
|
||||
|
||||
def load_mosaic(self, index):
|
||||
# YOLOv5 4-mosaic loader. Loads 1 image + 3 random images into a 4-image mosaic
|
||||
"""Loads 1 image + 3 random images into a 4-image YOLOv5 mosaic, adjusting labels and segments accordingly."""
|
||||
labels4, segments4 = [], []
|
||||
s = self.img_size
|
||||
yc, xc = (int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border) # mosaic center x, y
|
||||
|
@ -291,6 +292,7 @@ class LoadImagesAndLabelsAndMasks(LoadImagesAndLabels): # for training/testing
|
|||
|
||||
@staticmethod
|
||||
def collate_fn(batch):
|
||||
"""Custom collation function for DataLoader, batches images, labels, paths, shapes, and segmentation masks."""
|
||||
img, label, path, shapes, masks = zip(*batch) # transposed
|
||||
batched_masks = torch.cat(masks, 0)
|
||||
for i, l in enumerate(label):
|
||||
|
|
|
@ -144,7 +144,9 @@ def masks_iou(mask1, mask2, eps=1e-7):
|
|||
|
||||
|
||||
def masks2segments(masks, strategy="largest"):
|
||||
# Convert masks(n,160,160) into segments(n,xy)
|
||||
"""Converts binary (n,160,160) masks to polygon segments with options for concatenation or selecting the largest
|
||||
segment.
|
||||
"""
|
||||
segments = []
|
||||
for x in masks.int().cpu().numpy().astype("uint8"):
|
||||
c = cv2.findContours(x, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
|
||||
|
|
|
@ -12,6 +12,9 @@ from .general import crop_mask
|
|||
class ComputeLoss:
|
||||
# Compute losses
|
||||
def __init__(self, model, autobalance=False, overlap=False):
|
||||
"""Initializes the compute loss function for YOLOv5 models with options for autobalancing and overlap
|
||||
handling.
|
||||
"""
|
||||
self.sort_obj_iou = False
|
||||
self.overlap = overlap
|
||||
device = next(model.parameters()).device # get model device
|
||||
|
@ -109,13 +112,15 @@ class ComputeLoss:
|
|||
return loss * bs, torch.cat((lbox, lseg, lobj, lcls)).detach()
|
||||
|
||||
def single_mask_loss(self, gt_mask, pred, proto, xyxy, area):
|
||||
# Mask loss for one image
|
||||
"""Calculates and normalizes single mask loss for YOLOv5 between predicted and ground truth masks."""
|
||||
pred_mask = (pred @ proto.view(self.nm, -1)).view(-1, *proto.shape[1:]) # (n,32) @ (32,80,80) -> (n,80,80)
|
||||
loss = F.binary_cross_entropy_with_logits(pred_mask, gt_mask, reduction="none")
|
||||
return (crop_mask(loss, xyxy).mean(dim=(1, 2)) / area).mean()
|
||||
|
||||
def build_targets(self, p, targets):
|
||||
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
|
||||
"""Prepares YOLOv5 targets for loss computation; inputs targets (image, class, x, y, w, h), output target
|
||||
classes/boxes.
|
||||
"""
|
||||
na, nt = self.na, targets.shape[0] # number of anchors, targets
|
||||
tcls, tbox, indices, anch, tidxs, xywhn = [], [], [], [], [], []
|
||||
gain = torch.ones(8, device=self.device) # normalized to gridspace gain
|
||||
|
|
|
@ -7,7 +7,7 @@ from ..metrics import ap_per_class
|
|||
|
||||
|
||||
def fitness(x):
|
||||
# Model fitness as a weighted combination of metrics
|
||||
"""Evaluates model fitness by a weighted sum of 8 metrics, `x`: [N,8] array, weights: [0.1, 0.9] for mAP and F1."""
|
||||
w = [0.0, 0.0, 0.1, 0.9, 0.0, 0.0, 0.1, 0.9]
|
||||
return (x[:, :8] * w).sum(1)
|
||||
|
||||
|
@ -128,6 +128,7 @@ class Metric:
|
|||
return (self.p[i], self.r[i], self.ap50[i], self.ap[i])
|
||||
|
||||
def get_maps(self, nc):
|
||||
"""Calculates and returns mean Average Precision (mAP) for each class given number of classes `nc`."""
|
||||
maps = np.zeros(nc) + self.map
|
||||
for i, c in enumerate(self.ap_class_index):
|
||||
maps[c] = self.ap[i]
|
||||
|
@ -162,17 +163,22 @@ class Metrics:
|
|||
self.metric_mask.update(list(results["masks"].values()))
|
||||
|
||||
def mean_results(self):
|
||||
"""Computes and returns the mean results for both box and mask metrics by summing their individual means."""
|
||||
return self.metric_box.mean_results() + self.metric_mask.mean_results()
|
||||
|
||||
def class_result(self, i):
|
||||
"""Returns the sum of box and mask metric results for a specified class index `i`."""
|
||||
return self.metric_box.class_result(i) + self.metric_mask.class_result(i)
|
||||
|
||||
def get_maps(self, nc):
|
||||
"""Calculates and returns the sum of mean average precisions (mAPs) for both box and mask metrics for `nc`
|
||||
classes.
|
||||
"""
|
||||
return self.metric_box.get_maps(nc) + self.metric_mask.get_maps(nc)
|
||||
|
||||
@property
|
||||
def ap_class_index(self):
|
||||
# boxes and masks have the same ap_class_index
|
||||
"""Returns the class index for average precision, shared by both box and mask metrics."""
|
||||
return self.metric_box.ap_class_index
|
||||
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@ from ..plots import Annotator, colors
|
|||
|
||||
@threaded
|
||||
def plot_images_and_masks(images, targets, masks, paths=None, fname="images.jpg", names=None):
|
||||
# Plot image grid with labels
|
||||
"""Plots a grid of images, their labels, and masks with optional resizing and annotations, saving to fname."""
|
||||
if isinstance(images, torch.Tensor):
|
||||
images = images.cpu().float().numpy()
|
||||
if isinstance(targets, torch.Tensor):
|
||||
|
@ -111,7 +111,11 @@ def plot_images_and_masks(images, targets, masks, paths=None, fname="images.jpg"
|
|||
|
||||
|
||||
def plot_results_with_masks(file="path/to/results.csv", dir="", best=True):
|
||||
# Plot training results.csv. Usage: from utils.plots import *; plot_results('path/to/results.csv')
|
||||
"""
|
||||
Plots training results from CSV files, plotting best or last result highlights based on `best` parameter.
|
||||
|
||||
Example: from utils.plots import *; plot_results('path/to/results.csv')
|
||||
"""
|
||||
save_dir = Path(file).parent if file else Path(dir)
|
||||
fig, ax = plt.subplots(2, 8, figsize=(18, 6), tight_layout=True)
|
||||
ax = ax.ravel()
|
||||
|
|
|
@ -34,7 +34,8 @@ warnings.filterwarnings("ignore", category=UserWarning)
|
|||
|
||||
|
||||
def smart_inference_mode(torch_1_9=check_version(torch.__version__, "1.9.0")):
|
||||
# Applies torch.inference_mode() decorator if torch>=1.9.0 else torch.no_grad() decorator
|
||||
"""Applies torch.inference_mode() if torch>=1.9.0, else torch.no_grad() as a decorator for functions."""
|
||||
|
||||
def decorate(fn):
|
||||
return (torch.inference_mode if torch_1_9 else torch.no_grad)()(fn)
|
||||
|
||||
|
@ -42,7 +43,9 @@ def smart_inference_mode(torch_1_9=check_version(torch.__version__, "1.9.0")):
|
|||
|
||||
|
||||
def smartCrossEntropyLoss(label_smoothing=0.0):
|
||||
# Returns nn.CrossEntropyLoss with label smoothing enabled for torch>=1.10.0
|
||||
"""Returns a CrossEntropyLoss with optional label smoothing for torch>=1.10.0; warns if smoothing on lower
|
||||
versions.
|
||||
"""
|
||||
if check_version(torch.__version__, "1.10.0"):
|
||||
return nn.CrossEntropyLoss(label_smoothing=label_smoothing)
|
||||
if label_smoothing > 0:
|
||||
|
@ -51,7 +54,7 @@ def smartCrossEntropyLoss(label_smoothing=0.0):
|
|||
|
||||
|
||||
def smart_DDP(model):
|
||||
# Model DDP creation with checks
|
||||
"""Initializes DistributedDataParallel (DDP) for model training, respecting torch version constraints."""
|
||||
assert not check_version(torch.__version__, "1.12.0", pinned=True), (
|
||||
"torch==1.12.0 torchvision==0.13.0 DDP training is not supported due to a known issue. "
|
||||
"Please upgrade or downgrade torch to use DDP. See https://github.com/ultralytics/yolov5/issues/8395"
|
||||
|
@ -63,7 +66,7 @@ def smart_DDP(model):
|
|||
|
||||
|
||||
def reshape_classifier_output(model, n=1000):
|
||||
# Update a TorchVision classification model to class count 'n' if required
|
||||
"""Reshapes last layer of model to match class count 'n', supporting Classify, Linear, Sequential types."""
|
||||
from models.common import Classify
|
||||
|
||||
name, m = list((model.model if hasattr(model, "model") else model).named_children())[-1] # last module
|
||||
|
@ -87,7 +90,9 @@ def reshape_classifier_output(model, n=1000):
|
|||
|
||||
@contextmanager
|
||||
def torch_distributed_zero_first(local_rank: int):
|
||||
# Decorator to make all processes in distributed training wait for each local_master to do something
|
||||
"""Context manager ensuring ordered operations in distributed training by making all processes wait for the leading
|
||||
process.
|
||||
"""
|
||||
if local_rank not in [-1, 0]:
|
||||
dist.barrier(device_ids=[local_rank])
|
||||
yield
|
||||
|
@ -96,7 +101,7 @@ def torch_distributed_zero_first(local_rank: int):
|
|||
|
||||
|
||||
def device_count():
|
||||
# Returns number of CUDA devices available. Safe version of torch.cuda.device_count(). Supports Linux and Windows
|
||||
"""Returns the number of available CUDA devices; works on Linux and Windows by invoking `nvidia-smi`."""
|
||||
assert platform.system() in ("Linux", "Windows"), "device_count() only supported on Linux or Windows"
|
||||
try:
|
||||
cmd = "nvidia-smi -L | wc -l" if platform.system() == "Linux" else 'nvidia-smi -L | find /c /v ""' # Windows
|
||||
|
@ -106,7 +111,7 @@ def device_count():
|
|||
|
||||
|
||||
def select_device(device="", batch_size=0, newline=True):
|
||||
# device = None or 'cpu' or 0 or '0' or '0,1,2,3'
|
||||
"""Selects computing device (CPU, CUDA GPU, MPS) for YOLOv5 model deployment, logging device info."""
|
||||
s = f"YOLOv5 🚀 {git_describe() or file_date()} Python-{platform.python_version()} torch-{torch.__version__} "
|
||||
device = str(device).strip().lower().replace("cuda:", "").replace("none", "") # to string, 'cuda:0' to '0'
|
||||
cpu = device == "cpu"
|
||||
|
@ -143,7 +148,7 @@ def select_device(device="", batch_size=0, newline=True):
|
|||
|
||||
|
||||
def time_sync():
|
||||
# PyTorch-accurate time
|
||||
"""Synchronizes PyTorch for accurate timing, leveraging CUDA if available, and returns the current time."""
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.synchronize()
|
||||
return time.time()
|
||||
|
@ -203,16 +208,19 @@ def profile(input, ops, n=10, device=None):
|
|||
|
||||
|
||||
def is_parallel(model):
|
||||
# Returns True if model is of type DP or DDP
|
||||
"""Checks if the model is using Data Parallelism (DP) or Distributed Data Parallelism (DDP)."""
|
||||
return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)
|
||||
|
||||
|
||||
def de_parallel(model):
|
||||
# De-parallelize a model: returns single-GPU model if model is of type DP or DDP
|
||||
"""Returns a single-GPU model by removing Data Parallelism (DP) or Distributed Data Parallelism (DDP) if applied."""
|
||||
return model.module if is_parallel(model) else model
|
||||
|
||||
|
||||
def initialize_weights(model):
|
||||
"""Initializes weights of Conv2d, BatchNorm2d, and activations (Hardswish, LeakyReLU, ReLU, ReLU6, SiLU) in the
|
||||
model.
|
||||
"""
|
||||
for m in model.modules():
|
||||
t = type(m)
|
||||
if t is nn.Conv2d:
|
||||
|
@ -225,12 +233,14 @@ def initialize_weights(model):
|
|||
|
||||
|
||||
def find_modules(model, mclass=nn.Conv2d):
|
||||
# Finds layer indices matching module class 'mclass'
|
||||
"""Finds and returns list of layer indices in `model.module_list` matching the specified `mclass`."""
|
||||
return [i for i, m in enumerate(model.module_list) if isinstance(m, mclass)]
|
||||
|
||||
|
||||
def sparsity(model):
|
||||
# Return global model sparsity
|
||||
"""Calculates and returns the global sparsity of a model as the ratio of zero-valued parameters to total
|
||||
parameters.
|
||||
"""
|
||||
a, b = 0, 0
|
||||
for p in model.parameters():
|
||||
a += p.numel()
|
||||
|
@ -239,7 +249,7 @@ def sparsity(model):
|
|||
|
||||
|
||||
def prune(model, amount=0.3):
|
||||
# Prune model to requested global sparsity
|
||||
"""Prunes Conv2d layers in a model to a specified sparsity using L1 unstructured pruning."""
|
||||
import torch.nn.utils.prune as prune
|
||||
|
||||
for name, m in model.named_modules():
|
||||
|
@ -250,7 +260,11 @@ def prune(model, amount=0.3):
|
|||
|
||||
|
||||
def fuse_conv_and_bn(conv, bn):
|
||||
# Fuse Conv2d() and BatchNorm2d() layers https://tehnokv.com/posts/fusing-batchnorm-and-conv/
|
||||
"""
|
||||
Fuses Conv2d and BatchNorm2d layers into a single Conv2d layer.
|
||||
|
||||
See https://tehnokv.com/posts/fusing-batchnorm-and-conv/.
|
||||
"""
|
||||
fusedconv = (
|
||||
nn.Conv2d(
|
||||
conv.in_channels,
|
||||
|
@ -280,7 +294,11 @@ def fuse_conv_and_bn(conv, bn):
|
|||
|
||||
|
||||
def model_info(model, verbose=False, imgsz=640):
|
||||
# Model information. img_size may be int or list, i.e. img_size=640 or img_size=[640, 320]
|
||||
"""
|
||||
Prints model summary including layers, parameters, gradients, and FLOPs; imgsz may be int or list.
|
||||
|
||||
Example: img_size=640 or img_size=[640, 320]
|
||||
"""
|
||||
n_p = sum(x.numel() for x in model.parameters()) # number parameters
|
||||
n_g = sum(x.numel() for x in model.parameters() if x.requires_grad) # number gradients
|
||||
if verbose:
|
||||
|
@ -319,7 +337,7 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
|
|||
|
||||
|
||||
def copy_attr(a, b, include=(), exclude=()):
|
||||
# Copy attributes from b to a, options to only include [...] and to exclude [...]
|
||||
"""Copies attributes from object b to a, optionally filtering with include and exclude lists."""
|
||||
for k, v in b.__dict__.items():
|
||||
if (len(include) and k not in include) or k.startswith("_") or k in exclude:
|
||||
continue
|
||||
|
@ -328,7 +346,11 @@ def copy_attr(a, b, include=(), exclude=()):
|
|||
|
||||
|
||||
def smart_optimizer(model, name="Adam", lr=0.001, momentum=0.9, decay=1e-5):
|
||||
# YOLOv5 3-param group optimizer: 0) weights with decay, 1) weights no decay, 2) biases no decay
|
||||
"""
|
||||
Initializes YOLOv5 smart optimizer with 3 parameter groups for different decay configurations.
|
||||
|
||||
Groups are 0) weights with decay, 1) weights no decay, 2) biases no decay.
|
||||
"""
|
||||
g = [], [], [] # optimizer parameter groups
|
||||
bn = tuple(v for k, v in nn.__dict__.items() if "Norm" in k) # normalization layers, i.e. BatchNorm2d()
|
||||
for v in model.modules():
|
||||
|
@ -361,7 +383,7 @@ def smart_optimizer(model, name="Adam", lr=0.001, momentum=0.9, decay=1e-5):
|
|||
|
||||
|
||||
def smart_hub_load(repo="ultralytics/yolov5", model="yolov5s", **kwargs):
|
||||
# YOLOv5 torch.hub.load() wrapper with smart error/issue handling
|
||||
"""YOLOv5 torch.hub.load() wrapper with smart error handling, adjusting torch arguments for compatibility."""
|
||||
if check_version(torch.__version__, "1.9.1"):
|
||||
kwargs["skip_validation"] = True # validation causes GitHub API rate limit errors
|
||||
if check_version(torch.__version__, "1.12.0"):
|
||||
|
@ -373,7 +395,7 @@ def smart_hub_load(repo="ultralytics/yolov5", model="yolov5s", **kwargs):
|
|||
|
||||
|
||||
def smart_resume(ckpt, optimizer, ema=None, weights="yolov5s.pt", epochs=300, resume=True):
|
||||
# Resume training from a partially trained checkpoint
|
||||
"""Resumes training from a checkpoint, updating optimizer, ema, and epochs, with optional resume verification."""
|
||||
best_fitness = 0.0
|
||||
start_epoch = ckpt["epoch"] + 1
|
||||
if ckpt["optimizer"] is not None:
|
||||
|
@ -397,12 +419,14 @@ def smart_resume(ckpt, optimizer, ema=None, weights="yolov5s.pt", epochs=300, re
|
|||
class EarlyStopping:
|
||||
# YOLOv5 simple early stopper
|
||||
def __init__(self, patience=30):
|
||||
"""Initializes simple early stopping mechanism for YOLOv5, with adjustable patience for non-improving epochs."""
|
||||
self.best_fitness = 0.0 # i.e. mAP
|
||||
self.best_epoch = 0
|
||||
self.patience = patience or float("inf") # epochs to wait after fitness stops improving to stop
|
||||
self.possible_stop = False # possible stop may occur next epoch
|
||||
|
||||
def __call__(self, epoch, fitness):
|
||||
"""Evaluates if training should stop based on fitness improvement and patience, returning a boolean."""
|
||||
if fitness >= self.best_fitness: # >= 0 to allow for early zero-fitness stage of training
|
||||
self.best_epoch = epoch
|
||||
self.best_fitness = fitness
|
||||
|
@ -426,7 +450,9 @@ class ModelEMA:
|
|||
"""
|
||||
|
||||
def __init__(self, model, decay=0.9999, tau=2000, updates=0):
|
||||
# Create EMA
|
||||
"""Initializes EMA with model parameters, decay rate, tau for decay adjustment, and update count; sets model to
|
||||
evaluation mode.
|
||||
"""
|
||||
self.ema = deepcopy(de_parallel(model)).eval() # FP32 EMA
|
||||
self.updates = updates # number of EMA updates
|
||||
self.decay = lambda x: decay * (1 - math.exp(-x / tau)) # decay exponential ramp (to help early epochs)
|
||||
|
@ -434,7 +460,7 @@ class ModelEMA:
|
|||
p.requires_grad_(False)
|
||||
|
||||
def update(self, model):
|
||||
# Update EMA parameters
|
||||
"""Updates the Exponential Moving Average (EMA) parameters based on the current model's parameters."""
|
||||
self.updates += 1
|
||||
d = self.decay(self.updates)
|
||||
|
||||
|
@ -446,5 +472,7 @@ class ModelEMA:
|
|||
# assert v.dtype == msd[k].dtype == torch.float32, f'{k}: EMA {v.dtype} and model {msd[k].dtype} must be FP32'
|
||||
|
||||
def update_attr(self, model, include=(), exclude=("process_group", "reducer")):
|
||||
# Update EMA attributes
|
||||
"""Updates EMA attributes by copying specified attributes from model to EMA, excluding certain attributes by
|
||||
default.
|
||||
"""
|
||||
copy_attr(self.ema, model, include, exclude)
|
||||
|
|
|
@ -71,6 +71,7 @@ class TritonRemoteModel:
|
|||
return result[0] if len(result) == 1 else result
|
||||
|
||||
def _create_inputs(self, *args, **kwargs):
|
||||
"""Creates input tensors from args or kwargs, not both; raises error if none or both are provided."""
|
||||
args_len, kwargs_len = len(args), len(kwargs)
|
||||
if not args_len and not kwargs_len:
|
||||
raise RuntimeError("No inputs provided.")
|
||||
|
|
12
val.py
12
val.py
|
@ -62,7 +62,7 @@ from utils.torch_utils import select_device, smart_inference_mode
|
|||
|
||||
|
||||
def save_one_txt(predn, save_conf, shape, file):
|
||||
# Save one txt result
|
||||
"""Saves one detection result to a txt file in normalized xywh format, optionally including confidence."""
|
||||
gn = torch.tensor(shape)[[1, 0, 1, 0]] # normalization gain whwh
|
||||
for *xyxy, conf, cls in predn.tolist():
|
||||
xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh
|
||||
|
@ -72,7 +72,11 @@ def save_one_txt(predn, save_conf, shape, file):
|
|||
|
||||
|
||||
def save_one_json(predn, jdict, path, class_map):
|
||||
# Save one JSON result {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
|
||||
"""
|
||||
Saves one JSON detection result with image ID, category ID, bounding box, and score.
|
||||
|
||||
Example: {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}
|
||||
"""
|
||||
image_id = int(path.stem) if path.stem.isnumeric() else path.stem
|
||||
box = xyxy2xywh(predn[:, :4]) # xywh
|
||||
box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner
|
||||
|
@ -359,6 +363,7 @@ def run(
|
|||
|
||||
|
||||
def parse_opt():
|
||||
"""Parses command-line options for YOLOv5 model inference configuration."""
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="dataset.yaml path")
|
||||
parser.add_argument("--weights", nargs="+", type=str, default=ROOT / "yolov5s.pt", help="model path(s)")
|
||||
|
@ -391,6 +396,9 @@ def parse_opt():
|
|||
|
||||
|
||||
def main(opt):
|
||||
"""Executes YOLOv5 tasks like training, validation, testing, speed, and study benchmarks based on provided
|
||||
options.
|
||||
"""
|
||||
check_requirements(ROOT / "requirements.txt", exclude=("tensorboard", "thop"))
|
||||
|
||||
if opt.task in ("train", "val", "test"): # run normally
|
||||
|
|
Loading…
Reference in New Issue