# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""
Train a YOLOv5 classifier model on a classification dataset

Usage - Single-GPU training:
    $ python classify/train.py --model yolov5s-cls.pt --data imagenette160 --epochs 5 --img 224

Usage - Multi-GPU DDP training:
    $ python -m torch.distributed.run --nproc_per_node 4 --master_port 2022 classify/train.py --model yolov5s-cls.pt --data imagenet --epochs 5 --img 224 --device 0,1,2,3

Datasets:           --data mnist, fashion-mnist, cifar10, cifar100, imagenette, imagewoof, imagenet, or 'path/to/data'
YOLOv5-cls models:  --model yolov5n-cls.pt, yolov5s-cls.pt, yolov5m-cls.pt, yolov5l-cls.pt, yolov5x-cls.pt
Torchvision models: --model resnet50, efficientnet_b0, etc. See https://pytorch.org/vision/stable/models.html
"""

import argparse
import os
import subprocess
import sys
import time
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import torch
import torch.distributed as dist
import torch.hub as hub
import torch.optim.lr_scheduler as lr_scheduler
import torchvision
from torch.cuda import amp
from tqdm import tqdm

FILE = Path(__file__).resolve()
ROOT = FILE.parents[1]  # YOLOv5 root directory
if str(ROOT) not in sys.path:
    sys.path.append(str(ROOT))  # add ROOT to PATH
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

from classify import val as validate
from models.experimental import attempt_load
from models.yolo import ClassificationModel, DetectionModel
from utils.dataloaders import create_classification_dataloader
from utils.general import (DATASETS_DIR, LOGGER, TQDM_BAR_FORMAT, WorkingDirectory, check_git_info, check_git_status,
                           check_requirements, colorstr, download, increment_path, init_seeds, print_args, yaml_save)
from utils.loggers import GenericLogger
from utils.plots import imshow_cls
from utils.torch_utils import (ModelEMA, de_parallel, model_info, reshape_classifier_output, select_device, smart_DDP,
                               smart_optimizer, smartCrossEntropyLoss, torch_distributed_zero_first)

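# DDP environment: RANK is the global process index, LOCAL_RANK the per-node GPU index, and WORLD_SIZE the total
# number of processes; all three fall back to single-process defaults when the script is launched without torchrun.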
LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv('RANK', -1))
WORLD_SIZE = int(os.getenv('WORLD_SIZE', 1))
GIT_INFO = check_git_info()


def train(opt, device):
    init_seeds(opt.seed + 1 + RANK, deterministic=True)
    save_dir, data, bs, epochs, nw, imgsz, pretrained = \
        opt.save_dir, Path(opt.data), opt.batch_size, opt.epochs, min(os.cpu_count() - 1, opt.workers), \
        opt.imgsz, str(opt.pretrained).lower() == 'true'
    cuda = device.type != 'cpu'

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last, best = wdir / 'last.pt', wdir / 'best.pt'

    # Save run settings
    yaml_save(save_dir / 'opt.yaml', vars(opt))

    # Logger
    logger = GenericLogger(opt=opt, console_logger=LOGGER) if RANK in {-1, 0} else None

    # Download Dataset
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        data_dir = data if data.is_dir() else (DATASETS_DIR / data)
        if not data_dir.is_dir():
            LOGGER.info(f'\nDataset not found ⚠️, missing path {data_dir}, attempting download...')
            t = time.time()
            if str(data) == 'imagenet':
                subprocess.run(['bash', str(ROOT / 'data/scripts/get_imagenet.sh')], check=True)  # list args, no shell
            else:
                url = f'https://github.com/ultralytics/yolov5/releases/download/v1.0/{data}.zip'
                download(url, dir=data_dir.parent)
            s = f"Dataset download success ✅ ({time.time() - t:.1f}s), saved to {colorstr('bold', data_dir)}\n"
            LOGGER.info(s)

    # Dataloaders
    nc = len([x for x in (data_dir / 'train').glob('*') if x.is_dir()])  # number of classes
    trainloader = create_classification_dataloader(path=data_dir / 'train',
                                                   imgsz=imgsz,
                                                   batch_size=bs // WORLD_SIZE,
                                                   augment=True,
                                                   cache=opt.cache,
                                                   rank=LOCAL_RANK,
                                                   workers=nw)

    test_dir = data_dir / 'test' if (data_dir / 'test').exists() else data_dir / 'val'  # data/test or data/val
    if RANK in {-1, 0}:
        testloader = create_classification_dataloader(path=test_dir,
                                                      imgsz=imgsz,
                                                      batch_size=bs // WORLD_SIZE * 2,
                                                      augment=False,
                                                      cache=opt.cache,
                                                      rank=-1,
                                                      workers=nw)

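    # Note: each DDP process loads batch_size // WORLD_SIZE images per step; the val/test loader is built only
    # on the main process and uses twice the per-GPU batch size, since evaluation holds no gradients.
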
    # Model
    with torch_distributed_zero_first(LOCAL_RANK), WorkingDirectory(ROOT):
        if Path(opt.model).is_file() or opt.model.endswith('.pt'):
            model = attempt_load(opt.model, device='cpu', fuse=False)
        elif opt.model in torchvision.models.__dict__:  # TorchVision models i.e. resnet50, efficientnet_b0
            model = torchvision.models.__dict__[opt.model](weights='IMAGENET1K_V1' if pretrained else None)
        else:
            m = hub.list('ultralytics/yolov5')  # + hub.list('pytorch/vision')  # models
            raise ModuleNotFoundError(f'--model {opt.model} not found. Available models are: \n' + '\n'.join(m))
        if isinstance(model, DetectionModel):
            LOGGER.warning("WARNING ⚠️ pass YOLOv5 classifier model with '-cls' suffix, i.e. '--model yolov5s-cls.pt'")
            model = ClassificationModel(model=model, nc=nc, cutoff=opt.cutoff or 10)  # convert to classification model
        reshape_classifier_output(model, nc)  # update class count
    for m in model.modules():
        if not pretrained and hasattr(m, 'reset_parameters'):
            m.reset_parameters()
        if isinstance(m, torch.nn.Dropout) and opt.dropout is not None:
            m.p = opt.dropout  # set dropout
    for p in model.parameters():
        p.requires_grad = True  # for training
    model = model.to(device)

    # Info
    if RANK in {-1, 0}:
        model.names = trainloader.dataset.classes  # attach class names
        model.transforms = testloader.dataset.torch_transforms  # attach inference transforms
        model_info(model)
        if opt.verbose:
            LOGGER.info(model)
        images, labels = next(iter(trainloader))
        file = imshow_cls(images[:25], labels[:25], names=model.names, f=save_dir / 'train_images.jpg')
        logger.log_images(file, name='Train Examples')
        logger.log_graph(model, imgsz)  # log model

    # Optimizer
    optimizer = smart_optimizer(model, opt.optimizer, opt.lr0, momentum=0.9, decay=opt.decay)

    # Scheduler
    lrf = 0.01  # final lr (fraction of lr0)
    # lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine
    lf = lambda x: (1 - x / epochs) * (1 - lrf) + lrf  # linear
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # scheduler = lr_scheduler.OneCycleLR(optimizer, max_lr=lr0, total_steps=epochs, pct_start=0.1,
    #                                     final_div_factor=1 / 25 / lrf)

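    # The lambda above is stepped once per epoch, so the learning rate decays linearly from lr0 at epoch 0
    # to lr0 * lrf (1% of the initial value) at the final epoch.
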
    # EMA
    ema = ModelEMA(model) if RANK in {-1, 0} else None
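    # ModelEMA maintains an exponential moving average of the weights on the main process; the EMA copy
    # (ema.ema) is what gets validated each epoch and written into the checkpoints below.
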
    # DDP mode
    if cuda and RANK != -1:
        model = smart_DDP(model)

    # Train
    t0 = time.time()
    criterion = smartCrossEntropyLoss(label_smoothing=opt.label_smoothing)  # loss function
    best_fitness = 0.0
    scaler = amp.GradScaler(enabled=cuda)
    val = test_dir.stem  # 'val' or 'test'
    LOGGER.info(f'Image sizes {imgsz} train, {imgsz} test\n'
                f'Using {nw * WORLD_SIZE} dataloader workers\n'
                f"Logging results to {colorstr('bold', save_dir)}\n"
                f'Starting {opt.model} training on {data} dataset with {nc} classes for {epochs} epochs...\n\n'
                f"{'Epoch':>10}{'GPU_mem':>10}{'train_loss':>12}{f'{val}_loss':>12}{'top1_acc':>12}{'top5_acc':>12}")
    for epoch in range(epochs):  # loop over the dataset multiple times
        tloss, vloss, fitness = 0.0, 0.0, 0.0  # train loss, val loss, fitness
        model.train()
        if RANK != -1:
            trainloader.sampler.set_epoch(epoch)
        pbar = enumerate(trainloader)
        if RANK in {-1, 0}:
            pbar = tqdm(enumerate(trainloader), total=len(trainloader), bar_format=TQDM_BAR_FORMAT)
        for i, (images, labels) in pbar:  # progress bar
            images, labels = images.to(device, non_blocking=True), labels.to(device)

            # Forward
            with amp.autocast(enabled=cuda):  # stability issues when enabled
                loss = criterion(model(images), labels)

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            scaler.unscale_(optimizer)  # unscale gradients
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0)  # clip gradients
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            if ema:
                ema.update(model)
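            # Note: gradients are unscaled before clipping so the max-norm threshold applies to the true
            # gradient magnitudes under AMP; scaler.step() then skips the update if non-finite values remain.
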
            if RANK in {-1, 0}:
                # Print
                tloss = (tloss * i + loss.item()) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                pbar.desc = f"{f'{epoch + 1}/{epochs}':>10}{mem:>10}{tloss:>12.3g}" + ' ' * 36

                # Test
                if i == len(pbar) - 1:  # last batch
                    top1, top5, vloss = validate.run(model=ema.ema,
                                                     dataloader=testloader,
                                                     criterion=criterion,
                                                     pbar=pbar)  # test accuracy, loss
                    fitness = top1  # define fitness as top1 accuracy

        # Scheduler
        scheduler.step()

        # Log metrics
        if RANK in {-1, 0}:
            # Best fitness
            if fitness > best_fitness:
                best_fitness = fitness

            # Log
            metrics = {
                'train/loss': tloss,
                f'{val}/loss': vloss,
                'metrics/accuracy_top1': top1,
                'metrics/accuracy_top5': top5,
                'lr/0': optimizer.param_groups[0]['lr']}  # learning rate
            logger.log_metrics(metrics, epoch)

            # Save model
            final_epoch = epoch + 1 == epochs
            if (not opt.nosave) or final_epoch:
                ckpt = {
                    'epoch': epoch,
                    'best_fitness': best_fitness,
                    'model': deepcopy(ema.ema).half(),  # deepcopy(de_parallel(model)).half(),
                    'ema': None,  # deepcopy(ema.ema).half(),
                    'updates': ema.updates,
                    'optimizer': None,  # optimizer.state_dict(),
                    'opt': vars(opt),
                    'git': GIT_INFO,  # {remote, branch, commit} if a git repo
                    'date': datetime.now().isoformat()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fitness:
                    torch.save(ckpt, best)
                del ckpt
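                # Note: checkpoints hold only the half-precision EMA weights plus run metadata; the 'ema' and
                # 'optimizer' entries are set to None, which keeps last.pt and best.pt small.
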
    # Train complete
    if RANK in {-1, 0} and final_epoch:
        LOGGER.info(f'\nTraining complete ({(time.time() - t0) / 3600:.3f} hours)'
                    f"\nResults saved to {colorstr('bold', save_dir)}"
                    f'\nPredict: python classify/predict.py --weights {best} --source im.jpg'
                    f'\nValidate: python classify/val.py --weights {best} --data {data_dir}'
                    f'\nExport: python export.py --weights {best} --include onnx'
                    f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{best}')"
                    f'\nVisualize: https://netron.app\n')

        # Plot examples
        images, labels = (x[:25] for x in next(iter(testloader)))  # first 25 images and labels
        pred = torch.max(ema.ema(images.to(device)), 1)[1]
        file = imshow_cls(images, labels, pred, de_parallel(model).names, verbose=False, f=save_dir / 'test_images.jpg')

        # Log results
        meta = {'epochs': epochs, 'top1_acc': best_fitness, 'date': datetime.now().isoformat()}
        logger.log_images(file, name='Test Examples (true-predicted)', epoch=epoch)
        logger.log_model(best, epochs, metadata=meta)


def parse_opt(known=False):
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, default='yolov5s-cls.pt', help='initial weights path')
    parser.add_argument('--data', type=str, default='imagenette160', help='cifar10, cifar100, mnist, imagenet, ...')
    parser.add_argument('--epochs', type=int, default=10, help='total training epochs')
    parser.add_argument('--batch-size', type=int, default=64, help='total batch size for all GPUs')
    parser.add_argument('--imgsz', '--img', '--img-size', type=int, default=224, help='train, val image size (pixels)')
    parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
    parser.add_argument('--cache', type=str, nargs='?', const='ram', help='--cache images in "ram" (default) or "disk"')
    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--workers', type=int, default=8, help='max dataloader workers (per RANK in DDP mode)')
    parser.add_argument('--project', default=ROOT / 'runs/train-cls', help='save to project/name')
    parser.add_argument('--name', default='exp', help='save to project/name')
    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
    parser.add_argument('--pretrained', nargs='?', const=True, default=True, help='start from i.e. --pretrained False')
    parser.add_argument('--optimizer', choices=['SGD', 'Adam', 'AdamW', 'RMSProp'], default='Adam', help='optimizer')
    parser.add_argument('--lr0', type=float, default=0.001, help='initial learning rate')
    parser.add_argument('--decay', type=float, default=5e-5, help='weight decay')
    parser.add_argument('--label-smoothing', type=float, default=0.1, help='Label smoothing epsilon')
    parser.add_argument('--cutoff', type=int, default=None, help='Model layer cutoff index for Classify() head')
    parser.add_argument('--dropout', type=float, default=None, help='Dropout (fraction)')
    parser.add_argument('--verbose', action='store_true', help='Verbose mode')
    parser.add_argument('--seed', type=int, default=0, help='Global training seed')
    parser.add_argument('--local_rank', type=int, default=-1, help='Automatic DDP Multi-GPU argument, do not modify')
    return parser.parse_known_args()[0] if known else parser.parse_args()


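# Example (hypothetical paths; assumes the ImageFolder-style layout used above, i.e. path/to/data/{train,val}/<class>/*.jpg):
#   $ python classify/train.py --model yolov5s-cls.pt --data path/to/data --epochs 20 --img 224

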
def main(opt):
    # Checks
    if RANK in {-1, 0}:
        print_args(vars(opt))
        check_git_status()
        check_requirements()

    # DDP mode
    device = select_device(opt.device, batch_size=opt.batch_size)
    if LOCAL_RANK != -1:
        assert opt.batch_size != -1, 'AutoBatch is coming soon for classification, please pass a valid --batch-size'
        assert opt.batch_size % WORLD_SIZE == 0, f'--batch-size {opt.batch_size} must be multiple of WORLD_SIZE'
        assert torch.cuda.device_count() > LOCAL_RANK, 'insufficient CUDA devices for DDP command'
        torch.cuda.set_device(LOCAL_RANK)
        device = torch.device('cuda', LOCAL_RANK)
        dist.init_process_group(backend='nccl' if dist.is_nccl_available() else 'gloo')

    # Parameters
    opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)  # increment run

    # Train
    train(opt, device)


def run(**kwargs):
    # Usage: from yolov5 import classify; classify.train.run(data='mnist', imgsz=320, model='yolov5m-cls.pt')
    opt = parse_opt(True)
    for k, v in kwargs.items():
        setattr(opt, k, v)
    main(opt)
    return opt


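# Note: keyword arguments passed to run() override the argparse defaults above,
# e.g. classify.train.run(data='cifar10', epochs=3, batch_size=128).

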
if __name__ == '__main__':
    opt = parse_opt()
    main(opt)