pull/13244/head
UltralyticsAssistant 2024-08-05 22:09:00 +00:00
parent f20e9969d4
commit 4736f445d7
3 changed files with 72 additions and 227 deletions


@@ -224,7 +224,6 @@ def train(opt, device):
for i, (images, labels) in pbar: # progress bar
images, labels = images.to(device, non_blocking=True), labels.to(device)
amp_autocast = None
if check_version(torch.__version__, "2.4.0"):
amp_autocast = torch.amp.autocast("cuda", enabled=device.type != "cpu")
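Note: the hunk above picks the autocast API by torch version, since torch.cuda.amp.autocast is deprecated as of torch 2.4 in favor of the unified torch.amp.autocast. A minimal standalone sketch of the same gating, using packaging.version in place of YOLOv5's check_version helper:

import torch
from packaging.version import parse as parse_version

def make_autocast(device: torch.device):
    """Return an AMP autocast context, preferring the torch>=2.4 unified API."""
    enabled = device.type != "cpu"
    if parse_version(torch.__version__.split("+")[0]) >= parse_version("2.4.0"):
        return torch.amp.autocast("cuda", enabled=enabled)  # unified API
    return torch.cuda.amp.autocast(enabled=enabled)  # legacy API, deprecated in 2.4

# usage: with make_autocast(device): pred = model(images)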

train.py (274 lines changed)

@@ -95,9 +95,7 @@ from utils.torch_utils import (
torch_distributed_zero_first,
)
LOCAL_RANK = int(os.getenv("LOCAL_RANK", -1)) # https://pytorch.org/docs/stable/elastic/run.html
RANK = int(os.getenv("RANK", -1))
WORLD_SIZE = int(os.getenv("WORLD_SIZE", 1))
GIT_INFO = check_git_info()
@@ -177,9 +175,7 @@ def train(hyp, opt, device, callbacks):
if isinstance(hyp, str):
with open(hyp, errors="ignore") as f:
hyp = yaml.safe_load(f) # load hyps dict
LOGGER.info(colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items()))
opt.hyp = hyp.copy() # for saving hyps to checkpoints
# Save run settings
@@ -227,14 +223,8 @@ def train(hyp, opt, device, callbacks):
data_dict = data_dict or check_dataset(data) # check if None
train_path, val_path = data_dict["train"], data_dict["val"]
nc = 1 if single_cls else int(data_dict["nc"]) # number of classes
names = {0: "item"} if single_cls and len(data_dict["names"]) != 1 else data_dict["names"] # class names
is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt") # COCO dataset
# Model
check_suffix(weights, ".pt") # check weights
@@ -242,31 +232,19 @@ def train(hyp, opt, device, callbacks):
if pretrained:
with torch_distributed_zero_first(LOCAL_RANK):
weights = attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location="cpu") # load checkpoint to CPU to avoid CUDA memory leak
model = Model(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device) # create
exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else [] # exclude keys
csd = ckpt["model"].float().state_dict() # checkpoint state_dict as FP32
csd = intersect_dicts(csd, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(csd, strict=False) # load
LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}") # report
else:
model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device) # create
amp = check_amp(model) # check AMP
# Freeze
freeze = [f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))] # layers to freeze
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
# v.register_hook(lambda x: torch.nan_to_num(x)) # NaN to 0 (commented for erratic training results)
@@ -287,9 +265,7 @@ def train(hyp, opt, device, callbacks):
nbs = 64 # nominal batch size
accumulate = max(round(nbs / batch_size), 1) # accumulate loss before optimizing
hyp["weight_decay"] *= batch_size * accumulate / nbs # scale weight_decay
optimizer = smart_optimizer(model, opt.optimizer, hyp["lr0"], hyp["momentum"], hyp["weight_decay"])
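The accumulation arithmetic above keeps the effective batch near the nominal 64 and rescales weight decay to match. A worked example with assumed batch sizes:

nbs = 64  # nominal batch size
for batch_size in (16, 64, 128):
    accumulate = max(round(nbs / batch_size), 1)  # step optimizer every `accumulate` batches
    scale = batch_size * accumulate / nbs  # weight_decay multiplier
    print(batch_size, accumulate, scale)  # 16 -> 4, 1.0 | 64 -> 1, 1.0 | 128 -> 1, 2.0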
# Scheduler
if opt.cos_lr:
@@ -300,9 +276,7 @@ def train(hyp, opt, device, callbacks):
"""Linear learning rate scheduler function with decay calculated by epoch proportion."""
return (1 - x / epochs) * (1.0 - hyp["lrf"]) + hyp["lrf"] # linear
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs)
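For reference, the linear lf defined above decays the LR multiplier from 1.0 at epoch 0 to hyp["lrf"] at the last epoch (the --cos-lr branch substitutes YOLOv5's one_cycle cosine schedule instead). A quick check with assumed values:

epochs, lrf = 100, 0.01  # assumed hyperparameters
lf = lambda x: (1 - x / epochs) * (1.0 - lrf) + lrf  # linear decay
print(lf(0), lf(50), lf(100))  # -> 1.0 0.505 0.01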
# EMA
ema = ModelEMA(model) if RANK in {-1, 0} else None
@@ -311,9 +285,7 @@ def train(hyp, opt, device, callbacks):
best_fitness, start_epoch = 0.0, 0
if pretrained:
if resume:
best_fitness, start_epoch, epochs = smart_resume(ckpt, optimizer, ema, weights, epochs, resume)
del ckpt, csd
# DP mode
@@ -350,9 +322,7 @@ def train(hyp, opt, device, callbacks):
)
labels = np.concatenate(dataset.labels, 0)
mlc = int(labels[:, 0].max()) # max label class
assert mlc < nc, f"Label class {mlc} exceeds nc={nc} in {data}. Possible class labels are 0-{nc - 1}"
# Process 0
if RANK in {-1, 0}:
@@ -373,9 +343,7 @@ def train(hyp, opt, device, callbacks):
if not resume:
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz) # run AutoAnchor
model.half().float() # pre-reduce anchor precision
callbacks.run("on_pretrain_routine_end", labels, names)
@@ -392,17 +360,13 @@ def train(hyp, opt, device, callbacks):
hyp["label_smoothing"] = opt.label_smoothing
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names
# Start training
t0 = time.time()
nb = len(train_loader) # number of batches
nw = max(round(hyp["warmup_epochs"] * nb), 100) # number of warmup iterations, max(3 epochs, 100 iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
last_opt_step = -1
maps = np.zeros(nc) # mAP per class
@@ -424,23 +388,15 @@ def train(hyp, opt, device, callbacks):
f"Logging results to {colorstr('bold', save_dir)}\n"
f"Starting training for {epochs} epochs..."
)
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
callbacks.run("on_train_epoch_start")
model.train()
# Update image weights (optional, single-GPU only)
if opt.image_weights:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
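The image-weights path biases sampling toward the currently worst classes: class weights are scaled by (1 - maps)**2, converted to per-image weights, and the dataset indices are redrawn with replacement. The redraw step in isolation, with made-up weights:

import random
import numpy as np

n = 5  # hypothetical dataset size
iw = np.array([0.1, 0.1, 0.1, 0.1, 0.6])  # hypothetical per-image weights
indices = random.choices(range(n), weights=iw, k=n)  # with replacement, e.g. [4, 4, 1, 4, 0]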
# Update mosaic border (optional)
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
@@ -470,14 +426,10 @@ def train(hyp, opt, device, callbacks):
targets,
paths,
_,
) in pbar: # batch -------------------------------------------------------------
callbacks.run("on_train_batch_start")
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
@@ -495,23 +447,15 @@ def train(hyp, opt, device, callbacks):
],
)
if "momentum" in x:
x["momentum"] = np.interp(
ni, xi, [hyp["warmup_momentum"], hyp["momentum"]]
)
x["momentum"] = np.interp(ni, xi, [hyp["warmup_momentum"], hyp["momentum"]])
# Multi-scale
if opt.multi_scale:
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = nn.functional.interpolate(imgs, size=ns, mode="bilinear", align_corners=False)
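--multi-scale draws a size in [0.5, 1.5] x imgsz, snapped to a multiple of the grid stride gs, then stretches the batch to the matching gs-multiple shape. The rounding on its own, with assumed sizes:

import math, random

imgsz, gs = 640, 32  # assumed image size and max model stride
sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5) + gs) // gs * gs  # 320..960 in steps of 32
sf = sz / imgsz  # scale factor vs. current batch size
ns = [math.ceil(x * sf / gs) * gs for x in (640, 640)]  # new (h, w), each a gs-multiple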
amp_autocast = None
if check_version(torch.__version__, "2.4.0"):
@@ -522,9 +466,7 @@ def train(hyp, opt, device, callbacks):
# Forward
with amp_autocast:
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device)) # loss scaled by batch_size
if RANK != -1:
loss *= WORLD_SIZE # gradient averaged between devices in DDP mode
if opt.quad:
@@ -536,9 +478,7 @@ def train(hyp, opt, device, callbacks):
# Optimize - https://pytorch.org/docs/master/notes/amp_examples.html
if ni - last_opt_step >= accumulate:
scaler.unscale_(optimizer) # unscale gradients
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10.0) # clip gradients
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
@@ -560,9 +500,7 @@ def train(hyp, opt, device, callbacks):
imgs.shape[-1],
)
)
callbacks.run("on_train_batch_end", model, ni, imgs, targets, paths, list(mloss))
if callbacks.stop_training:
return
# end batch ------------------------------------------------------------------------------------------------
@@ -574,9 +512,7 @@ def train(hyp, opt, device, callbacks):
if RANK in {-1, 0}:
# mAP
callbacks.run("on_train_epoch_end", epoch=epoch)
ema.update_attr(model, include=["yaml", "nc", "hyp", "names", "stride", "class_weights"])
final_epoch = (epoch + 1 == epochs) or stopper.possible_stop
if not noval or final_epoch: # Calculate mAP
results, maps, _ = validate.run(
@@ -594,9 +530,7 @@ def train(hyp, opt, device, callbacks):
)
# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
stop = stopper(epoch=epoch, fitness=fi) # early stop check
if fi > best_fitness:
best_fitness = fi
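fitness() collapses the four validation metrics into one scalar; in YOLOv5's utils/metrics.py it is essentially a weighted sum dominated by mAP@0.5:0.95:

import numpy as np

def fitness(x):
    """Weighted combination of [P, R, mAP@.5, mAP@.5:.95] per row."""
    w = [0.0, 0.0, 0.1, 0.9]  # default YOLOv5 weights
    return (x[:, :4] * w).sum(1)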
@@ -624,16 +558,12 @@ def train(hyp, opt, device, callbacks):
if opt.save_period > 0 and epoch % opt.save_period == 0:
torch.save(ckpt, w / f"epoch{epoch}.pt")
del ckpt
callbacks.run("on_model_save", last, epoch, final_epoch, best_fitness, fi)
# EarlyStopping
if RANK != -1: # if DDP training
broadcast_list = [stop if RANK == 0 else None]
dist.broadcast_object_list(broadcast_list, 0) # broadcast 'stop' to all ranks
if RANK != 0:
stop = broadcast_list[0]
if stop:
@@ -642,9 +572,7 @@ def train(hyp, opt, device, callbacks):
# end epoch ----------------------------------------------------------------------------------------------------
# end training -----------------------------------------------------------------------------------------------------
if RANK in {-1, 0}:
LOGGER.info(f"\n{epoch - start_epoch + 1} epochs completed in {(time.time() - t0) / 3600:.3f} hours.")
for f in last, best:
if f.exists():
strip_optimizer(f) # strip optimizers
@@ -655,9 +583,7 @@ def train(hyp, opt, device, callbacks):
batch_size=batch_size // WORLD_SIZE * 2,
imgsz=imgsz,
model=attempt_load(f, device).half(),
iou_thres=(0.65 if is_coco else 0.60), # best pycocotools at iou 0.65
single_cls=single_cls,
dataloader=val_loader,
save_dir=save_dir,
@@ -705,13 +631,9 @@ def parse_opt(known=False):
- Tutorial: https://docs.ultralytics.com/yolov5/tutorials/train_custom_data
"""
parser = argparse.ArgumentParser()
parser.add_argument("--weights", type=str, default=ROOT / "yolov5s.pt", help="initial weights path")
parser.add_argument("--cfg", type=str, default="", help="model.yaml path")
parser.add_argument("--data", type=str, default=ROOT / "data/coco128.yaml", help="dataset.yaml path")
parser.add_argument(
"--hyp",
type=str,
@@ -741,15 +663,9 @@ def parse_opt(known=False):
default=False,
help="resume most recent training",
)
parser.add_argument("--nosave", action="store_true", help="only save final checkpoint")
parser.add_argument("--noval", action="store_true", help="only validate final epoch")
parser.add_argument("--noautoanchor", action="store_true", help="disable AutoAnchor")
parser.add_argument("--noplots", action="store_true", help="save no plot files")
parser.add_argument(
"--evolve",
@@ -771,20 +687,14 @@ def parse_opt(known=False):
help="resume evolve from last generation",
)
parser.add_argument("--bucket", type=str, default="", help="gsutil bucket")
parser.add_argument("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk")
parser.add_argument(
"--image-weights",
action="store_true",
help="use weighted image selection for training",
)
parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
parser.add_argument("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
parser.add_argument(
"--single-cls",
action="store_true",
@@ -808,9 +718,7 @@ def parse_opt(known=False):
default=8,
help="max dataloader workers (per RANK in DDP mode)",
)
parser.add_argument("--project", default=ROOT / "runs/train", help="save to project/name")
parser.add_argument("--name", default="exp", help="save to project/name")
parser.add_argument(
"--exist-ok",
@ -819,9 +727,7 @@ def parse_opt(known=False):
)
parser.add_argument("--quad", action="store_true", help="quad dataloader")
parser.add_argument("--cos-lr", action="store_true", help="cosine LR scheduler")
parser.add_argument("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
parser.add_argument(
"--patience",
type=int,
@@ -872,9 +778,7 @@ def parse_opt(known=False):
)
# NDJSON logging
parser.add_argument("--ndjson-console", action="store_true", help="Log ndjson to console")
parser.add_argument("--ndjson-file", action="store_true", help="Log ndjson to file")
return parser.parse_known_args()[0] if known else parser.parse_args()
@@ -903,9 +807,7 @@ def main(opt, callbacks=Callbacks()):
# Resume (from specified or most recent last.pt)
if opt.resume and not check_comet_resume(opt) and not opt.evolve:
last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
opt_yaml = last.parent.parent / "opt.yaml" # train options yaml
opt_data = opt.data # original dataset
if opt_yaml.is_file():
@@ -925,13 +827,9 @@ def main(opt, callbacks=Callbacks()):
str(opt.weights),
str(opt.project),
) # checks
assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
if opt.evolve:
if opt.project == str(ROOT / "runs/train"): # if default project name, rename to runs/evolve
opt.project = str(ROOT / "runs/evolve")
opt.exist_ok, opt.resume = (
opt.resume,
False,
) # pass resume to exist_ok and disable resume
if opt.name == "cfg":
opt.name = Path(opt.cfg).stem # use model.yaml as name
opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))
# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
@@ -949,15 +845,9 @@ def main(opt, callbacks=Callbacks()):
msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
assert not opt.image_weights, f"--image-weights {msg}"
assert not opt.evolve, f"--evolve {msg}"
assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"
torch.cuda.set_device(LOCAL_RANK)
device = torch.device("cuda", LOCAL_RANK)
dist.init_process_group(
@@ -1059,18 +949,14 @@ def main(opt, callbacks=Callbacks()):
upper_limit = np.array([meta[k][2] for k in hyp_GA.keys()])
# Create gene_ranges list to hold the range of values for each gene in the population
gene_ranges = [(lower_limit[i], upper_limit[i]) for i in range(len(upper_limit))]
# Initialize the population with initial_values or random values
initial_values = []
# If resuming evolution from a previous checkpoint
if opt.resume_evolve is not None:
assert os.path.isfile(ROOT / opt.resume_evolve), "evolve population path is wrong!"
with open(ROOT / opt.resume_evolve, errors="ignore") as f:
evolve_population = yaml.safe_load(f)
for value in evolve_population.values():
@@ -1079,9 +965,7 @@ def main(opt, callbacks=Callbacks()):
# If not resuming from a previous checkpoint, generate initial values from .yaml files in opt.evolve_population
else:
yaml_files = [f for f in os.listdir(opt.evolve_population) if f.endswith(".yaml")]
for file_name in yaml_files:
with open(os.path.join(opt.evolve_population, file_name)) as yaml_file:
value = yaml.safe_load(yaml_file)
@@ -1090,14 +974,9 @@ def main(opt, callbacks=Callbacks()):
# Generate random values within the search space for the rest of the population
if initial_values is None:
population = [generate_individual(gene_ranges, len(hyp_GA)) for _ in range(pop_size)]
elif pop_size > 1:
population = [generate_individual(gene_ranges, len(hyp_GA)) for _ in range(pop_size - len(initial_values))]
for initial_value in initial_values:
population = [initial_value] + population
@@ -1107,19 +986,14 @@ def main(opt, callbacks=Callbacks()):
if generation >= 1:
save_dict = {}
for i in range(len(population)):
little_dict = {list_keys[j]: float(population[i][j]) for j in range(len(population[i]))}
save_dict[f"gen{str(generation)}number{str(i)}"] = little_dict
with open(save_dir / "evolve_population.yaml", "w") as outfile:
yaml.dump(save_dict, outfile, default_flow_style=False)
# Adaptive elite size
elite_size = min_elite_size + int((max_elite_size - min_elite_size) * (generation / opt.evolve))
# Evaluate the fitness of each individual in the population
fitness_scores = []
for individual in population:
@@ -1147,25 +1021,16 @@ def main(opt, callbacks=Callbacks()):
# Adaptive tournament size
tournament_size = max(
max(2, tournament_size_min),
int(min(tournament_size_max, pop_size) - (generation / (opt.evolve / 10))),
)
# Perform tournament selection to choose the best individual
tournament_indices = random.sample(range(pop_size), tournament_size)
tournament_fitness = [fitness_scores[j] for j in tournament_indices]
winner_index = tournament_indices[tournament_fitness.index(max(tournament_fitness))]
selected_indices.append(winner_index)
# Add the elite individuals to the selected indices
elite_indices = [i for i in range(pop_size) if fitness_scores[i] in sorted(fitness_scores)[-elite_size:]]
selected_indices.extend(elite_indices)
# Create the next generation through crossover and mutation
next_generation = []
@ -1182,25 +1047,18 @@ def main(opt, callbacks=Callbacks()):
)
if random.uniform(0, 1) < crossover_rate:
crossover_point = random.randint(1, len(hyp_GA) - 1)
child = population[parent1_index][:crossover_point] + population[parent2_index][crossover_point:]
else:
child = population[parent1_index]
# Adaptive mutation rate
mutation_rate = max(
mutation_rate_min,
min(mutation_rate_max, mutation_rate_max - (generation / opt.evolve)),
)
for j in range(len(hyp_GA)):
if random.uniform(0, 1) < mutation_rate:
child[j] += random.uniform(-0.1, 0.1)
child[j] = min(max(child[j], gene_ranges[j][0]), gene_ranges[j][1])
next_generation.append(child)
# Replace the old population with the new generation
population = next_generation
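The --evolve loop above is a plain generational GA: adaptive elite size, tournament selection, single-point crossover, and clamped uniform mutation. A condensed, self-contained sketch of one selection-plus-variation step (helper names are illustrative, not from the source):

import random

def tournament_select(fitness_scores, k):
    """Index of the fittest among k randomly drawn individuals."""
    idx = random.sample(range(len(fitness_scores)), k)
    return max(idx, key=lambda i: fitness_scores[i])

def make_child(p1, p2, gene_ranges, mutation_rate=0.1):
    """Single-point crossover followed by clamped uniform mutation."""
    cut = random.randint(1, len(p1) - 1)
    child = list(p1[:cut]) + list(p2[cut:])
    for j, (lo, hi) in enumerate(gene_ranges):
        if random.random() < mutation_rate:
            child[j] = min(max(child[j] + random.uniform(-0.1, 0.1), lo), hi)
    return child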


@@ -14,9 +14,7 @@ def check_train_batch_size(model, imgsz=640, amp=True):
"""Checks and computes optimal training batch size for YOLOv5 model, given image size and AMP setting."""
if check_version(torch.__version__, "2.4.0"):
with torch.amp.autocast("cuda", enabled=amp):
return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
with torch.cuda.amp.autocast(amp):
return autobatch(deepcopy(model).train(), imgsz) # compute optimal batch size
@@ -34,14 +32,10 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
LOGGER.info(f"{prefix}Computing optimal batch size for --imgsz {imgsz}")
device = next(model.parameters()).device # get model device
if device.type == "cpu":
LOGGER.info(f"{prefix}CUDA not detected, using default CPU batch-size {batch_size}")
return batch_size
if torch.backends.cudnn.benchmark:
LOGGER.info(f"{prefix} ⚠️ Requires torch.backends.cudnn.benchmark=False, using default batch-size {batch_size}")
return batch_size
# Inspect CUDA memory
@@ -52,9 +46,7 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
r = torch.cuda.memory_reserved(device) / gb # GiB reserved
a = torch.cuda.memory_allocated(device) / gb # GiB allocated
f = t - (r + a) # GiB free
LOGGER.info(f"{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free")
# Profile batch sizes
batch_sizes = [1, 2, 4, 8, 16]
@@ -74,12 +66,8 @@ def autobatch(model, imgsz=640, fraction=0.8, batch_size=16):
b = batch_sizes[max(i - 1, 0)] # select prior safe point
if b < 1 or b > 1024: # b outside of safe range
b = batch_size
LOGGER.warning(f"{prefix}WARNING ⚠️ CUDA anomaly detected, recommend restart environment and retry command.")
fraction = (np.polyval(p, b) + r + a) / t # actual fraction predicted
LOGGER.info(f"{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%) ✅")
return b
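autobatch's core trick is visible in this last hunk: measure CUDA memory at batch sizes [1, 2, 4, 8, 16], fit a first-degree polynomial, then solve it for the batch size that fills the target fraction of free memory. The fit-and-solve step in isolation, with made-up measurements:

import numpy as np

batch_sizes = [1, 2, 4, 8, 16]
y = [0.5, 0.9, 1.7, 3.3, 6.5]  # hypothetical GiB used at each batch size
p = np.polyfit(batch_sizes, y, deg=1)  # y ~= p[0] * b + p[1]
f, fraction = 10.0, 0.8  # hypothetical GiB free and target utilization
b = int((f * fraction - p[1]) / p[0])  # largest batch size within budget, here ~19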