diff --git a/data/hyp.scratch.custom.yaml b/data/hyp.scratch.custom.yaml
index 92b50d3..8570d73 100644
--- a/data/hyp.scratch.custom.yaml
+++ b/data/hyp.scratch.custom.yaml
@@ -27,4 +27,5 @@ fliplr: 0.5 # image flip left-right (probability)
 mosaic: 1.0 # image mosaic (probability)
 mixup: 0.0 # image mixup (probability)
 copy_paste: 0.0 # image copy paste (probability)
-paste_in: 0.0 # image copy paste (probability)
+paste_in: 0.0 # image copy paste (probability), use 0 for faster training
+loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
\ No newline at end of file
diff --git a/data/hyp.scratch.p5.yaml b/data/hyp.scratch.p5.yaml
index a64c404..a409bac 100644
--- a/data/hyp.scratch.p5.yaml
+++ b/data/hyp.scratch.p5.yaml
@@ -27,4 +27,5 @@ fliplr: 0.5 # image flip left-right (probability)
 mosaic: 1.0 # image mosaic (probability)
 mixup: 0.15 # image mixup (probability)
 copy_paste: 0.0 # image copy paste (probability)
-paste_in: 0.15 # image copy paste (probability)
+paste_in: 0.15 # image copy paste (probability), use 0 for faster training
+loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
\ No newline at end of file
diff --git a/data/hyp.scratch.p6.yaml b/data/hyp.scratch.p6.yaml
index 6ab7c01..192d0d5 100644
--- a/data/hyp.scratch.p6.yaml
+++ b/data/hyp.scratch.p6.yaml
@@ -27,4 +27,5 @@ fliplr: 0.5 # image flip left-right (probability)
 mosaic: 1.0 # image mosaic (probability)
 mixup: 0.15 # image mixup (probability)
 copy_paste: 0.0 # image copy paste (probability)
-paste_in: 0.15 # image copy paste (probability)
+paste_in: 0.15 # image copy paste (probability), use 0 for faster training
+loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
\ No newline at end of file
diff --git a/data/hyp.scratch.tiny.yaml b/data/hyp.scratch.tiny.yaml
index 01c6f49..b0dc14a 100644
--- a/data/hyp.scratch.tiny.yaml
+++ b/data/hyp.scratch.tiny.yaml
@@ -27,4 +27,5 @@ fliplr: 0.5 # image flip left-right (probability)
 mosaic: 1.0 # image mosaic (probability)
 mixup: 0.05 # image mixup (probability)
 copy_paste: 0.0 # image copy paste (probability)
-paste_in: 0.05 # image copy paste (probability)
+paste_in: 0.05 # image copy paste (probability), use 0 for faster training
+loss_ota: 1 # use ComputeLossOTA, use 0 for faster training
diff --git a/train.py b/train.py
index c6db018..2864636 100644
--- a/train.py
+++ b/train.py
@@ -359,7 +359,10 @@ def train(hyp, opt, device, tb_writer=None):
             # Forward
             with amp.autocast(enabled=cuda):
                 pred = model(imgs)  # forward
-                loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs)  # loss scaled by batch_size
+                if hyp['loss_ota'] == 1:
+                    loss, loss_items = compute_loss_ota(pred, targets.to(device), imgs)  # loss scaled by batch_size
+                else:
+                    loss, loss_items = compute_loss(pred, targets.to(device))  # loss scaled by batch_size
                 if rank != -1:
                     loss *= opt.world_size  # gradient averaged between devices in DDP mode
                 if opt.quad:
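
For context, a minimal sketch (not part of the diff) of how the new `loss_ota` key can be read from one of the updated hyp files with PyYAML and used to pick between the two losses. The file path and the `.get(..., 1)` fallback are illustrative assumptions for hyp files written before this change; the diff itself indexes `hyp['loss_ota']` directly.

```python
import yaml

# Load a hyperparameter YAML into a dict (mosaic, mixup, paste_in, loss_ota, ...).
with open("data/hyp.scratch.p5.yaml") as f:
    hyp = yaml.safe_load(f)

# Default to 1 (OTA) if the key is absent, mimicking the pre-change behavior.
use_ota = hyp.get("loss_ota", 1) == 1  # 1 -> ComputeLossOTA, 0 -> plain ComputeLoss (faster)
print("OTA loss enabled" if use_ota else "OTA loss disabled for faster training")
```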