detect.py: support for txt file lists

FL (focal loss) with configurable alpha loss weighting
test.py: t-SNE over the cropped bbox features
pull/2071/head
hanoch 2025-02-16 10:05:06 +02:00
parent 6b8d33fa31
commit 78cd0cb8f6
2 changed files with 161 additions and 0 deletions


@@ -0,0 +1,37 @@
lr0: 0.001 # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01 # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937 # SGD momentum/Adam beta1
weight_decay: 0.005 # optimizer weight decay (default 5e-4); resolves the mAP issue seen in the overfitting test
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.001 # warmup initial bias lr
loss_ota: 0 #1 # use ComputeLossOTA, use 0 for faster training
box: 0.05 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 1.0 # obj loss gain (scale with pixels)
obj_pw: 1.0 # obj BCELoss positive_weight
iou_t: 0.6 # IoU training threshold (code default was 0.2)
anchor_t: 4.0 # anchor-multiple threshold
anchors: 2 # anchors per output layer (0 to ignore); HK TODO: modify to 3 (was 3)
fl_gamma: 3.0 # focal loss gamma (EfficientDet default gamma=1.5); was 1.5
hsv_h: 0.0 # image HSV-Hue augmentation (fraction)
hsv_s: 0.0 # image HSV-Saturation augmentation (fraction)
hsv_v: 0.0 # image HSV-Value augmentation (fraction)
degrees: 0 # image rotation (+/- deg)
translate: 0.2 #0.2 # image translation (+/- fraction)
scale: 0.5 # image scale (+/- gain)
shear: 0.0 # image shear (+/- deg)
perspective: 0.0 # image perspective (+/- fraction), range 0-0.001
flipud: 0.3 # image flip up-down (probability)
fliplr: 0.5 # image flip left-right (probability)
mosaic: 0.5 # image mosaic (probability)
mixup: 0.15 # image mixup (probability)
copy_paste: 0.0 # image copy paste (probability)
paste_in: 0.1 # image copy-paste with cutout (probability); use 0 for faster training
inversion: 0.5 # opposite temperature (polarity inversion)
img_percentile_removal: 0.3
beta: 0.3
random_perspective: 1
scaling_before_mosaic: 1
gamma: 80 # percent (90 percent gives more stability to the gamma augmentation)
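
The fl_gamma and cls_pw entries above control how the classification/objectness criteria are built. Below is a minimal sketch of that wiring, assuming a YOLOv5/YOLOv7-style FocalLoss wrapper around BCEWithLogitsLoss with the alpha weighting mentioned in the commit message; the YAML file name and the FocalLoss class here are illustrative, not the repository's actual implementation.

# Hedged sketch: load the hyperparameter YAML and build the classification loss.
import yaml
import torch
import torch.nn as nn

class FocalLoss(nn.Module):
    # Wraps BCEWithLogitsLoss with a focal modulating factor (gamma) and class weighting (alpha)
    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn
        self.gamma = gamma
        self.alpha = alpha
        self.loss_fcn.reduction = 'none'  # weight per element, reduce at the end

    def forward(self, pred, true):
        loss = self.loss_fcn(pred, true)                              # -log(p_t), element-wise
        p = torch.sigmoid(pred)
        p_t = true * p + (1 - true) * (1 - p)
        alpha_t = true * self.alpha + (1 - true) * (1 - self.alpha)   # configurable alpha weighting
        return (alpha_t * (1.0 - p_t) ** self.gamma * loss).mean()

with open('hyp.tir.scratch.yaml') as f:                               # illustrative file name
    hyp = yaml.safe_load(f)

bce_cls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([hyp['cls_pw']]))
cls_loss = FocalLoss(bce_cls, gamma=hyp['fl_gamma']) if hyp['fl_gamma'] > 0 else bce_cls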


@@ -0,0 +1,124 @@
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import torchvision.ops as ops
import os
class ObjectEmbeddingVisualizer:
    def __init__(self, model, device):
        # Alternatively, the model could be loaded via torch.hub, e.g.
        # torch.hub.load('WongKinYiu/yolov7', 'custom', ...) or
        # torch.hub.load('ultralytics/yolov5', 'yolov5s'), then moved to the device in eval mode.
        self.model = model
        self.device = device

    def extract_object_features(self, image, predictions):
        with torch.no_grad():
            # Get feature maps
            if hasattr(self.model, 'model'):
                feature_maps = self.model.model.backbone(image.to(self.device))
            else:
                feature_maps = self.model.backbone(image.to(self.device))
            # Get boxes and labels
            boxes = predictions[0].boxes.xyxy  # x1, y1, x2, y2
            labels = predictions[0].boxes.cls
            object_features = []
            for scale_idx, feat_map in enumerate(feature_maps):
                # Calculate scale ratio between input image and feature map
                scale_h = image.shape[2] / feat_map.shape[2]
                scale_w = image.shape[3] / feat_map.shape[3]
                # Scale boxes to feature map size
                scaled_boxes = boxes.clone()
                scaled_boxes[:, [0, 2]] = scaled_boxes[:, [0, 2]] / scale_w
                scaled_boxes[:, [1, 3]] = scaled_boxes[:, [1, 3]] / scale_h
                # ROI pooling over the scaled boxes
                roi_features = ops.roi_pool(feat_map, [scaled_boxes.to(self.device)],
                                            output_size=(7, 7))
                # Global average pooling
                pooled_features = F.adaptive_avg_pool2d(roi_features, (1, 1))
                object_features.append(pooled_features.squeeze(-1).squeeze(-1))
            # Concatenate features from all scales
            all_features = torch.cat(object_features, dim=1)
            return all_features.cpu().numpy(), labels.cpu().numpy()
    def extract_object_grounded_features(self, feature_maps, predictions, image_shape: tuple):
        scale = 2
        assert len(image_shape) == 4, 'image_shape should be [batch, ch, h, w]'
        object_cls = list()
        object_features = []
        try:
            for i_batch, pred in enumerate(predictions):
                for scale_idx, feat_map_all_batches in enumerate(feature_maps):  # run over the 3 feature maps (3 scales) of all batches
                    if scale_idx != scale:
                        continue  # take only the last scale
                    feat_map = feat_map_all_batches[i_batch, :, :, :]
                    boxes = pred[:, :4]  # x1, y1, x2, y2
                    labels = pred[:, 5]
                    # Calculate scale ratio between input image and feature map
                    scale_h = image_shape[2] / feat_map.shape[1]
                    scale_w = image_shape[3] / feat_map.shape[2]
                    # Scale boxes to feature map size
                    scaled_boxes = boxes.clone()
                    scaled_boxes[:, [0, 2]] = scaled_boxes[:, [0, 2]] / scale_w
                    scaled_boxes[:, [1, 3]] = scaled_boxes[:, [1, 3]] / scale_h
                    # ROI pooling over the selected scale
                    roi_features = ops.roi_pool(feat_map.float()[None, ...], [scaled_boxes.to(self.device)],
                                                output_size=(7, 7))
                    # Global average pooling to one embedding vector per detection
                    pooled_features = F.adaptive_avg_pool2d(roi_features, (1, 1))
                    for x in pooled_features:
                        object_features.append(x.squeeze(-1).squeeze(-1)[None, ...])
                    for x in labels:
                        object_cls.append(x.cpu().numpy())
            # Concatenate per-object features over the whole batch
            all_features = torch.cat(object_features, dim=0)
            object_cls = np.array(object_cls)
        except Exception as e:
            raise Exception(f'Error extracting object embeddings for batch {i_batch}: {e}')
        return all_features, object_cls
    def visualize_object_embeddings(self, features, labels, path, tag=''):
        # t-SNE expects a numpy array; the embeddings may arrive as a torch tensor
        if torch.is_tensor(features):
            features = features.cpu().numpy()
        tsne = TSNE(n_components=2, perplexity=min(30, len(features) - 1))
        embeddings_2d = tsne.fit_transform(features)
        plt.figure(figsize=(10, 10))
        scatter = plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=labels)
        plt.colorbar(scatter, label='Object Class')
        plt.title('Object embeddings: support {}, classes {}'.format(features.shape[0],
                                                                      np.unique(labels).size))
        # Save before show, otherwise the figure may already be cleared
        plt.savefig(os.path.join(path, 'tsne' + str(tag) + '.png'))
        plt.show()
        return embeddings_2d

    def process_image(self, image_tensor, path='.'):
        predictions = self.model(image_tensor)
        features, labels = self.extract_object_features(image_tensor, predictions)
        embeddings = self.visualize_object_embeddings(features, labels, path)
        return embeddings, labels
"""
# Usage example
visualizer = ObjectEmbeddingVisualizer()
# Assuming image_tensor is your input image [1, C, H, W]
embeddings, labels = visualizer.process_image(image_tensor)
"""