mirror of https://github.com/WongKinYiu/yolov7.git
detect.py: support of txt file list
FL with configured weighted loss: alpha
test.py: tSNE over the cropped bbox features
pull/2071/head
parent 6b8d33fa31
commit 78cd0cb8f6
@@ -0,0 +1,37 @@
lr0: 0.001  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum / Adam beta1
weight_decay: 0.005  # optimizer weight decay; raised from the 5e-4 default to mitigate overfitting observed in test mAP
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.001  # warmup initial bias lr
loss_ota: 0  # use ComputeLossOTA (1); 0 for faster training
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.6  # IoU training threshold (code default was 0.2)
anchor_t: 4.0  # anchor-multiple threshold
anchors: 2  # anchors per output layer (0 to ignore); TODO(HK): was 3, modify back to 3
fl_gamma: 3.0  # focal loss gamma (EfficientDet default gamma=1.5)
hsv_h: 0.0  # image HSV-Hue augmentation (fraction)
hsv_s: 0.0  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.0  # image HSV-Value augmentation (fraction)
degrees: 0  # image rotation (+/- deg)
translate: 0.2  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.3  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.5  # image mosaic (probability)
mixup: 0.15  # image mixup (probability)
copy_paste: 0.0  # image copy-paste (probability)
paste_in: 0.1  # image copy-paste / cutout (probability); use 0 for faster training
inversion: 0.5  # image polarity inversion, i.e. opposite temperature (probability)
img_percentile_removal: 0.3
beta: 0.3
random_perspective: 1
scaling_before_mosaic: 1
gamma: 80  # gamma augmentation (percent); 90 gives more stability
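
The fl_gamma and cls_pw/obj_pw values above are the focal-loss and BCE-weighting knobs referenced by the commit message ("FL with configured weighted loss: alpha"). Below is a minimal sketch of how gamma and an alpha class weighting combine in the BCE-wrapping FocalLoss pattern used by utils/loss.py; the explicit alpha argument and the variable names are illustrative assumptions, not code from this commit.

import torch
import torch.nn as nn

class FocalLoss(nn.Module):
    # Sketch: wraps an element-wise BCE loss with the focal term (1 - p_t)^gamma
    # and an alpha positive/negative balance (assumed, per the commit message).
    def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
        super().__init__()
        self.loss_fcn = loss_fcn  # must be constructed with reduction='none'
        self.gamma = gamma        # fl_gamma above (3.0)
        self.alpha = alpha        # the weighted-loss "alpha"

    def forward(self, pred, true):
        loss = self.loss_fcn(pred, true)   # element-wise BCE on logits
        pred_prob = torch.sigmoid(pred)    # logits -> probabilities
        p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
        alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
        return (loss * alpha_factor * (1.0 - p_t) ** self.gamma).mean()

# cls_pw feeds BCE's pos_weight; fl_gamma > 0 enables the focal wrapper.
bce_cls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([1.0]), reduction='none')
cls_loss_fn = FocalLoss(bce_cls, gamma=3.0)
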
@@ -0,0 +1,124 @@
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import torchvision.ops as ops
import os


class ObjectEmbeddingVisualizer:
    def __init__(self, model, device):
        self.model = model
        self.device = device

    def extract_object_features(self, image, predictions):
        with torch.no_grad():
            # Get the backbone feature maps
            if hasattr(self.model, 'model'):
                feature_maps = self.model.model.backbone(image.to(self.device))
            else:
                feature_maps = self.model.backbone(image.to(self.device))

            # Get boxes and labels from the first image's predictions
            boxes = predictions[0].boxes.xyxy  # x1, y1, x2, y2
            labels = predictions[0].boxes.cls

            object_features = []
            for scale_idx, feat_map in enumerate(feature_maps):
                # Ratio between input image size and this feature map
                scale_h = image.shape[2] / feat_map.shape[2]
                scale_w = image.shape[3] / feat_map.shape[3]

                # Scale boxes down to feature-map coordinates
                scaled_boxes = boxes.clone()
                scaled_boxes[:, [0, 2]] = scaled_boxes[:, [0, 2]] / scale_w
                scaled_boxes[:, [1, 3]] = scaled_boxes[:, [1, 3]] / scale_h

                # ROI pooling over each box
                roi_features = ops.roi_pool(feat_map, [scaled_boxes.to(self.device)],
                                            output_size=(7, 7))

                # Global average pooling: one vector per box
                pooled_features = F.adaptive_avg_pool2d(roi_features, (1, 1))
                object_features.append(pooled_features.squeeze(-1).squeeze(-1))

            # Concatenate features from all scales
            all_features = torch.cat(object_features, dim=1)
            return all_features.cpu().numpy(), labels.cpu().numpy()
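
    # Worked example of the box-to-feature-map mapping above (illustrative
    # numbers, not from this commit): for a 640x640 input and a 20x20 feature
    # map, scale_w = scale_h = 640 / 20 = 32, so an image-space box
    # (64, 96, 320, 256) maps to (2, 3, 10, 8) on the feature map before
    # roi_pool crops it and resizes the crop to the 7x7 output grid.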

    def extract_object_grounded_features(self, feature_maps, predictions, image_shape: tuple):
        scale = 2  # use only the last (coarsest) of the 3 scales
        assert len(image_shape) == 4, 'image_shape should be [batch, ch, h, w]'
        object_cls = list()
        object_features = []
        try:
            for i_batch, pred in enumerate(predictions):
                for scale_idx, feat_map_all_batches in enumerate(feature_maps):  # 3 feature maps, one per scale
                    if scale_idx != scale:
                        continue  # take only the selected scale
                    feat_map = feat_map_all_batches[i_batch, :, :, :]
                    boxes = pred[:, :4]  # x1, y1, x2, y2
                    labels = pred[:, 5]
                    # Ratio between input image size and this feature map
                    scale_h = image_shape[2] / feat_map.shape[1]
                    scale_w = image_shape[3] / feat_map.shape[2]

                    # Scale boxes down to feature-map coordinates
                    scaled_boxes = boxes.clone()
                    scaled_boxes[:, [0, 2]] = scaled_boxes[:, [0, 2]] / scale_w
                    scaled_boxes[:, [1, 3]] = scaled_boxes[:, [1, 3]] / scale_h

                    # ROI pooling over each box
                    roi_features = ops.roi_pool(feat_map.float()[None, ...],
                                                [scaled_boxes.to(self.device)],
                                                output_size=(7, 7))

                    # Global average pooling: one vector per box
                    pooled_features = F.adaptive_avg_pool2d(roi_features, (1, 1))
                    for x in pooled_features:
                        object_features.append(x.squeeze(-1).squeeze(-1)[None, ...])
                    for x in labels:
                        object_cls.append(x.cpu().numpy())
            # Concatenate features from all images
            all_features = torch.cat(object_features, dim=0)
            object_cls = np.array(object_cls)
        except Exception as e:
            raise Exception(f'Error extracting features for batch {i_batch}: {e}')

        return all_features, object_cls

    def visualize_object_embeddings(self, features, labels, path, tag=''):
        if torch.is_tensor(features):
            features = features.cpu().numpy()
        tsne = TSNE(n_components=2, perplexity=min(30, len(features) - 1))
        embeddings_2d = tsne.fit_transform(features)

        plt.figure(figsize=(10, 10))
        scatter = plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=labels)
        plt.colorbar(scatter, label='Object Class')
        plt.title('Object embeddings: support {}, classes {}'.format(features.shape[0],
                                                                     np.unique(labels).size))
        plt.savefig(os.path.join(path, 'tsne' + str(tag) + '.png'))  # save before show(), which clears the figure
        plt.show()

        return embeddings_2d

    def process_image(self, image_tensor, path=''):
        predictions = self.model(image_tensor)
        features, labels = self.extract_object_features(image_tensor, predictions)
        embeddings = self.visualize_object_embeddings(features, labels, path)
        return embeddings, labels

"""
# Usage example
visualizer = ObjectEmbeddingVisualizer(model, device)
# Assuming image_tensor is an input image [1, C, H, W]
embeddings, labels = visualizer.process_image(image_tensor)
"""