mirror of https://github.com/WongKinYiu/yolov7.git

detect.py: support for a txt file list
FL with configured weighted loss (alpha); test.py: t-SNE over the cropped bbox features (pull/2071/head)

parent 6b8d33fa31
commit 78cd0cb8f6
@@ -0,0 +1,37 @@
lr0: 0.001  # initial learning rate (SGD=1E-2, Adam=1E-3)
lrf: 0.01  # final OneCycleLR learning rate (lr0 * lrf)
momentum: 0.937  # SGD momentum / Adam beta1
weight_decay: 0.005  # optimizer weight decay (default 5e-4); raised to curb the overfitting seen in mAP
warmup_epochs: 3.0  # warmup epochs (fractions ok)
warmup_momentum: 0.8  # warmup initial momentum
warmup_bias_lr: 0.001  # warmup initial bias lr
loss_ota: 0  # use ComputeLossOTA (1); 0 for faster training
box: 0.05  # box loss gain
cls: 0.5  # cls loss gain
cls_pw: 1.0  # cls BCELoss positive_weight
obj: 1.0  # obj loss gain (scale with pixels)
obj_pw: 1.0  # obj BCELoss positive_weight
iou_t: 0.6  # IoU training threshold (code default was 0.2)
anchor_t: 4.0  # anchor-multiple threshold
anchors: 2  # anchors per output layer (0 to ignore); TODO (HK): was 3, consider restoring to 3
fl_gamma: 3.0  # focal loss gamma (EfficientDet default gamma=1.5)
hsv_h: 0.0  # image HSV-Hue augmentation (fraction)
hsv_s: 0.0  # image HSV-Saturation augmentation (fraction)
hsv_v: 0.0  # image HSV-Value augmentation (fraction)
degrees: 0  # image rotation (+/- deg)
translate: 0.2  # image translation (+/- fraction)
scale: 0.5  # image scale (+/- gain)
shear: 0.0  # image shear (+/- deg)
perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
flipud: 0.3  # image flip up-down (probability)
fliplr: 0.5  # image flip left-right (probability)
mosaic: 0.5  # image mosaic (probability)
mixup: 0.15  # image mixup (probability)
copy_paste: 0.0  # image copy-paste (probability)
paste_in: 0.1  # image copy-paste / cutout (probability); use 0 for faster training
inversion: 0.5  # opposite-temperature (polarity) inversion (probability)
img_percentile_removal: 0.3
beta: 0.3
random_perspective: 1
scaling_before_mosaic: 1
gamma: 80  # percent; 90 gives more stability to gamma
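For context, fl_gamma > 0 is what switches the classification/objectness BCE losses to focal loss in the YOLOv5/YOLOv7 loss setup, which is what the commit title ("FL with configured weighted loss: alpha") refers to. A minimal sketch of that wiring, following the FocalLoss class shipped in upstream utils/loss.py; the fl_alpha key is hypothetical here (upstream hard-codes alpha=0.25):

    import yaml
    import torch
    import torch.nn as nn

    class FocalLoss(nn.Module):
        # Wraps a BCEWithLogitsLoss criterion with focal-loss modulation.
        def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
            super().__init__()
            self.loss_fcn = loss_fcn  # must be nn.BCEWithLogitsLoss()
            self.gamma = gamma
            self.alpha = alpha
            self.reduction = loss_fcn.reduction
            self.loss_fcn.reduction = 'none'  # weight each element before reducing

        def forward(self, pred, true):
            loss = self.loss_fcn(pred, true)
            pred_prob = torch.sigmoid(pred)
            p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
            alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
            loss *= alpha_factor * (1.0 - p_t) ** self.gamma
            if self.reduction == 'mean':
                return loss.mean()
            if self.reduction == 'sum':
                return loss.sum()
            return loss

    # hyp is the dict loaded from this YAML file:
    with open('hyp.yaml') as f:
        hyp = yaml.safe_load(f)
    if hyp['fl_gamma'] > 0:
        # fl_alpha is an assumed key; upstream uses the default alpha=0.25
        BCEcls = FocalLoss(nn.BCEWithLogitsLoss(), hyp['fl_gamma'], hyp.get('fl_alpha', 0.25))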
@@ -0,0 +1,124 @@
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import torchvision.ops as ops
import os


class ObjectEmbeddingVisualizer:
    def __init__(self, model, device):
        # Alternative: load a pretrained model via torch.hub, e.g.
        #   torch.hub.load('WongKinYiu/yolov7', 'custom', ...) or
        #   torch.hub.load('ultralytics/yolov5', 'yolov5s')
        # Here the caller supplies an already-built model and device.
        self.model = model
        self.device = device
    def extract_object_features(self, image, predictions):
        with torch.no_grad():
            # Get feature maps from the backbone
            if hasattr(self.model, 'model'):
                feature_maps = self.model.model.backbone(image.to(self.device))
            else:
                feature_maps = self.model.backbone(image.to(self.device))

            # Get boxes and labels
            boxes = predictions[0].boxes.xyxy  # x1, y1, x2, y2
            labels = predictions[0].boxes.cls

            object_features = []
            for scale_idx, feat_map in enumerate(feature_maps):
                # Ratio between the input image and this feature map
                scale_h = image.shape[2] / feat_map.shape[2]
                scale_w = image.shape[3] / feat_map.shape[3]

                # Scale boxes to feature-map coordinates
                scaled_boxes = boxes.clone()
                scaled_boxes[:, [0, 2]] = scaled_boxes[:, [0, 2]] / scale_w
                scaled_boxes[:, [1, 3]] = scaled_boxes[:, [1, 3]] / scale_h

                # ROI pooling over each detected box
                roi_features = ops.roi_pool(feat_map, [scaled_boxes.to(self.device)],
                                            output_size=(7, 7))

                # Global average pooling to one vector per object
                pooled_features = F.adaptive_avg_pool2d(roi_features, (1, 1))
                object_features.append(pooled_features.squeeze(-1).squeeze(-1))

            # Concatenate features from all scales
            all_features = torch.cat(object_features, dim=1)
            return all_features.cpu().numpy(), labels.cpu().numpy()
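Note that the predictions[0].boxes.xyxy / .cls accessors above assume an ultralytics-style Results object. If predictions instead comes from YOLOv7's non_max_suppression (one [n, 6] tensor of x1, y1, x2, y2, conf, cls per image), a hedged adaptation would be:

    # Hypothetical adaptation for raw NMS output (list of [n, 6] tensors):
    det = predictions[0]
    boxes = det[:, :4]   # x1, y1, x2, y2
    labels = det[:, 5]   # class index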
    def extract_object_grounded_features(self, feature_maps, predictions, image_shape: tuple):
        scale = 2  # index of the scale to keep (the last of the 3 FPN scales)
        assert len(image_shape) == 4, 'image shape should be tensor [batch, ch, h, w]'
        object_cls = []
        object_features = []
        try:
            for i_batch, pred in enumerate(predictions):
                # Run over the 3 feature maps (one per scale) for this batch element
                for scale_idx, feat_map_all_batches in enumerate(feature_maps):
                    if scale_idx != scale:
                        continue  # take only the last scale
                    feat_map = feat_map_all_batches[i_batch, :, :, :]
                    boxes = pred[:, :4]  # x1, y1, x2, y2
                    labels = pred[:, 5]

                    # Ratio between the input image and this feature map
                    scale_h = image_shape[2] / feat_map.shape[1]
                    scale_w = image_shape[3] / feat_map.shape[2]

                    # Scale boxes to feature-map coordinates
                    scaled_boxes = boxes.clone()
                    scaled_boxes[:, [0, 2]] = scaled_boxes[:, [0, 2]] / scale_w
                    scaled_boxes[:, [1, 3]] = scaled_boxes[:, [1, 3]] / scale_h

                    # ROI pooling over each detected box
                    roi_features = ops.roi_pool(feat_map.float()[None, ...],
                                                [scaled_boxes.to(self.device)],
                                                output_size=(7, 7))

                    # Global average pooling to one vector per object
                    pooled_features = F.adaptive_avg_pool2d(roi_features, (1, 1))
                    for x in pooled_features:
                        object_features.append(x.squeeze(-1).squeeze(-1)[None, ...])
                    for x in labels:
                        object_cls.append(x.cpu().numpy())

            # Concatenate per-object features over the whole batch
            all_features = torch.cat(object_features, dim=0)
            object_cls = np.array(object_cls)
        except Exception as e:
            raise Exception(f'Error extracting features for batch {i_batch}: {e}')

        return all_features, object_cls
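As a design note, torchvision.ops.roi_pool also accepts a spatial_scale argument, so the manual box rescaling above could be folded into the call when the feature map is downscaled by a single stride (i.e. scale_h == scale_w):

    # Equivalent when the feature map stride is uniform (`stride` is the h==w ratio):
    roi_features = ops.roi_pool(feat_map.float()[None, ...],
                                [boxes.to(self.device)],
                                output_size=(7, 7),
                                spatial_scale=1.0 / stride)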
    def visualize_object_embeddings(self, features, labels, path='.', tag=''):
        if isinstance(features, torch.Tensor):
            features = features.cpu().numpy()
        # Perplexity must be smaller than the number of samples
        tsne = TSNE(n_components=2, perplexity=min(30, len(features) - 1))
        embeddings_2d = tsne.fit_transform(features)

        plt.figure(figsize=(10, 10))
        scatter = plt.scatter(embeddings_2d[:, 0], embeddings_2d[:, 1], c=labels)
        plt.colorbar(scatter, label='Object Class')
        plt.title('Object embeddings: support {}, classes {}'.format(features.shape[0],
                                                                     np.unique(labels).size))
        # Save before show(): show() may clear the current figure
        plt.savefig(os.path.join(path, 'tsne' + str(tag) + '.png'))
        plt.show()

        return embeddings_2d
    def process_image(self, image_tensor):
        predictions = self.model(image_tensor)
        features, labels = self.extract_object_features(image_tensor, predictions)
        embeddings = self.visualize_object_embeddings(features, labels)
        return embeddings, labels

"""
# Usage example
visualizer = ObjectEmbeddingVisualizer(model, device)

# Assuming image_tensor is your input image [1, C, H, W]
embeddings, labels = visualizer.process_image(image_tensor)
"""