# Dataset utils and dataloaders

import glob
import logging
import math
import os
import random
import shutil
import time
import warnings
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread

import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

# from torchvision.transforms.functional import adjust_gamma
from skimage.exposure import adjust_gamma
import albumentations as A

import pickle
from copy import deepcopy
# from pycocotools import mask as maskUtils
from torchvision.utils import save_image
from torchvision.ops import roi_pool, roi_align, ps_roi_pool, ps_roi_align

from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \
    resample_segments, clean_str
from utils.torch_utils import torch_distributed_zero_first

# @@HK: pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 resolves the '...h\lib\fbgemm.dll" or one of its dependencies' load error on Windows

# Parameters
def flatten(lst):
    return [x for sub in lst for x in sub]


help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo']  # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv']  # acceptable video suffixes
logger = logging.getLogger(__name__)

# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
    if ExifTags.TAGS[orientation] == 'Orientation':
        break


def get_hash(files):
    # Returns a single hash value of a list of files
    return sum(os.path.getsize(f) for f in files if os.path.isfile(f))


def exif_size(img):
    # Returns exif-corrected PIL size
    s = img.size  # (width, height)
    try:
        rotation = dict(img._getexif().items())[orientation]
        if rotation == 6:  # rotation 270
            s = (s[1], s[0])
        elif rotation == 8:  # rotation 90
            s = (s[1], s[0])
    except:
        pass

    return s

# import warnings
# warnings.filterwarnings('error', category=RuntimeWarning)

def scaling_image(img, scaling_type, percentile=0.03, beta=0.3):
    if scaling_type == 'standardization':  # default by repo
        img = img / 255.0

    elif scaling_type == "single_image_0_to_1":
        max_val = np.max(img.ravel())
        min_val = np.min(img.ravel())
        img = np.double(img - min_val) / (np.double(max_val - min_val) + np.finfo(np.float32).eps)
        img = np.minimum(np.maximum(img, 0), 1)

    elif scaling_type == 'single_image_mean_std':
        img = (img - img.ravel().mean()) / img.ravel().std()

    elif scaling_type == 'single_image_percentile_0_1':
        min_val = np.percentile(img.ravel(), percentile)
        max_val = np.percentile(img.ravel(), 100 - percentile)
        img = np.double(img - min_val) / (np.double(max_val - min_val) + np.finfo(np.float32).eps)
        img = np.minimum(np.maximum(img, 0), 1)

    elif scaling_type == 'single_image_percentile_0_255':
        # min_val = np.percentile(img.ravel(), percentile)
        # max_val = np.percentile(img.ravel(), 100 - percentile)
        # img = np.double(img - min_val) / np.double(max_val - min_val)
        # img = np.uint8(np.minimum(np.maximum(img, 0), 1)*255)
        ImgMin = np.percentile(img, percentile)
        ImgMax = np.percentile(img, 100 - percentile)
        ImgDRC = (np.double(img - ImgMin) / (np.double(ImgMax - ImgMin)) * 255 + np.finfo(np.float32).eps)
        img_temp = (np.uint8(np.minimum(np.maximum(ImgDRC, 0), 255)))
        # img_temp = img_temp / 255.0
        return img_temp

    elif scaling_type == 'remove+global_outlier_0_1':
        img = np.double(img - img.min() * beta) / np.double(img.max() * (1 - beta) - img.min() * beta)  # beta in [percentile]
        img = np.double(np.minimum(np.maximum(img, 0), 1))

    elif scaling_type == 'normalization_uint16':
        raise ValueError("normalization_uint16 scaling method is not implemented yet.")
    elif scaling_type == 'normalization':
        raise ValueError("normalization scaling method is not implemented yet.")
    else:
        raise ValueError("Unknown image scaling method")

    return img
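

# Illustrative sketch (not used by the pipeline): expected output ranges of the
# percentile-based modes on a synthetic 16-bit frame. The array values here are
# made up for demonstration only.
def _scaling_image_example():
    frame = np.random.randint(0, 2 ** 16, (64, 64), dtype=np.uint16)
    out_01 = scaling_image(frame, scaling_type='single_image_percentile_0_1', percentile=0.03)
    out_255 = scaling_image(frame, scaling_type='single_image_percentile_0_255', percentile=0.03)
    assert 0.0 <= out_01.min() <= out_01.max() <= 1.0  # clipped to [0, 1]
    assert out_255.dtype == np.uint8  # DRC mode returns uint8 in [0, 255]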

def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
                      rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix='', rel_path_images='', num_cls=-1):
    # Make sure only the first process in DDP processes the dataset first, so the others can use the cache
    if augment:
        hyp['gamma_liklihood'] = opt.gamma_aug_prob
        print("", 100 * '==')
        print('gamma_liklihood was overridden by optional value', opt.gamma_aug_prob)

    with torch_distributed_zero_first(rank):
        scaling_before_mosaic = bool(hyp.get('scaling_before_mosaic', False))

        dataset = LoadImagesAndLabels(path, imgsz, batch_size,
                                      augment=augment,  # augment images
                                      hyp=hyp,  # augmentation hyperparameters
                                      rect=rect,  # rectangular training
                                      cache_images=cache,
                                      single_cls=opt.single_cls,
                                      stride=int(stride),
                                      pad=pad,
                                      image_weights=image_weights,
                                      prefix=prefix,
                                      rel_path_images=rel_path_images,
                                      scaling_type=opt.norm_type,
                                      input_channels=opt.input_channels,
                                      num_cls=num_cls,
                                      tir_channel_expansion=opt.tir_channel_expansion,
                                      no_tir_signal=opt.no_tir_signal,
                                      scaling_before_mosaic=scaling_before_mosaic)

    batch_size = min(batch_size, len(dataset))
    nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers])  # number of workers
    sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
    loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
    # Use torch.utils.data.DataLoader() if dataset properties will update during training, else InfiniteDataLoader()
    dataloader = loader(dataset,
                        batch_size=batch_size,
                        num_workers=nw,
                        sampler=sampler,
                        pin_memory=True,
                        collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
    return dataloader, dataset
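

# Illustrative sketch (hypothetical values and path): the minimal opt/hyp fields this
# loader expects when called outside the training script. Every number and the
# 'data/images' directory below are made up for demonstration only.
def _create_dataloader_example():
    from types import SimpleNamespace
    opt = SimpleNamespace(single_cls=False, norm_type='single_image_percentile_0_1',
                          input_channels=1, tir_channel_expansion=False, no_tir_signal=False,
                          gamma_aug_prob=0.0)
    hyp = {'img_percentile_removal': 0.3, 'beta': 0.3, 'gamma_liklihood': 0.0, 'gamma': 80,
           'scaling_before_mosaic': False}
    return create_dataloader('data/images', imgsz=640, batch_size=8, stride=32, opt=opt, hyp=hyp,
                             augment=False, rect=True, workers=2, prefix='val: ', num_cls=3)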


class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
    """ Dataloader that reuses workers

    Uses same syntax as vanilla DataLoader
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)


class _RepeatSampler(object):
    """ Sampler that repeats forever

    Args:
        sampler (Sampler)
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)
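

# Illustrative sketch: InfiniteDataLoader wraps the batch sampler in _RepeatSampler,
# so the same iterator (and its workers) is reused across epochs instead of being
# rebuilt. The toy dataset below is hypothetical.
def _infinite_dataloader_example():
    data = torch.utils.data.TensorDataset(torch.arange(8).float())
    loader = InfiniteDataLoader(data, batch_size=4, num_workers=0)
    for epoch in range(2):
        for (batch,) in loader:  # two batches per epoch, same underlying iterator
            pass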


class LoadImages:  # for inference
    def __init__(self, path, img_size=640, stride=32,
                 scaling_type='standardization', img_percentile_removal=0.3, beta=0.3, input_channels=3,
                 tir_channel_expansion=False, no_tir_signal=False):

        p = str(Path(path).absolute())  # os-agnostic absolute path
        if '*' in p:
            files = sorted(glob.glob(p, recursive=True))  # glob
        elif os.path.isdir(p):
            files = sorted(glob.glob(os.path.join(p, '*.*')))  # dir
        elif os.path.isfile(p):
            files = [p]  # files
        else:
            raise Exception(f'ERROR: {p} does not exist')

        images = [x for x in files if x.split('.')[-1].lower() in img_formats]
        videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
        ni, nv = len(images), len(videos)

        self.img_size = img_size
        self.stride = stride
        self.files = images + videos
        self.nf = ni + nv  # number of files
        self.video_flag = [False] * ni + [True] * nv
        self.mode = 'image'
        if any(videos):
            self.new_video(videos[0])  # new video
        else:
            self.cap = None
        assert self.nf > 0, f'No images or videos found in {p}. ' \
                            f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'

        self.scaling_type = scaling_type
        self.percentile = img_percentile_removal
        self.beta = beta
        self.input_channels = input_channels
        self.tir_channel_expansion = tir_channel_expansion
        self.is_tir_signal = not no_tir_signal

    def __iter__(self):
        self.count = 0
        return self

    def __next__(self):
        if self.count == self.nf:
            raise StopIteration
        path = self.files[self.count]

        if self.video_flag[self.count]:
            # Read video
            self.mode = 'video'
            ret_val, img0 = self.cap.read()
            if not ret_val:
                self.count += 1
                self.cap.release()
                if self.count == self.nf:  # last video
                    raise StopIteration
                else:
                    path = self.files[self.count]
                    self.new_video(path)
                    ret_val, img0 = self.cap.read()

            self.frame += 1
            print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='')

        else:
            # Read image
            self.count += 1
            # img0 = cv2.imread(path)  # BGR
            # 16-bit unsigned
            if os.path.basename(path).split('.')[-1] == 'tiff':
                img0 = cv2.imread(path, -1)
            else:
                img0 = cv2.imread(path)  # BGR
            assert img0 is not None, 'Image Not Found ' + path
            # print(f'image {self.count}/{self.nf} {path}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        if self.tir_channel_expansion:  # HK @@ per the paper, channel expansion (CE) is itself a kind of augmentation, so no preliminary augmentation is needed. One channel is an inversion, so avoid the channel-inversion aug.
            img = np.repeat(img[np.newaxis, :, :], 3, axis=0)  # convert GL to RGB by replication
            img_ce = np.zeros_like(img).astype('float64')
            # CH1 hist equalization
            img_chan = scaling_image(img[0, :, :], scaling_type=self.scaling_type,
                                     percentile=0, beta=self.beta)
            img_ce[0, :, :] = img_chan.astype('float64')

            img_chan = scaling_image(img[1, :, :], scaling_type=self.scaling_type,
                                     percentile=self.percentile, beta=self.beta)
            img_ce[1, :, :] = img_chan.astype('float64')

            img_chan = inversion_aug(img_ce[1, :, :])  # invert the DRC one
            img_ce[2, :, :] = img_chan.astype('float64')
            img = img_ce

        if not self.tir_channel_expansion:
            if self.is_tir_signal:
                img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0)  # convert GL to RGB by replication
            else:
                # Convert
                img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416

            # print('\n image file', path)
            if 0:
                import matplotlib.pyplot as plt
                plt.figure()
                plt.hist(img.ravel(), bins=128)
                plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(path).split('.')[0] + 'pre'))

            img = scaling_image(img, scaling_type=self.scaling_type,
                                percentile=self.percentile, beta=self.beta)
            if 0:
                import matplotlib.pyplot as plt
                plt.figure()
                plt.hist(img.ravel(), bins=128)
                plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(path).split('.')[0] + 'post'))

        img = np.ascontiguousarray(img)

        return path, img, img0, self.cap

    def new_video(self, path):
        self.frame = 0
        self.cap = cv2.VideoCapture(path)
        self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))

    def __len__(self):
        return self.nf  # number of files
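

# Illustrative sketch (hypothetical directory): iterating LoadImages for single-channel
# TIR tiff inference; the returned arrays are already scaled to [0, 1].
def _load_images_example():
    dataset = LoadImages('data/tir_frames', img_size=640, stride=32,
                         scaling_type='single_image_percentile_0_1',
                         img_percentile_removal=0.3, beta=0.3, input_channels=1)
    for path, img, img0, cap in dataset:
        tensor = torch.from_numpy(img).unsqueeze(0).float()  # 1xCxHxW batch for the model
        break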


class LoadWebcam:  # for inference
    def __init__(self, pipe='0', img_size=640, stride=32):
        self.img_size = img_size
        self.stride = stride

        if pipe.isnumeric():
            pipe = eval(pipe)  # local camera
        # pipe = 'rtsp://192.168.1.64/1'  # IP camera
        # pipe = 'rtsp://username:password@192.168.1.64/1'  # IP camera with login
        # pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg'  # IP golf camera

        self.pipe = pipe
        self.cap = cv2.VideoCapture(pipe)  # video capture object
        self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3)  # set buffer size

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        if cv2.waitKey(1) == ord('q'):  # q to quit
            self.cap.release()
            cv2.destroyAllWindows()
            raise StopIteration

        # Read frame
        if self.pipe == 0:  # local camera
            ret_val, img0 = self.cap.read()
            img0 = cv2.flip(img0, 1)  # flip left-right
        else:  # IP camera
            n = 0
            while True:
                n += 1
                self.cap.grab()
                if n % 30 == 0:  # skip frames
                    ret_val, img0 = self.cap.retrieve()
                    if ret_val:
                        break

        # Print
        assert ret_val, f'Camera Error {self.pipe}'
        img_path = 'webcam.jpg'
        print(f'webcam {self.count}: ', end='')

        # Padded resize
        img = letterbox(img0, self.img_size, stride=self.stride)[0]

        # Convert
        img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
        img = np.ascontiguousarray(img)

        return img_path, img, img0, None

    def __len__(self):
        return 0


class LoadStreams:  # multiple IP or RTSP cameras
    def __init__(self, sources='streams.txt', img_size=640, stride=32):
        self.mode = 'stream'
        self.img_size = img_size
        self.stride = stride

        if os.path.isfile(sources):
            with open(sources, 'r') as f:
                sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
        else:
            sources = [sources]

        n = len(sources)
        self.imgs = [None] * n
        self.sources = [clean_str(x) for x in sources]  # clean source names for later
        for i, s in enumerate(sources):
            # Start the thread to read frames from the video stream
            print(f'{i + 1}/{n}: {s}... ', end='')
            url = eval(s) if s.isnumeric() else s
            if 'youtube.com/' in str(url) or 'youtu.be/' in str(url):  # if source is YouTube video
                check_requirements(('pafy', 'youtube_dl'))
                import pafy
                url = pafy.new(url).getbest(preftype="mp4").url
            cap = cv2.VideoCapture(url)
            assert cap.isOpened(), f'Failed to open {s}'
            w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            self.fps = cap.get(cv2.CAP_PROP_FPS) % 100

            _, self.imgs[i] = cap.read()  # guarantee first frame
            thread = Thread(target=self.update, args=([i, cap]), daemon=True)
            print(f'success ({w}x{h} at {self.fps:.2f} FPS).')
            thread.start()
        print('')  # newline

        # check for common shapes
        s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0)  # shapes
        self.rect = np.unique(s, axis=0).shape[0] == 1  # rect inference if all shapes equal
        if not self.rect:
            print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')

    def update(self, index, cap):
        # Read next stream frame in a daemon thread
        n = 0
        while cap.isOpened():
            n += 1
            # _, self.imgs[index] = cap.read()
            cap.grab()
            if n == 4:  # read every 4th frame
                success, im = cap.retrieve()
                self.imgs[index] = im if success else self.imgs[index] * 0
                n = 0
            time.sleep(1 / self.fps)  # wait time

    def __iter__(self):
        self.count = -1
        return self

    def __next__(self):
        self.count += 1
        img0 = self.imgs.copy()
        if cv2.waitKey(1) == ord('q'):  # q to quit
            cv2.destroyAllWindows()
            raise StopIteration

        # Letterbox
        img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]

        # Stack
        img = np.stack(img, 0)

        # Convert
        img = img[:, :, :, ::-1].transpose(0, 3, 1, 2)  # BGR to RGB, to bsx3x416x416
        img = np.ascontiguousarray(img)

        return self.sources, img, img0, None

    def __len__(self):
        return 0  # 1E12 frames = 32 streams at 30 FPS for 30 years


def img2label_paths(img_paths):
    # Define label paths as a function of image paths
    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/, /labels/ substrings
    return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]
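

# Illustrative sketch: the /images/ -> /labels/ and suffix -> .txt mapping this helper
# performs (the path below is made up).
def _img2label_paths_example():
    paths = [os.sep.join(['datasets', 'tir', 'images', 'frame_0001.tiff'])]
    labels = img2label_paths(paths)
    assert labels[0].endswith(os.sep.join(['labels', 'frame_0001.txt']))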


class LoadImagesAndLabels(Dataset):  # for training/testing
    def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
                 cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', rel_path_images='',
                 scaling_type='standardization', input_channels=3,
                 num_cls=-1, tir_channel_expansion=False, no_tir_signal=False, scaling_before_mosaic=False):

        self.scaling_before_mosaic = scaling_before_mosaic
        self.img_size = img_size
        self.augment = augment
        self.hyp = hyp
        self.image_weights = image_weights
        self.rect = False if image_weights else rect
        self.mosaic = self.augment and not self.rect  # load 4 images at a time into a mosaic (only during training) @@ HK TODO: disable mosaic implicitly by setting mosaic prob = 0
        self.mosaic_border = [-img_size // 2, -img_size // 2]
        self.stride = stride
        self.path = path
        self.scaling_type = scaling_type
        self.percentile = hyp['img_percentile_removal']
        self.beta = hyp['beta']
        self.input_channels = input_channels  # in case of a GL image fed to an RGB NN, replicate channels
        self.tir_channel_expansion = tir_channel_expansion
        self.is_tir_signal = not no_tir_signal
        self.random_pad = True

        # self.albumentations = Albumentations() if augment else None
        self.albumentations_gamma_contrast = Albumentations_gamma_contrast(alb_prob=hyp['gamma_liklihood'],
                                                                           gamma_limit=[hyp['gamma'],
                                                                                        100 + 100 - hyp['gamma']])

        try:
            f = []  # image files
            for p in path if isinstance(path, list) else [path]:
                p = Path(p)  # os-agnostic
                if p.is_dir():  # dir
                    f += glob.glob(str(p / '**' / '*.*'), recursive=True)
                    # f = list(p.rglob('**/*.*'))  # pathlib
                elif p.is_file():  # file
                    with open(p, 'r') as t:
                        t = t.read().strip().splitlines()
                        parent = str(p.parent) + os.sep
                        if bool(rel_path_images):
                            f += [os.path.join(rel_path_images, x.replace('./', '')).rstrip() if x.startswith('./') else x for x in t]  # local to global path
                        else:
                            f += [x.replace('./', parent).rstrip() if x.startswith('./') else x for x in t]  # local to global path
                        # f += [p.parent / x.lstrip(os.sep) for x in t]  # local to global path (pathlib)
                else:
                    raise Exception(f'{prefix}{p} does not exist')
            self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
            # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats])  # pathlib
            assert self.img_files, f'{prefix}No images found'
        except Exception as e:
            raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')

        # Check cache. HK: the cache holds labels/annotations only
        self.label_files = img2label_paths(self.img_files)  # labels
        cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache')  # cached labels
        if cache_path.is_file():
            cache, exists = torch.load(cache_path), True  # load
            # if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache:  # changed
            #     cache, exists = self.cache_labels(cache_path, prefix), False  # re-cache
        else:
            cache, exists = self.cache_labels(num_cls, cache_path, prefix), False  # cache

        # Display cache
        nf, nm, ne, nc, n = cache.pop('results')  # found, missing, empty, corrupted, total
        if exists:
            d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
            tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
        assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'

        # Read cache
        cache.pop('hash')  # remove hash
        cache.pop('version')  # remove version
        labels, shapes, self.segments = zip(*cache.values())
        self.labels = list(labels)
        self.shapes = np.array(shapes, dtype=np.float64)
        self.img_files = list(cache.keys())  # update
        self.label_files = img2label_paths(cache.keys())  # update
        if single_cls:
            for x in self.labels:
                x[:, 0] = 0

        n = len(shapes)  # number of images
        bi = np.floor(np.arange(n) / batch_size).astype(int)  # batch index
        nb = bi[-1] + 1  # number of batches
        self.batch = bi  # batch index of image
        self.n = n
        self.indices = range(n)

        # Rectangular Training
        if self.rect:
            # Sort by aspect ratio
            s = self.shapes  # wh
            ar = s[:, 1] / s[:, 0]  # aspect ratio
            irect = ar.argsort()
            self.img_files = [self.img_files[i] for i in irect]
            self.label_files = [self.label_files[i] for i in irect]
            self.labels = [self.labels[i] for i in irect]
            self.shapes = s[irect]  # wh
            ar = ar[irect]

            # Set training image shapes
            shapes = [[1, 1]] * nb
            for i in range(nb):
                ari = ar[bi == i]
                mini, maxi = ari.min(), ari.max()
                if maxi < 1:
                    shapes[i] = [maxi, 1]
                elif mini > 1:
                    shapes[i] = [1, 1 / mini]

            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
            # pad=0.5 (https://github.com/ultralytics/ultralytics/issues/13271): the 0.5 padding (e.g. 640x640 resized to 672x672)
            # maintains aspect ratio and leaves a buffer so augmentations do not lose features at the edges; it can also shift
            # metrics slightly (e.g. the .pt model showing a marginally higher mAP than the ONNX export).

        # Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
        self.imgs = [None] * n
        if cache_images:
            if cache_images == 'disk':
                self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
                self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
                self.im_cache_dir.mkdir(parents=True, exist_ok=True)
            gb = 0  # Gigabytes of cached images
            self.img_hw0, self.img_hw = [None] * n, [None] * n
            results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
            pbar = tqdm(enumerate(results), total=n)
            for i, x in pbar:
                if cache_images == 'disk':
                    if not self.img_npy[i].exists():
                        np.save(self.img_npy[i].as_posix(), x[0])
                    gb += self.img_npy[i].stat().st_size
                else:
                    self.imgs[i], self.img_hw0[i], self.img_hw[i] = x
                    gb += self.imgs[i].nbytes
                pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
            pbar.close()

    def cache_labels(self, num_cls, path=Path('./labels.cache'), prefix=''):
        # Cache dataset labels, check images and read shapes
        x = {}  # dict
        nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, corrupted
        pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
        for i, (im_file, lb_file) in enumerate(pbar):
            try:
                # verify images
                im = Image.open(im_file)
                im.verify()  # PIL verify
                shape = exif_size(im)  # image size
                segments = []  # instance segments
                assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
                assert im.format.lower() in img_formats, f'invalid image format {im.format}'

                # verify labels
                if os.path.isfile(lb_file):
                    nf += 1  # label found
                    with open(lb_file, 'r') as f:
                        l = [x.split() for x in f.read().strip().splitlines()]
                        if any([len(x) > 8 for x in l]):  # is segment
                            classes = np.array([x[0] for x in l], dtype=np.float32)
                            segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l]  # (cls, xy1...)
                            l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
                        l = np.array(l, dtype=np.float32)
                    if len(l):
                        assert l.shape[1] == 5, 'labels require 5 columns each'
                        assert (l >= 0).all(), 'negative labels'
                        assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
                        assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
                        assert l[:, 0].max() < num_cls, 'class label out of range -- invalid'  # max label can't exceed the number of classes
                        # print(l[:, 0])
                    else:
                        ne += 1  # label empty
                        l = np.zeros((0, 5), dtype=np.float32)
                else:
                    nm += 1  # label missing
                    l = np.zeros((0, 5), dtype=np.float32)
                x[im_file] = [l, shape, segments]
            except Exception as e:
                nc += 1
                print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')

            pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \
                        f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
        pbar.close()

        if nf == 0:
            print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')

        x['hash'] = get_hash(self.label_files + self.img_files)
        x['results'] = nf, nm, ne, nc, i + 1
        x['version'] = 0.1  # cache version
        torch.save(x, path)  # save for next time
        logging.info(f'{prefix}New cache created: {path}')
        return x

    def __len__(self):
        return len(self.img_files)

    # def __iter__(self):
    #     self.count = -1
    #     print('ran dataset iter')
    #     # self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
    #     return self

    def __getitem__(self, index):
        index = self.indices[index]  # linear, shuffled, or image_weights

        if self.is_tir_signal:
            if self.scaling_before_mosaic:
                filling_value = 0.5  # on borders or after perspective, fill with 0.5 in [0, 1], equivalent to 114 in [0, 255]
            else:
                filling_value = 0  # on borders or after perspective, better to fill with a 0 thermal profile; the uint16 dynamic range of the image is unknown. TODO: find a more elegant way
        else:
            filling_value = 114

        hyp = self.hyp
        mosaic = self.mosaic and random.random() < hyp['mosaic'] and not self.tir_channel_expansion
        if mosaic:
            # Load mosaic
            if random.random() < 0.8:
                img, labels = load_mosaic(self, index, filling_value=filling_value)
            else:
                img, labels = load_mosaic9(self, index, filling_value=filling_value)
            shapes = None

            # MixUp https://arxiv.org/pdf/1710.09412.pdf
            if random.random() < hyp['mixup']:
                if random.random() < 0.8:
                    img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1), filling_value=filling_value)
                else:
                    img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1), filling_value=filling_value)
                r = np.random.beta(8.0, 8.0)  # mixup ratio, alpha=beta=8.0
                img = (img * r + img2 * (1 - r)).astype(img.dtype)  # .astype(np.uint8)
                labels = np.concatenate((labels, labels2), 0)

        else:
            # Load image
            img, (h0, w0), (h, w) = load_image(self, index)

            # Letterbox
            shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size  # final letterboxed shape
            # img, ratio, pad = letterbox(img, shape, color=(img.mean(), img.mean(), img.mean()), auto=False, scaleup=self.augment)
            img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
            shapes = (h0, w0), ((h / h0, w / w0), pad)  # for COCO mAP rescaling

            labels = self.labels[index].copy()
            if labels.size:  # normalized xywh to pixel xyxy format
                labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

        if self.tir_channel_expansion:  # HK @@ per the paper, channel expansion (CE) is itself a kind of augmentation, so no preliminary augmentation is needed. One channel is an inversion, so avoid the channel-inversion aug.
            img = np.repeat(img[np.newaxis, :, :], 3, axis=0)  # convert GL to RGB by replication
            img_ce = np.zeros_like(img).astype('float64')
            # CH1 hist equalization
            img_chan = scaling_image(img[0, :, :], scaling_type=self.scaling_type,
                                     percentile=0, beta=self.beta)
            img_ce[0, :, :] = img_chan.astype('float64')

            img_chan = scaling_image(img[1, :, :], scaling_type=self.scaling_type,
                                     percentile=self.percentile, beta=self.beta)
            img_ce[1, :, :] = img_chan.astype('float64')

            img_chan = inversion_aug(img_ce[1, :, :])  # invert the DRC one
            img_ce[2, :, :] = img_chan.astype('float64')
            img = img_ce

        if self.augment:
            # Augment imagespace
            if not mosaic:
                if hyp['random_perspective']:
                    img, labels = random_perspective(img, labels,
                                                     degrees=hyp['degrees'],
                                                     translate=hyp['translate'],
                                                     scale=hyp['scale'],
                                                     shear=hyp['shear'],
                                                     perspective=hyp['perspective'],
                                                     filling_value=filling_value,
                                                     is_fill_by_mean_img=self.is_tir_signal)

            if random.random() < hyp['inversion']:
                img = inversion_aug(img)

            # GL gain/attenuation
            # Squeeze pdf (x-mu)*scl+mu
            # img, labels = self.albumentations(img, labels)
            img = self.albumentations_gamma_contrast(img)
            # if np.isnan(img).any():
            #     print('img is nan gamma')

            if hyp['hsv_h'] > 0 or hyp['hsv_s'] > 0 or hyp['hsv_v'] > 0:
                # Augment colorspace
                augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])

            # Apply cutouts
            # if random.random() < 0.9:
            #     labels = cutout(img, labels)

            if random.random() < hyp['paste_in']:
                sample_labels, sample_images, sample_masks = [], [], []
                while len(sample_labels) < 30:
                    sample_labels_, sample_images_, sample_masks_ = load_samples(self, random.randint(0, len(self.labels) - 1))
                    sample_labels += sample_labels_
                    sample_images += sample_images_
                    sample_masks += sample_masks_
                    # print(len(sample_labels))
                    if len(sample_labels) == 0:
                        break
                labels = pastein(img, labels, sample_labels, sample_images, sample_masks)

        nL = len(labels)  # number of labels
        if nL:
            labels[:, 1:5] = xyxy2xywh(labels[:, 1:5])  # convert xyxy to xywh
            labels[:, [2, 4]] /= img.shape[0]  # normalized height 0-1
            labels[:, [1, 3]] /= img.shape[1]  # normalized width 0-1

        if self.augment:
            # flip up-down
            if random.random() < hyp['flipud']:
                img = np.flipud(img)
                if nL:
                    labels[:, 2] = 1 - labels[:, 2]

            # flip left-right
            if random.random() < hyp['fliplr']:
                img = np.fliplr(img)
                if nL:
                    labels[:, 1] = 1 - labels[:, 1]

        labels_out = torch.zeros((nL, 6))
        if nL:
            labels_out[:, 1:] = torch.from_numpy(labels)

        # tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od', 'img_ce.tiff'), 255*img.transpose(1,2,0).astype('uint8'))
        if not self.tir_channel_expansion:
            if self.is_tir_signal:
                img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0)  # convert GL to RGB by replication
            else:
                # Convert
                img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416

            if 0:
                import matplotlib.pyplot as plt
                plt.figure()
                plt.hist(img.ravel(), bins=128)
                plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(self.img_files[index]).split('.')[0] + 'pre_' + str(self.scaling_type)))

            # import tifffile
            # tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od', 'img_before_last_scaling.tiff'), img.transpose(1,2,0))
            img = scaling_image(img, scaling_type=self.scaling_type,
                                percentile=self.percentile, beta=self.beta)

            if 0:
                import matplotlib.pyplot as plt
                plt.figure()
                plt.hist(img.ravel(), bins=128)
                plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(self.img_files[index]).split('.')[0] + 'post_' + str(self.scaling_type)))

        # aa1 = np.repeat(img[1,:,:,:].cpu().permute(1,2,0).numpy(), 3, axis=2).astype('float32')
        # cv2.imwrite('test/exp40/test_batch88_labels__1.jpg', aa1*255)
        # aa1 = np.repeat(img.transpose(1,2,0), 3, axis=2).astype('float32')
        # print('\n 1st', img.shape)
        # if np.isnan(img).any():
        #     print('img {} index : {} is nan fin'.format(self.img_files[index], index))
        #     raise
        # import tifffile
        # tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od', 'img_before_last_scaling' + str(index) + '.tiff'),
        #                  img.transpose(1, 2, 0))

        img = np.ascontiguousarray(img)
        # print('\n 2nd', img.shape)
        return torch.from_numpy(img), labels_out, self.img_files[index], shapes

    @staticmethod
    def collate_fn(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        for i, l in enumerate(label):
            l[:, 0] = i  # add target image index for build_targets()
        return torch.stack(img, 0), torch.cat(label, 0), path, shapes

    @staticmethod
    def collate_fn4(batch):
        img, label, path, shapes = zip(*batch)  # transposed
        n = len(shapes) // 4
        img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
        for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
            i *= 4
            if random.random() < 0.5:
                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
                    0].type(img[i].type())
                l = label[i]
            else:
                im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
                l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
            img4.append(im)
            label4.append(l)

        for i, l in enumerate(label4):
            l[:, 0] = i  # add target image index for build_targets()

        return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4


# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
    # loads 1 image from dataset, returns img, original hw, resized hw
    img = self.imgs[index]
    if img is None:  # not cached
        path = self.img_files[index]
        # 16-bit unsigned
        if os.path.basename(path).split('.')[-1] == 'tiff':
            img = cv2.imread(path, -1)
            img = img[:, :, np.newaxis]  # (640, 640, 1)
        else:
            img = cv2.imread(path)  # BGR
        assert img is not None, 'Image Not Found ' + path
        h0, w0 = img.shape[:2]  # orig hw
        r = self.img_size / max(h0, w0)  # resize image to img_size
        if r != 1:  # always resize down, only resize up if training with augmentation
            interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
            img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
        return img, (h0, w0), img.shape[:2]  # img, hw_original, hw_resized
    else:
        return self.imgs[index], self.img_hw0[index], self.img_hw[index]  # img, hw_original, hw_resized


def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
    r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1  # random gains
    hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
    dtype = img.dtype  # uint8

    x = np.arange(0, 256, dtype=np.int16)
    lut_hue = ((x * r[0]) % 180).astype(dtype)
    lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
    lut_val = np.clip(x * r[2], 0, 255).astype(dtype)

    img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
    cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img)  # no return needed


def inversion_aug(img):
    if img.dtype == np.uint16 or img.dtype == np.int8:
        img = np.iinfo(img.dtype).max - img
        return img
    elif img.dtype == np.float32 or img.dtype == np.float64:
        img = 1.0 - img
        return img
    else:
        raise ValueError("image dtype {} is not supported (uint16, int8, float32, float64)".format(img.dtype))
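

# Illustrative sketch: inversion mirrors the full dynamic range of integer frames
# and maps [0, 1] floats to 1 - x (the values below are made up).
def _inversion_aug_example():
    frame16 = np.array([[0, 1000]], dtype=np.uint16)
    assert (inversion_aug(frame16) == np.array([[65535, 64535]], dtype=np.uint16)).all()
    frame01 = np.array([[0.25]], dtype=np.float32)
    assert np.isclose(inversion_aug(frame01), 0.75).all()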


def hist_equalize(img, clahe=True, bgr=False):
    # Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
    if clahe:
        c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        yuv[:, :, 0] = c.apply(yuv[:, :, 0])
    else:
        yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0])  # equalize Y channel histogram
    return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB)  # convert YUV image to RGB


def load_mosaic(self, index, filling_value):
    # loads images in a 4-mosaic

    labels4, segments4 = [], []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)
        if self.scaling_before_mosaic:
            img = scaling_image(img, scaling_type=self.scaling_type,
                                percentile=self.percentile, beta=self.beta)

        # place img in img4
        if i == 0:  # top left
            if self.is_tir_signal:
                if self.scaling_before_mosaic:
                    img4 = np.full((s * 2, s * 2, img.shape[2]), 0.5, dtype=np.float32)  # base image with 4 tiles
                else:
                    if self.random_pad:
                        img4 = np.random.normal(img.mean(), 500, (s * 2, s * 2, img.shape[2])).astype('uint16')
                    else:
                        img4 = np.full((s * 2, s * 2, img.shape[2]), img.mean(), dtype=np.uint16)  # base image with 4 tiles
            else:
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:], *segments4):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    # img4, labels4, segments4 = remove_background(img4, labels4, segments4)
    # sample_segments(img4, labels4, segments4, probability=self.hyp['copy_paste'])
    img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste'])
    img4, labels4 = random_perspective(img4, labels4, segments4,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border,
                                       filling_value=filling_value,
                                       is_fill_by_mean_img=self.is_tir_signal)  # border to remove

    return img4, labels4


def load_mosaic9(self, index, filling_value):
    # loads images in a 9-mosaic

    labels9, segments9 = [], []
    s = self.img_size
    indices = [index] + random.choices(self.indices, k=8)  # 8 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)
        if self.scaling_before_mosaic:
            img = scaling_image(img, scaling_type=self.scaling_type,
                                percentile=self.percentile, beta=self.beta)

        # place img in img9
        if i == 0:  # center
            if self.is_tir_signal:
                if self.scaling_before_mosaic:
                    img9 = np.full((s * 3, s * 3, img.shape[2]), 0.5, dtype=np.float32)  # base image with 9 tiles
                else:
                    if self.random_pad:
                        img9 = np.random.normal(img.mean(), 500, (s * 3, s * 3, img.shape[2])).astype('uint16')
                    else:
                        img9 = np.full((s * 3, s * 3, img.shape[2]), img.mean(), dtype=np.uint16)  # base image with 9 tiles; pad with the image mean since the uint16 dynamic range is unknown
            else:
                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
            h0, w0 = h, w
            c = s, s, s + w, s + h  # xmin, ymin, xmax, ymax (base) coordinates
        elif i == 1:  # top
            c = s, s - h, s + w, s
        elif i == 2:  # top right
            c = s + wp, s - h, s + wp + w, s
        elif i == 3:  # right
            c = s + w0, s, s + w0 + w, s + h
        elif i == 4:  # bottom right
            c = s + w0, s + hp, s + w0 + w, s + hp + h
        elif i == 5:  # bottom
            c = s + w0 - w, s + h0, s + w0, s + h0 + h
        elif i == 6:  # bottom left
            c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
        elif i == 7:  # left
            c = s - w, s + h0 - h, s, s + h0
        elif i == 8:  # top left
            c = s - w, s + h0 - hp - h, s, s + h0 - hp

        padx, pady = c[:2]
        x1, y1, x2, y2 = [max(x, 0) for x in c]  # allocate coords

        # Labels
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
        labels9.append(labels)
        segments9.extend(segments)

        # Image
        img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:]  # img9[ymin:ymax, xmin:xmax]
        hp, wp = h, w  # height, width previous

    # Offset
    yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border]  # mosaic center x, y
    img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

    # Concat/clip labels
    labels9 = np.concatenate(labels9, 0)
    labels9[:, [1, 3]] -= xc
    labels9[:, [2, 4]] -= yc
    c = np.array([xc, yc])  # centers
    segments9 = [x - c for x in segments9]

    for x in (labels9[:, 1:], *segments9):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img9, labels9 = replicate(img9, labels9)  # replicate

    # Augment
    # img9, labels9, segments9 = remove_background(img9, labels9, segments9)
    img9, labels9, segments9 = copy_paste(img9, labels9, segments9, probability=self.hyp['copy_paste'])
    # Perspective transformation can create holes; for thermal it is better to fill them without reflection
    img9, labels9 = random_perspective(img9, labels9, segments9,
                                       degrees=self.hyp['degrees'],
                                       translate=self.hyp['translate'],
                                       scale=self.hyp['scale'],
                                       shear=self.hyp['shear'],
                                       perspective=self.hyp['perspective'],
                                       border=self.mosaic_border,
                                       filling_value=filling_value,
                                       is_fill_by_mean_img=self.is_tir_signal)  # border to remove

    return img9, labels9


def load_samples(self, index):
    # loads images in a 4-mosaic

    labels4, segments4 = [], []
    s = self.img_size
    yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border]  # mosaic center x, y
    indices = [index] + random.choices(self.indices, k=3)  # 3 additional image indices
    for i, index in enumerate(indices):
        # Load image
        img, _, (h, w) = load_image(self, index)

        # place img in img4
        if i == 0:  # top left
            if self.is_tir_signal:
                if self.scaling_before_mosaic:
                    img4 = np.full((s * 2, s * 2, img.shape[2]), 0.5, dtype=np.float32)  # base image with 4 tiles; fill with 0.5 in [0, 1], equivalent to 114 in [0, 255]
                else:
                    if self.random_pad:
                        img4 = np.random.normal(img.mean(), 500, (s * 2, s * 2, img.shape[2])).astype('uint16')
                    else:
                        img4 = np.full((s * 2, s * 2, img.shape[2]), img.mean(), dtype=np.uint16)  # base image with 4 tiles
            else:
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8)  # base image with 4 tiles
            x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
        elif i == 1:  # top right
            x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
            x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
        elif i == 2:  # bottom left
            x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
        elif i == 3:  # bottom right
            x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
            x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

        img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        padw = x1a - x1b
        padh = y1a - y1b

        # Labels
        labels, segments = self.labels[index].copy(), self.segments[index].copy()
        if labels.size:
            labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh)  # normalized xywh to pixel xyxy format
            segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
        labels4.append(labels)
        segments4.extend(segments)

    # Concat/clip labels
    labels4 = np.concatenate(labels4, 0)
    for x in (labels4[:, 1:], *segments4):
        np.clip(x, 0, 2 * s, out=x)  # clip when using random_perspective()
    # img4, labels4 = replicate(img4, labels4)  # replicate

    # Augment
    # img4, labels4, segments4 = remove_background(img4, labels4, segments4)
    sample_labels, sample_images, sample_masks = sample_segments(img4, labels4, segments4, probability=0.5)

    return sample_labels, sample_images, sample_masks


def copy_paste(img, labels, segments, probability=0.5):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    if probability and n:
        h, w, c = img.shape  # height, width, channels
        im_new = np.zeros(img.shape, np.uint8)
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = w - l[3], l[2], w - l[1], l[4]
            ioa = bbox_ioa(box, labels[:, 1:5])  # intersection over area
            if (ioa < 0.30).all():  # allow 30% obscuration of existing labels
                labels = np.concatenate((labels, [[l[0], *box]]), 0)
                segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
                cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

        result = cv2.bitwise_and(src1=img, src2=im_new)
        result = cv2.flip(result, 1)  # augment segments (flip left-right)
        i = result > 0  # pixels to replace
        # i[:, :] = result.max(2).reshape(h, w, 1)  # act over ch
        img[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug

    return img, labels, segments


def remove_background(img, labels, segments):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    h, w, c = img.shape  # height, width, channels
    im_new = np.zeros(img.shape, np.uint8)
    img_new = np.ones(img.shape, np.uint8) * 114
    raise ValueError('uint8 cast does not comply with TIR uint16')
    for j in range(n):
        cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)

    result = cv2.bitwise_and(src1=img, src2=im_new)

    i = result > 0  # pixels to replace
    img_new[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug

    return img_new, labels, segments


def sample_segments(img, labels, segments, probability=0.5):
    # Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
    n = len(segments)
    sample_labels = []
    sample_images = []
    sample_masks = []
    if probability and n:
        h, w, c = img.shape  # height, width, channels
        for j in random.sample(range(n), k=round(probability * n)):
            l, s = labels[j], segments[j]
            box = l[1].astype(int).clip(0, w - 1), l[2].astype(int).clip(0, h - 1), l[3].astype(int).clip(0, w - 1), l[4].astype(int).clip(0, h - 1)
            # print(box)
            if (box[2] <= box[0]) or (box[3] <= box[1]):
                continue

            sample_labels.append(l[0])

            mask = np.zeros(img.shape, np.uint8)

            cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
            sample_masks.append(mask[box[1]:box[3], box[0]:box[2], :])

            result = cv2.bitwise_and(src1=img, src2=mask)
            i = result > 0  # pixels to replace
            mask[i] = result[i]  # cv2.imwrite('debug.jpg', img)  # debug
            # print(box)
            sample_images.append(mask[box[1]:box[3], box[0]:box[2], :])

    return sample_labels, sample_images, sample_masks


def replicate(img, labels):
    # Replicate labels
    h, w = img.shape[:2]
    boxes = labels[:, 1:].astype(int)
    x1, y1, x2, y2 = boxes.T
    s = ((x2 - x1) + (y2 - y1)) / 2  # side length (pixels)
    for i in s.argsort()[:round(s.size * 0.5)]:  # smallest indices
        x1b, y1b, x2b, y2b = boxes[i]
        bh, bw = y2b - y1b, x2b - x1b
        yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw))  # offset x, y
        x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
        img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
        labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)

    return img, labels
def letterbox ( img , new_shape = ( 640 , 640 ) , color = ( 114 , 114 , 114 ) , auto = True , scaleFill = False , scaleup = True , stride = 32 ) :
# Resize and pad image while meeting stride-multiple constraints
shape = img . shape [ : 2 ] # current shape [height, width]
if isinstance ( new_shape , int ) :
new_shape = ( new_shape , new_shape )
# Scale ratio (new / old)
r = min ( new_shape [ 0 ] / shape [ 0 ] , new_shape [ 1 ] / shape [ 1 ] )
if not scaleup : # only scale down, do not scale up (for better test mAP)
r = min ( r , 1.0 )
# Compute padding
ratio = r , r # width, height ratios
new_unpad = int ( round ( shape [ 1 ] * r ) ) , int ( round ( shape [ 0 ] * r ) )
dw , dh = new_shape [ 1 ] - new_unpad [ 0 ] , new_shape [ 0 ] - new_unpad [ 1 ] # wh padding
if auto : # minimum rectangle
dw , dh = np . mod ( dw , stride ) , np . mod ( dh , stride ) # wh padding
elif scaleFill : # stretch
dw , dh = 0.0 , 0.0
new_unpad = ( new_shape [ 1 ] , new_shape [ 0 ] )
ratio = new_shape [ 1 ] / shape [ 1 ] , new_shape [ 0 ] / shape [ 0 ] # width, height ratios
dw /= 2  # divide padding into 2 sides
dh /= 2
if shape [ : : - 1 ] != new_unpad : # resize
img = cv2 . resize ( img , new_unpad , interpolation = cv2 . INTER_LINEAR )
top , bottom = int ( round ( dh - 0.1 ) ) , int ( round ( dh + 0.1 ) )
left , right = int ( round ( dw - 0.1 ) ) , int ( round ( dw + 0.1 ) )
img = cv2 . copyMakeBorder ( img , top , bottom , left , right , cv2 . BORDER_CONSTANT , value = color ) # add border
return img , ratio , ( dw , dh )
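# Usage sketch (assuming a synthetic 480x640 BGR test image):
# import numpy as np
# im0 = np.full((480, 640, 3), 114, np.uint8)
# im, ratio, (dw, dh) = letterbox(im0, new_shape=640, auto=False)
# # im.shape == (640, 640, 3); ratio == (1.0, 1.0); (dw, dh) == (0.0, 80.0) -> 80 px pad top and bottom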
def random_perspective ( img , targets = ( ) , segments = ( ) , degrees = 10 , translate = .1 , scale = .1 , shear = 10 , perspective = 0.0 ,
border = ( 0 , 0 ) , filling_value = 114 , is_fill_by_mean_img = False ) :
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = img . shape [ 0 ] + border [ 0 ] * 2 # shape(h,w,c)
width = img . shape [ 1 ] + border [ 1 ] * 2
# Center
C = np . eye ( 3 )
C [ 0 , 2 ] = - img . shape [ 1 ] / 2 # x translation (pixels)
C [ 1 , 2 ] = - img . shape [ 0 ] / 2 # y translation (pixels)
# Perspective
P = np . eye ( 3 )
P [ 2 , 0 ] = random . uniform ( - perspective , perspective ) # x perspective (about y)
P [ 2 , 1 ] = random . uniform ( - perspective , perspective ) # y perspective (about x)
# Rotation and Scale
R = np . eye ( 3 )
a = random . uniform ( - degrees , degrees )
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1.1 + scale)  # @@HK TODO: why is the scale range not symmetric?
# s = 2 ** random.uniform(-scale, scale)
R [ : 2 ] = cv2 . getRotationMatrix2D ( angle = a , center = ( 0 , 0 ) , scale = s )
# Shear
S = np . eye ( 3 )
S [ 0 , 1 ] = math . tan ( random . uniform ( - shear , shear ) * math . pi / 180 ) # x shear (deg)
S [ 1 , 0 ] = math . tan ( random . uniform ( - shear , shear ) * math . pi / 180 ) # y shear (deg)
# Translation
T = np . eye ( 3 )
T [ 0 , 2 ] = random . uniform ( 0.5 - translate , 0.5 + translate ) * width # x translation (pixels)
T [ 1 , 2 ] = random . uniform ( 0.5 - translate , 0.5 + translate ) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
if ( border [ 0 ] != 0 ) or ( border [ 1 ] != 0 ) or ( M != np . eye ( 3 ) ) . any ( ) : # image changed
if is_fill_by_mean_img :
filling_value = int(img.mean() + 1)  # the filling value must be an integer, hence when the image is scaled to [0, 1] before mosaic the only possible filling values here are 0 or 1
if perspective :
img = cv2 . warpPerspective ( img , M , dsize = ( width , height ) , borderValue = ( filling_value , filling_value , filling_value ) )
else : # affine
img = cv2 . warpAffine ( img , M [ : 2 ] , dsize = ( width , height ) , borderValue = ( filling_value , filling_value , filling_value ) )
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(img[:, :, ::-1]) # base
# ax[1].imshow(img2[:, :, ::-1]) # warped
# Transform label coordinates
n = len ( targets )
if n :
use_segments = any ( x . any ( ) for x in segments )
new = np . zeros ( ( n , 4 ) )
if use_segments : # warp segments
segments = resample_segments ( segments ) # upsample
for i , segment in enumerate ( segments ) :
xy = np . ones ( ( len ( segment ) , 3 ) )
xy [ : , : 2 ] = segment
xy = xy @ M . T # transform
xy = xy [ : , : 2 ] / xy [ : , 2 : 3 ] if perspective else xy [ : , : 2 ] # perspective rescale or affine
# clip
new [ i ] = segment2box ( xy , width , height )
else : # warp boxes
xy = np . ones ( ( n * 4 , 3 ) )
xy [ : , : 2 ] = targets [ : , [ 1 , 2 , 3 , 4 , 1 , 4 , 3 , 2 ] ] . reshape ( n * 4 , 2 ) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M . T # transform
xy = ( xy [ : , : 2 ] / xy [ : , 2 : 3 ] if perspective else xy [ : , : 2 ] ) . reshape ( n , 8 ) # perspective rescale or affine
# create new boxes
x = xy [ : , [ 0 , 2 , 4 , 6 ] ]
y = xy [ : , [ 1 , 3 , 5 , 7 ] ]
new = np . concatenate ( ( x . min ( 1 ) , y . min ( 1 ) , x . max ( 1 ) , y . max ( 1 ) ) ) . reshape ( 4 , n ) . T
# clip
new [ : , [ 0 , 2 ] ] = new [ : , [ 0 , 2 ] ] . clip ( 0 , width )
new [ : , [ 1 , 3 ] ] = new [ : , [ 1 , 3 ] ] . clip ( 0 , height )
# filter candidates
i = box_candidates ( box1 = targets [ : , 1 : 5 ] . T * s , box2 = new . T , area_thr = 0.01 if use_segments else 0.10 )
targets = targets [ i ]
targets [ : , 1 : 5 ] = new [ i ]
return img , targets
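# Usage sketch: targets are nx5 [cls, x1, y1, x2, y2] in pixels; filling_value and
# is_fill_by_mean_img are the TIR-specific additions of this repo (constant vs. image-mean border fill).
# img_aug, targets_aug = random_perspective(img, targets, degrees=5, translate=0.1, scale=0.1,
#                                           shear=2, perspective=0.0, border=(0, 0),
#                                           filling_value=114, is_fill_by_mean_img=False)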
def box_candidates ( box1 , box2 , wh_thr = 2 , ar_thr = 20 , area_thr = 0.1 , eps = 1e-16 ) : # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1 , h1 = box1 [ 2 ] - box1 [ 0 ] , box1 [ 3 ] - box1 [ 1 ]
w2 , h2 = box2 [ 2 ] - box2 [ 0 ] , box2 [ 3 ] - box2 [ 1 ]
ar = np . maximum ( w2 / ( h2 + eps ) , h2 / ( w2 + eps ) ) # aspect ratio
return ( w2 > wh_thr ) & ( h2 > wh_thr ) & ( w2 * h2 / ( w1 * h1 + eps ) > area_thr ) & ( ar < ar_thr ) # candidates
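# Example sketch: box1/box2 are 4xn x1y1x2y2 arrays before/after augmentation; a box survives if it
# is at least wh_thr px wide/high, keeps more than area_thr of its area and has aspect ratio < ar_thr.
# import numpy as np
# b1 = np.array([[0, 0, 100, 50]], dtype=float).T  # 4x1 box before augmentation
# b2 = np.array([[0, 0, 90, 45]], dtype=float).T   # 4x1 box after augmentation
# box_candidates(b1, b2)                           # -> array([ True])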
def bbox_ioa ( box1 , box2 ) :
# Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
box2 = box2 . transpose ( )
# Get the coordinates of bounding boxes
b1_x1 , b1_y1 , b1_x2 , b1_y2 = box1 [ 0 ] , box1 [ 1 ] , box1 [ 2 ] , box1 [ 3 ]
b2_x1 , b2_y1 , b2_x2 , b2_y2 = box2 [ 0 ] , box2 [ 1 ] , box2 [ 2 ] , box2 [ 3 ]
# Intersection area
inter_area = ( np . minimum ( b1_x2 , b2_x2 ) - np . maximum ( b1_x1 , b2_x1 ) ) . clip ( 0 ) * \
( np . minimum ( b1_y2 , b2_y2 ) - np . maximum ( b1_y1 , b2_y1 ) ) . clip ( 0 )
# box2 area
box2_area = ( b2_x2 - b2_x1 ) * ( b2_y2 - b2_y1 ) + 1e-16
# Intersection over box2 area
return inter_area / box2_area
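# Example sketch: intersection over the *second* box's area, used by cutout()/pastein() to measure
# how much of each existing label a new patch would cover.
# import numpy as np
# bbox_ioa(np.array([0, 0, 10, 10]), np.array([[5, 5, 15, 15]]))  # -> array([0.25])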
def cutout ( image , labels ) :
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
h , w = image . shape [ : 2 ]
# create random masks
scales = [ 0.5 ] * 1 + [ 0.25 ] * 2 + [ 0.125 ] * 4 + [ 0.0625 ] * 8 + [ 0.03125 ] * 16 # image size fraction
for s in scales :
mask_h = random . randint ( 1 , int ( h * s ) )
mask_w = random . randint ( 1 , int ( w * s ) )
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min ( w , xmin + mask_w )
ymax = min ( h , ymin + mask_h )
# apply random color mask
image [ ymin : ymax , xmin : xmax ] = [ random . randint ( 64 , 191 ) for _ in range ( 3 ) ]
# return unobscured labels
if len ( labels ) and s > 0.03 :
box = np . array ( [ xmin , ymin , xmax , ymax ] , dtype = np . float32 )
ioa = bbox_ioa ( box , labels [ : , 1 : 5 ] ) # intersection over area
labels = labels [ ioa < 0.60 ] # remove >60% obscured labels
return labels
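# Usage sketch: cutout() draws random gray patches on the image in place and returns only the labels
# that remain less than 60% obscured.
# labels = cutout(img, labels)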
def pastein ( image , labels , sample_labels , sample_images , sample_masks ) :
# Paste sampled object crops into the image (copy-paste style augmentation, https://arxiv.org/abs/2012.07177)
h , w = image . shape [ : 2 ]
# create random masks
scales = [ 0.75 ] * 2 + [ 0.5 ] * 4 + [ 0.25 ] * 4 + [ 0.125 ] * 4 + [ 0.0625 ] * 6 # image size fraction
for s in scales :
if random . random ( ) < 0.2 :
continue
mask_h = random . randint ( 1 , int ( h * s ) )
mask_w = random . randint ( 1 , int ( w * s ) )
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min ( w , xmin + mask_w )
ymax = min ( h , ymin + mask_h )
box = np . array ( [ xmin , ymin , xmax , ymax ] , dtype = np . float32 )
if len ( labels ) :
ioa = bbox_ioa ( box , labels [ : , 1 : 5 ] ) # intersection over area
else :
ioa = np . zeros ( 1 )
if ( ioa < 0.30 ) . all ( ) and len ( sample_labels ) and ( xmax > xmin + 20 ) and ( ymax > ymin + 20 ) : # allow 30% obscuration of existing labels
sel_ind = random . randint ( 0 , len ( sample_labels ) - 1 )
#print(len(sample_labels))
#print(sel_ind)
#print((xmax-xmin, ymax-ymin))
#print(image[ymin:ymax, xmin:xmax].shape)
#print([[sample_labels[sel_ind], *box]])
#print(labels.shape)
hs , ws , cs = sample_images [ sel_ind ] . shape
r_scale = min ( ( ymax - ymin ) / hs , ( xmax - xmin ) / ws )
r_w = int ( ws * r_scale )
r_h = int ( hs * r_scale )
if ( r_w > 10 ) and ( r_h > 10 ) :
r_mask = cv2 . resize ( sample_masks [ sel_ind ] , ( r_w , r_h ) )
r_image = cv2 . resize ( sample_images [ sel_ind ] , ( r_w , r_h ) )
temp_crop = image [ ymin : ymin + r_h , xmin : xmin + r_w ]
m_ind = r_mask > 0
if m_ind . astype ( np . int32 ) . sum ( ) > 60 :
temp_crop [ m_ind ] = r_image [ m_ind ]
#print(sample_labels[sel_ind])
#print(sample_images[sel_ind].shape)
#print(temp_crop.shape)
box = np . array ( [ xmin , ymin , xmin + r_w , ymin + r_h ] , dtype = np . float32 )
if len ( labels ) :
labels = np . concatenate ( ( labels , [ [ sample_labels [ sel_ind ] , * box ] ] ) , 0 )
else :
labels = np . array ( [ [ sample_labels [ sel_ind ] , * box ] ] )
image [ ymin : ymin + r_h , xmin : xmin + r_w ] = temp_crop
return labels
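# Usage sketch: typically chained with sample_segments() so that crops harvested from a source image
# are pasted into the current one; `image` is modified in place and the grown label array is returned.
# src_img/src_labels/src_segments below are placeholder names for the source image's data.
# sample_labels, sample_images, sample_masks = sample_segments(src_img, src_labels, src_segments, 1.0)
# labels = pastein(image, labels, sample_labels, sample_images, sample_masks)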
class Albumentations_gamma_contrast :
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__ ( self , alb_prob = 0.01 , gamma_limit = [ 80 , 120 ] ) :
self . transform = None
self . transform = A . Compose ( [
# A.CLAHE(p=0.01),
A . RandomBrightnessContrast ( brightness_limit = 0.2 , contrast_limit = 0.2 , p = alb_prob ) , #Contrast adjustment: x' = clip((x - mean) * (1 + a) + mean) ; x'' = clip(x' * (1 + β))
A . RandomGamma ( gamma_limit = gamma_limit , p = alb_prob ) ] )
# A.Blur(p=0.01),
# A.MedianBlur(p=0.01),
# A.ToGray(p=0.01),
# A.ImageCompression(quality_lower=75, p=0.01),],
# bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
#logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
def __call__ ( self , im , p = 1.0 ) :
if self . transform and random . random ( ) < p :
new = self . transform ( image = im ) # transformed
im = new [ ' image ' ]
return im
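# Usage sketch: image-only photometric augmentation (no bbox transform), e.g. on a HxWx3 uint8 frame;
# alb_prob is the per-transform probability, p the per-call probability.
# aug = Albumentations_gamma_contrast(alb_prob=0.2, gamma_limit=[80, 120])
# img = aug(img, p=1.0)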
class Albumentations :
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__ ( self ) :
self . transform = None
import albumentations as A
self . transform = A . Compose ( [
A . CLAHE ( p = 0.01 ) ,
A . RandomBrightnessContrast ( brightness_limit = 0.2 , contrast_limit = 0.2 , p = 0.01 ) ,
A . RandomGamma ( gamma_limit = [ 80 , 120 ] , p = 0.01 ) ,
A . Blur ( p = 0.01 ) ,
A . MedianBlur ( p = 0.01 ) ,
A . ToGray ( p = 0.01 ) ,
A . ImageCompression ( quality_lower = 75 , p = 0.01 ) , ] ,
bbox_params = A . BboxParams ( format = ' pascal_voc ' , label_fields = [ ' class_labels ' ] ) )
#logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
def __call__ ( self , im , labels , p = 1.0 ) :
if self . transform and random . random ( ) < p :
new = self . transform ( image = im , bboxes = labels [ : , 1 : ] , class_labels = labels [ : , 0 ] ) # transformed
im , labels = new [ ' image ' ] , np . array ( [ [ c , * b ] for c , b in zip ( new [ ' class_labels ' ] , new [ ' bboxes ' ] ) ] )
return im , labels
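# Usage sketch: the bbox-aware variant; labels are nx5 [cls, x1, y1, x2, y2] in pixel (pascal_voc)
# coordinates and are filtered/transformed together with the image.
# aug = Albumentations()
# img, labels = aug(img, labels, p=1.0)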
def create_folder ( path = ' ./new ' ) :
# Create folder
if os . path . exists ( path ) :
shutil . rmtree ( path ) # delete output folder
os . makedirs ( path ) # make new output folder
def flatten_recursive ( path = ' ../coco ' ) :
# Flatten a recursive directory by bringing all files to top level
new_path = Path ( path + ' _flat ' )
create_folder ( new_path )
for file in tqdm ( glob . glob ( str ( Path ( path ) ) + ' /**/*.* ' , recursive = True ) ) :
shutil . copyfile ( file , new_path / Path ( file ) . name )
def extract_boxes ( path = ' ../coco/ ' ) : # from utils.datasets import *; extract_boxes('../coco128')
# Convert detection dataset into classification dataset, with one directory per class
path = Path ( path ) # images dir
shutil . rmtree ( path / ' classifier ' ) if ( path / ' classifier ' ) . is_dir ( ) else None # remove existing
files = list ( path . rglob ( ' *.* ' ) )
n = len ( files ) # number of files
for im_file in tqdm ( files , total = n ) :
if im_file . suffix [ 1 : ] in img_formats :
# image
raise NotImplementedError('extract_boxes is not aligned to the TIR 1-channel signal')
im = cv2.imread(str(im_file))[..., ::-1]  # BGR to RGB
h , w = im . shape [ : 2 ]
# labels
lb_file = Path ( img2label_paths ( [ str ( im_file ) ] ) [ 0 ] )
if Path ( lb_file ) . exists ( ) :
with open ( lb_file , ' r ' ) as f :
lb = np . array ( [ x . split ( ) for x in f . read ( ) . strip ( ) . splitlines ( ) ] , dtype = np . float32 ) # labels
for j , x in enumerate ( lb ) :
c = int ( x [ 0 ] ) # class
f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg'  # new filename
if not f . parent . is_dir ( ) :
f . parent . mkdir ( parents = True )
b = x [ 1 : ] * [ w , h , w , h ] # box
# b[2:] = b[2:].max() # rectangle to square
b [ 2 : ] = b [ 2 : ] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int)  # np.int is removed in recent NumPy
b [ [ 0 , 2 ] ] = np . clip ( b [ [ 0 , 2 ] ] , 0 , w ) # clip boxes outside of image
b [ [ 1 , 3 ] ] = np . clip ( b [ [ 1 , 3 ] ] , 0 , h )
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
def autosplit ( path = ' ../coco ' , weights = ( 0.9 , 0.1 , 0.0 ) , annotated_only = False ) :
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage : from utils . datasets import * ; autosplit ( ' ../coco ' )
Arguments
path : Path to images directory
weights : Train , val , test weights ( list )
annotated_only : Only use images with an annotated txt file
"""
path = Path ( path ) # images dir
files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], [])  # image files only
n = len ( files ) # number of files
indices = random . choices ( [ 0 , 1 , 2 ] , weights = weights , k = n ) # assign each image to a split
txt = [ ' autosplit_train.txt ' , ' autosplit_val.txt ' , ' autosplit_test.txt ' ] # 3 txt files
[ ( path / x ) . unlink ( ) for x in txt if ( path / x ) . exists ( ) ] # remove existing
print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
for i , img in tqdm ( zip ( indices , files ) , total = n ) :
if not annotated_only or Path ( img2label_paths ( [ str ( img ) ] ) [ 0 ] ) . exists ( ) : # check label
with open ( path / txt [ i ] , ' a ' ) as f :
f . write ( str ( img ) + ' \n ' ) # add image to txt file
def load_segmentations ( self , index ) :
key = ' /work/handsomejw66/coco17/ ' + self . img_files [ index ]
#print(key)
# /work/handsomejw66/coco17/
return self . segs [ key ]