# yolov7/utils/datasets.py
# Dataset utils and dataloaders
import glob
import logging
import math
import os
import random
import shutil
import time
import warnings
from itertools import repeat
from multiprocessing.pool import ThreadPool
from pathlib import Path
from threading import Thread
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm
from scipy.stats import truncnorm
# from torchvision.transforms.functional import adjust_gamma
from skimage.exposure import adjust_gamma
import albumentations as A
import pickle
from copy import deepcopy
#from pycocotools import mask as maskUtils
from torchvision.utils import save_image
from torchvision.ops import roi_pool, roi_align, ps_roi_pool, ps_roi_align
from utils.general import check_requirements, xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, \
resample_segments, clean_str, check_file
from utils.torch_utils import torch_distributed_zero_first
# @@HK : pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 resolves the Windows "fbgemm.dll or one of its dependencies" load error
# Parameters
eps = 1e-5
import pandas as pd
def flatten(lst): return [x for l in lst for x in l]
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp', 'mpo'] # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
logger = logging.getLogger(__name__)
def load_csv_xls_2_df(eileen_annot, index_col=False):
    filename, file_extension = os.path.splitext(eileen_annot)
    if file_extension == '.csv':
        df_eilen = pd.read_csv(eileen_annot, index_col=index_col)
    elif file_extension == '.xlsx':
        df_eilen = pd.read_excel(eileen_annot, index_col=index_col, engine='openpyxl')
    else:
        raise ValueError(f'Unsupported annotation file extension: {file_extension}')
    return df_eilen
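# Usage sketch (hedged): 'metadata.csv' is a hypothetical file with a
# 'tir_frame_image_file_name' column, matching what LoadImagesAndLabels consumes below.
def _example_load_metadata():
    df = load_csv_xls_2_df('metadata.csv')  # .csv -> read_csv, .xlsx -> read_excel
    return df['tir_frame_image_file_name'].tolist()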
# Get orientation exif tag
for orientation in ExifTags.TAGS.keys():
if ExifTags.TAGS[orientation] == 'Orientation':
break
def get_hash(files):
# Returns a single hash value of a list of files
return sum(os.path.getsize(f) for f in files if os.path.isfile(f))
def exif_size(img):
# Returns exif-corrected PIL size
s = img.size # (width, height)
try:
rotation = dict(img._getexif().items())[orientation]
if rotation == 6: # rotation 270
s = (s[1], s[0])
elif rotation == 8: # rotation 90
s = (s[1], s[0])
    except Exception:
        pass
return s
# import warnings
# warnings.filterwarnings('error', category=RuntimeWarning)
def scaling_image(img, scaling_type, percentile: float = 0.03,
                  beta: float = 0.3, roi: tuple = (), img_size: int = 640):
    if scaling_type == 'no_norm':
        if bool(roi):
            raise ValueError("roi is not supported for scaling_type 'no_norm'")
    elif scaling_type == 'standardization':  # default by repo
        if bool(roi):
            raise ValueError("roi is not supported for scaling_type 'standardization'")
        img = img / 255.0
    elif scaling_type == 'single_image_0_to_1':
        if bool(roi):
            raise ValueError("roi is not supported for scaling_type 'single_image_0_to_1'")
        max_val = np.max(img.ravel())
        min_val = np.min(img.ravel())
        img = np.double(img - min_val) / (np.double(max_val - min_val) + eps)
        img = np.minimum(np.maximum(img, 0), 1)
    elif scaling_type == 'single_image_mean_std':
        if bool(roi):
            raise ValueError("roi is not supported for scaling_type 'single_image_mean_std'")
        img = (img - img.ravel().mean()) / img.ravel().std()
    elif scaling_type == 'single_image_percentile_0_1':
        if bool(roi):
            # compute the percentiles over the letterboxed ROI only, excluding the vertical padding
            dw, dh = img_size[1] - roi[1], img_size[0] - roi[0]  # wh padding
            dw /= 2  # divide padding into 2 sides
            dh /= 2
            top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
            left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
            if len(img.shape) == 2:
                img_crop = img[bottom:img.shape[0] - top, :]  # avoid an empty slice when top == 0
            else:
                img_crop = img[:, bottom:img.shape[1] - top, :]
            min_val = np.percentile(img_crop.ravel(), percentile)
            max_val = np.percentile(img_crop.ravel(), 100 - percentile)
        else:
            min_val = np.percentile(img.ravel(), percentile)
            max_val = np.percentile(img.ravel(), 100 - percentile)
        img = np.double(img - min_val) / (np.double(max_val - min_val) + eps)
        img = np.minimum(np.maximum(img, 0), 1)
    elif scaling_type == 'single_image_percentile_0_255':
        if bool(roi):
            raise ValueError("roi is not supported for scaling_type 'single_image_percentile_0_255'")
        ImgMin = np.percentile(img, percentile)
        ImgMax = np.percentile(img, 100 - percentile)
        ImgDRC = np.double(img - ImgMin) / (np.double(ImgMax - ImgMin) + eps) * 255  # eps in the denominator guards divide-by-zero
        img_temp = np.uint8(np.minimum(np.maximum(ImgDRC, 0), 255))
        return img_temp
    elif scaling_type == 'remove+global_outlier_0_1':
        if bool(roi):
            raise ValueError("roi is not supported for scaling_type 'remove+global_outlier_0_1'")
        img = np.double(img - img.min() * beta) / np.double(img.max() * (1 - beta) - img.min() * beta)  # beta in [percentile]
        img = np.double(np.minimum(np.maximum(img, 0), 1))
    elif scaling_type == 'normalization_uint16':
        raise ValueError("'normalization_uint16' image scaling is not implemented yet.")
    elif scaling_type == 'normalization':
        raise ValueError("'normalization' image scaling is not implemented yet.")
    else:
        raise ValueError("Unknown image scaling method")
    return img
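# Usage sketch (hedged): scale a synthetic 16-bit frame into [0, 1] with percentile
# clipping; the array shape and percentile below are illustrative only.
def _example_scaling_image():
    frame = (np.random.rand(64, 64) * (2 ** 16 - 1)).astype(np.uint16)  # fake TIR frame
    out = scaling_image(frame, scaling_type='single_image_percentile_0_1', percentile=0.3)
    assert 0.0 <= out.min() and out.max() <= 1.0  # clipped into [0, 1]
    return out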
def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix='',rel_path_images='', num_cls=-1):
    # Make sure only the first process in DDP processes the dataset first, so the following ones can use the cache
    if augment:
        hyp['gamma_liklihood'] = opt.gamma_aug_prob
        print("", 100 * '==')
        print('gamma_liklihood was overridden by the optional value ', opt.gamma_aug_prob)
with torch_distributed_zero_first(rank):
scaling_before_mosaic = bool(hyp.get('scaling_before_mosaic', False))
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
augment=augment, # augment images
hyp=hyp, # augmentation hyperparameters
rect=rect, # rectangular training
cache_images=cache,
single_cls=opt.single_cls,
stride=int(stride),
pad=pad,
image_weights=image_weights,
prefix=prefix,
rel_path_images=rel_path_images,
scaling_type=opt.norm_type,
input_channels=opt.input_channels,
num_cls=num_cls,
tir_channel_expansion=opt.tir_channel_expansion,
no_tir_signal=opt.no_tir_signal,
scaling_before_mosaic=scaling_before_mosaic,
csv_metadata_path=opt.csv_metadata_path)
batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
sampler = torch.utils.data.distributed.DistributedSampler(dataset) if rank != -1 else None
loader = torch.utils.data.DataLoader if image_weights else InfiniteDataLoader
# Use torch.utils.data.DataLoader() if dataset.properties will update during training else InfiniteDataLoader()
dataloader = loader(dataset,
batch_size=batch_size,
num_workers=nw,
sampler=sampler,
pin_memory=True,
collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
return dataloader, dataset
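# Usage sketch (hedged): the opt fields and hyp keys below are illustrative stand-ins
# for the repo's argparse options and hyp yaml (only the keys this module reads), and
# 'data/train.txt' is a hypothetical image-list file.
def _example_create_dataloader():
    from types import SimpleNamespace
    opt = SimpleNamespace(single_cls=False, norm_type='single_image_percentile_0_1',
                          input_channels=1, tir_channel_expansion=False,
                          no_tir_signal=False, csv_metadata_path='', gamma_aug_prob=0.3)
    hyp = {'img_percentile_removal': 0.3, 'beta': 0.3, 'random_pad': False,
           'copy_paste': 0.0, 'gamma_liklihood': 0.3, 'gamma': 80,
           'scaling_before_mosaic': False}
    return create_dataloader('data/train.txt', 640, 16, 32, opt, hyp=hyp,
                             augment=False, rank=-1, world_size=1, workers=4, num_cls=3)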
class InfiniteDataLoader(torch.utils.data.dataloader.DataLoader):
""" Dataloader that reuses workers
Uses same syntax as vanilla DataLoader
"""
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
object.__setattr__(self, 'batch_sampler', _RepeatSampler(self.batch_sampler))
self.iterator = super().__iter__()
def __len__(self):
return len(self.batch_sampler.sampler)
def __iter__(self):
for i in range(len(self)):
yield next(self.iterator)
class _RepeatSampler(object):
""" Sampler that repeats forever
Args:
sampler (Sampler)
"""
def __init__(self, sampler):
self.sampler = sampler
def __iter__(self):
while True:
yield from iter(self.sampler)
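# Minimal sketch: InfiniteDataLoader keeps one iterator (and its workers) alive
# across epochs, while len() still reports a single epoch's number of batches.
def _example_infinite_dataloader():
    from torch.utils.data import TensorDataset
    ds = TensorDataset(torch.arange(8).float())
    dl = InfiniteDataLoader(ds, batch_size=4, num_workers=0)
    for _epoch in range(2):  # both passes reuse the same underlying iterator
        for (batch,) in dl:  # len(dl) == 2 batches per pass
            pass
    return len(dl)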
class LoadImages: # for inference
def __init__(self, path, img_size=640, stride=32,
scaling_type='standardization', img_percentile_removal=0.3, beta=0.3, input_channels=3,
tir_channel_expansion=False, no_tir_signal=False,
rel_path_for_list_files=''):
p = str(Path(path).absolute()) # os-agnostic absolute path
if '*' in p:
files = sorted(glob.glob(p, recursive=True)) # glob
elif os.path.isdir(p):
files = sorted(glob.glob(os.path.join(p, '*.*'))) # dir
elif os.path.isfile(p):
if path.endswith('.txt'):
files = self.parse_image_file_names(path, rel_path_for_list_files)
else:
files = [p] # files
else:
raise Exception(f'ERROR: {p} does not exist')
images = [x for x in files if x.split('.')[-1].lower() in img_formats]
videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
ni, nv = len(images), len(videos)
self.img_size = img_size
self.stride = stride
self.files = images + videos
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
self.mode = 'image'
if any(videos):
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nf > 0, f'No images or videos found in {p}. ' \
f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'
self.scaling_type = scaling_type
self.percentile = img_percentile_removal
self.beta = beta
self.input_channels = input_channels
self.tir_channel_expansion = tir_channel_expansion
self.is_tir_signal = not (no_tir_signal)
def parse_image_file_names(self, path, rel_path_for_list_files):
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = Path(p) # os-agnostic
if p.is_dir(): # dir
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
# f = list(p.rglob('**/*.*')) # pathlib
elif p.is_file(): # file
with open(p, 'r') as t:
t = t.read().strip().splitlines()
parent = str(p.parent) + os.sep
if bool(rel_path_for_list_files):
f += [os.path.join(rel_path_for_list_files, x.replace('./', '')).rstrip() if x.startswith(
'./') else x for x
in t] # local to global path
else:
f += [x.replace('./', parent).rstrip() if x.startswith('./') else x for x in
t] # local to global path
# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
else:
raise Exception(f'{p} does not exist')
self.img_files = sorted(
[x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
# self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib
            assert self.img_files, 'No images found'
except Exception as e:
raise Exception(f'Error loading data from {path}: {e}\nSee {help_url}')
return f
def __iter__(self):
self.count = 0
return self
def __next__(self):
if self.count == self.nf:
raise StopIteration
path = self.files[self.count]
if self.video_flag[self.count]:
# Read video
self.mode = 'video'
ret_val, img0 = self.cap.read()
if not ret_val:
self.count += 1
self.cap.release()
if self.count == self.nf: # last video
raise StopIteration
else:
path = self.files[self.count]
self.new_video(path)
ret_val, img0 = self.cap.read()
self.frame += 1
print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='')
else:
# Read image
self.count += 1
# img0 = cv2.imread(path) # BGR
# 16bit unsigned
if os.path.basename(path).split('.')[-1] == 'tiff':
img0 = cv2.imread(path, -1)
else:
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'Image Not Found ' + path
#print(f'image {self.count}/{self.nf} {path}: ', end='')
# Padded resize
img = letterbox(img0, self.img_size, stride=self.stride)[0]
        if self.tir_channel_expansion:  # HK @@ per the paper, this channel expansion (CE) is itself a form of augmentation, so no preliminary augmentation is needed; one channel is an inversion, so avoid the channel-inversion augmentation
img = np.repeat(img[np.newaxis, :, :], 3, axis=0) # convert GL to RGB by replication
img_ce = np.zeros_like(img).astype('float64')
# CH1 hist equalization
img_chan = scaling_image(img[0, :, :], scaling_type=self.scaling_type,
percentile=0, beta=self.beta)
img_ce[0, :, :] = img_chan.astype('float64')
img_chan = scaling_image(img[1, :, :], scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
img_ce[1, :, :] = img_chan.astype('float64')
img_chan = inversion_aug(img_ce[1, :, :]) # invert the DRC one
img_ce[2, :, :] = img_chan.astype('float64')
img = img_ce
if not self.tir_channel_expansion:
if self.is_tir_signal:
img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0) #convert GL to RGB by replication
else:
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
# print('\n image file', self.img_files[index])
if 0:
import matplotlib.pyplot as plt
plt.figure()
plt.hist(img.ravel(), bins=128)
plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(path).split('.')[0]+ 'pre'))
        file_type = os.path.basename(path).split('.')[-1].lower()
        if file_type != 'tiff' and file_type != 'png':
            print('!!!!!!!!!!!!!!!! file {}/{} {} has an unrecognized type'.format(self.count, self.nf, path))
        if file_type != 'png':
            img = scaling_image(img, scaling_type=self.scaling_type,
                                percentile=self.percentile, beta=self.beta)
        else:
            img = scaling_image(img,
                                scaling_type='single_image_0_to_1')  # safer in case of double standardization: one before mosaic and here the last one, since mosaic occurs at random
if 0:
import matplotlib.pyplot as plt
plt.figure()
plt.hist(img.ravel(), bins=128)
plt.savefig(os.path.join('/home/hanoch/projects/tir_od/outputs', os.path.basename(path).split('.')[0]+ 'post'))
img = np.ascontiguousarray(img)
return path, img, img0, self.cap
def new_video(self, path):
self.frame = 0
self.cap = cv2.VideoCapture(path)
self.nframes = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
def __len__(self):
return self.nf # number of files
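# Usage sketch (hedged): iterate a directory of 16-bit TIR tiffs for inference;
# 'data/samples' is a hypothetical path.
def _example_load_images():
    dataset = LoadImages('data/samples', img_size=640, stride=32,
                         scaling_type='single_image_percentile_0_1',
                         input_channels=1, no_tir_signal=False)
    for path, img, img0, cap in dataset:  # img: CHW, scaled; img0: original frame
        print(path, img.shape, img0.shape)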
class LoadWebcam: # for inference
def __init__(self, pipe='0', img_size=640, stride=32):
self.img_size = img_size
self.stride = stride
if pipe.isnumeric():
            pipe = int(pipe)  # local camera
# pipe = 'rtsp://192.168.1.64/1' # IP camera
# pipe = 'rtsp://username:password@192.168.1.64/1' # IP camera with login
# pipe = 'http://wmccpinetop.axiscam.net/mjpg/video.mjpg' # IP golf camera
self.pipe = pipe
self.cap = cv2.VideoCapture(pipe) # video capture object
self.cap.set(cv2.CAP_PROP_BUFFERSIZE, 3) # set buffer size
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
if cv2.waitKey(1) == ord('q'): # q to quit
self.cap.release()
cv2.destroyAllWindows()
raise StopIteration
# Read frame
if self.pipe == 0: # local camera
ret_val, img0 = self.cap.read()
img0 = cv2.flip(img0, 1) # flip left-right
else: # IP camera
n = 0
while True:
n += 1
self.cap.grab()
if n % 30 == 0: # skip frames
ret_val, img0 = self.cap.retrieve()
if ret_val:
break
# Print
assert ret_val, f'Camera Error {self.pipe}'
img_path = 'webcam.jpg'
print(f'webcam {self.count}: ', end='')
# Padded resize
img = letterbox(img0, self.img_size, stride=self.stride)[0]
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
img = np.ascontiguousarray(img)
return img_path, img, img0, None
def __len__(self):
return 0
class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640, stride=32):
self.mode = 'stream'
self.img_size = img_size
self.stride = stride
if os.path.isfile(sources):
with open(sources, 'r') as f:
sources = [x.strip() for x in f.read().strip().splitlines() if len(x.strip())]
else:
sources = [sources]
n = len(sources)
self.imgs = [None] * n
self.sources = [clean_str(x) for x in sources] # clean source names for later
for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
print(f'{i + 1}/{n}: {s}... ', end='')
            url = int(s) if s.isnumeric() else s
if 'youtube.com/' in str(url) or 'youtu.be/' in str(url): # if source is YouTube video
check_requirements(('pafy', 'youtube_dl'))
import pafy
url = pafy.new(url).getbest(preftype="mp4").url
cap = cv2.VideoCapture(url)
assert cap.isOpened(), f'Failed to open {s}'
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
self.fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(f' success ({w}x{h} at {self.fps:.2f} FPS).')
thread.start()
print('') # newline
# check for common shapes
s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
def update(self, index, cap):
# Read next stream frame in a daemon thread
n = 0
while cap.isOpened():
n += 1
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
success, im = cap.retrieve()
self.imgs[index] = im if success else self.imgs[index] * 0
n = 0
time.sleep(1 / self.fps) # wait time
def __iter__(self):
self.count = -1
return self
def __next__(self):
self.count += 1
img0 = self.imgs.copy()
if cv2.waitKey(1) == ord('q'): # q to quit
cv2.destroyAllWindows()
raise StopIteration
# Letterbox
img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]
# Stack
img = np.stack(img, 0)
# Convert
img = img[:, :, :, ::-1].transpose(0, 3, 1, 2) # BGR to RGB, to bsx3x416x416
img = np.ascontiguousarray(img)
return self.sources, img, img0, None
def __len__(self):
return 0 # 1E12 frames = 32 streams at 30 FPS for 30 years
def img2label_paths(img_paths):
# Define label paths as a function of image paths
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]
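# e.g. img2label_paths(['/data/images/train/frame_001.tiff'])
#      -> ['/data/labels/train/frame_001.txt'] (POSIX separators shown for brevity)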
class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', rel_path_images='',
scaling_type='standardization', input_channels=3,
num_cls=-1, tir_channel_expansion=False, no_tir_signal=False, scaling_before_mosaic=False,
csv_metadata_path=''):
self.scaling_before_mosaic = scaling_before_mosaic
self.img_size = img_size
self.augment = augment
self.hyp = hyp
self.image_weights = image_weights
self.rect = False if image_weights else rect
self.mosaic = self.augment and not self.rect # load 4 images at a time into a mosaic (only during training)
self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride
self.path = path
self.scaling_type = scaling_type
self.percentile = hyp['img_percentile_removal']
self.beta = hyp['beta']
        self.input_channels = input_channels  # in case of a GL image while the NN expects RGB, channels are replicated
self.tir_channel_expansion = tir_channel_expansion
self.is_tir_signal = not (no_tir_signal)
self.random_pad = hyp['random_pad']
self.use_csv_meta_data_file = False
if bool(csv_metadata_path):
self.csv_meta_data_file = check_file(csv_metadata_path)
self.use_csv_meta_data_file = True
        if self.hyp['copy_paste'] > 0 and self.random_pad:
            raise ValueError('copy_paste and random_pad are mutually exclusive; combining them is not supported yet')
#self.albumentations = Albumentations() if augment else None
        self.albumentations_gamma_contrast = Albumentations_gamma_contrast(alb_prob=hyp['gamma_liklihood'],
                                                                           gamma_limit=[hyp['gamma'], 200 - hyp['gamma']])
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = Path(p) # os-agnostic
if p.is_dir(): # dir
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
# f = list(p.rglob('**/*.*')) # pathlib
elif p.is_file(): # file
with open(p, 'r') as t:
t = t.read().strip().splitlines()
parent = str(p.parent) + os.sep
if bool(rel_path_images):
f += [os.path.join(rel_path_images, x.replace('./', '')).rstrip() if x.startswith('./') else x for x in t] # local to global path
else:
f += [x.replace('./', parent).rstrip() if x.startswith('./') else x for x in t] # local to global path
# f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
else:
raise Exception(f'{prefix}{p} does not exist')
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
# self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib
assert self.img_files, f'{prefix}No images found'
except Exception as e:
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')
# Check cache HK : cache is only for labels /annotations
self.label_files = img2label_paths(self.img_files) # labels
cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
if cache_path.is_file():
cache, exists = torch.load(cache_path), True # load
#if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed
# cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
else:
cache, exists = self.cache_labels(num_cls, cache_path, prefix), False # cache
# Display cache
nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total
if exists:
d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
# Read cache
cache.pop('hash') # remove hash
cache.pop('version') # remove version
labels, shapes, self.segments = zip(*cache.values())
# #@@HK TODO adding truncation ratio increase here
# if labels.shape[1] > 5:
# labels = labels[:,:5]
# self.truncation_ratio = labels[:,5]
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
self.img_files = list(cache.keys()) # update
self.label_files = img2label_paths(cache.keys()) # update
if single_cls:
for x in self.labels:
x[:, 0] = 0
n = len(shapes) # number of images
bi = np.floor(np.arange(n) / batch_size).astype(int) # batch index
nb = bi[-1] + 1 # number of batches
self.batch = bi # batch index of image
self.n = n
self.indices = range(n)
# Rectangular Training
if self.rect:
# Sort by aspect ratio
s = self.shapes # wh
ar = s[:, 1] / s[:, 0] # aspect ratio
irect = ar.argsort()
self.img_files = [self.img_files[i] for i in irect]
self.label_files = [self.label_files[i] for i in irect]
self.labels = [self.labels[i] for i in irect]
self.shapes = s[irect] # wh
ar = ar[irect]
# Set training image shapes
shapes = [[1, 1]] * nb
for i in range(nb):
ari = ar[bi == i]
mini, maxi = ari.min(), ari.max()
if maxi < 1:
shapes[i] = [maxi, 1]
elif mini > 1:
shapes[i] = [1, 1 / mini]
            self.batch_shapes = np.ceil(np.array(shapes) * img_size / stride + pad).astype(int) * stride
            # pad=0.5 (see https://github.com/ultralytics/ultralytics/issues/13271): the padding preserves
            # aspect ratio and leaves a buffer so augmentations don't lose features at the image edges
            # (e.g. 640x640 -> 672x672); it can slightly shift mAP between the .pt and ONNX models.
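            # Worked example: for a square batch (shapes[i] = [1, 1]) with img_size=640, stride=32:
            #   pad=0.0 -> ceil(640/32 + 0.0) * 32 = 640;  pad=0.5 -> ceil(640/32 + 0.5) * 32 = 672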
# Cache images into memory for faster training (WARNING: large datasets may exceed system RAM)
self.imgs = [None] * n
if cache_images:
if cache_images == 'disk':
self.im_cache_dir = Path(Path(self.img_files[0]).parent.as_posix() + '_npy')
self.img_npy = [self.im_cache_dir / Path(f).with_suffix('.npy').name for f in self.img_files]
self.im_cache_dir.mkdir(parents=True, exist_ok=True)
gb = 0 # Gigabytes of cached images
self.img_hw0, self.img_hw = [None] * n, [None] * n
results = ThreadPool(8).imap(lambda x: load_image(*x), zip(repeat(self), range(n)))
pbar = tqdm(enumerate(results), total=n)
for i, x in pbar:
if cache_images == 'disk':
if not self.img_npy[i].exists():
np.save(self.img_npy[i].as_posix(), x[0])
gb += self.img_npy[i].stat().st_size
else:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x
gb += self.imgs[i].nbytes
pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'
pbar.close()
if self.use_csv_meta_data_file:
df = load_csv_xls_2_df(self.csv_meta_data_file)
self.df_metadata = pd.DataFrame(columns=['sensor_type', 'part_in_day', 'weather_condition', 'country', 'train_state', 'tir_frame_image_file_name'])
            # TODO: HK @@ iterate tqdm(zip(self.img_files, self.label_files)) and, upon --force-csv-list, remove missing entries from the csv in the train/test lists!!!
            for ix, fname in enumerate(self.img_files):
                file_name = os.path.basename(fname)  # robust to os-specific separators
                if not (df['tir_frame_image_file_name'] == file_name).any():
                    print('Metadata for file name {} was not found !!!'.format(file_name))
try:
self.df_metadata.loc[len(self.df_metadata)] = [df[df['tir_frame_image_file_name'] == file_name]['sensor_type'].item(),
df[df['tir_frame_image_file_name'] == file_name]['part_in_day'].item(),
df[df['tir_frame_image_file_name'] == file_name]['weather_condition'].item(),
df[df['tir_frame_image_file_name'] == file_name]['country'].item(),
df[df['tir_frame_image_file_name'] == file_name]['train_state'].item(),
df[df['tir_frame_image_file_name'] == file_name]['tir_frame_image_file_name'].item()]
except Exception as e:
                    print(f'WARNING: ignoring csv metadata for {file_name} ({fname}): {e}')
def cache_labels(self, num_cls, path=Path('./labels.cache'), prefix=''):
# Cache dataset labels, check images and read shapes
x = {} # dict
        nm, nf, ne, nc = 0, 0, 0, 0  # number missing, found, empty, corrupted
pbar = tqdm(zip(self.img_files, self.label_files), desc='Scanning images', total=len(self.img_files))
for i, (im_file, lb_file) in enumerate(pbar):
try:
# verify images
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
segments = [] # instance segments
assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
assert im.format.lower() in img_formats, f'invalid image format {im.format}'
# verify labels
if os.path.isfile(lb_file):
nf += 1 # label found
with open(lb_file, 'r') as f:
l = [x.split() for x in f.read().strip().splitlines()]
if any([len(x) > 8 for x in l]): # is segment
classes = np.array([x[0] for x in l], dtype=np.float32)
segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)
l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
l = np.array(l, dtype=np.float32)
# if (l[:, 0].max() >= num_cls):
# print('ka', i, l, lb_file, im_file)
                        l = np.array([lbl for lbl in l if lbl[0] < num_cls])  # keep only labels whose class index is below num_cls; omit the rest
if len(l):
assert l.shape[1] == 5, 'labels require 5 columns each' #@@HK TODO adding truncation ratio increase here : assert l.shape[1] == 6,
assert (l >= 0).all(), 'negative labels'
assert (l[:, 1:] <= 1).all(), 'non-normalized or out of bounds coordinate labels'
assert np.unique(l, axis=0).shape[0] == l.shape[0], 'duplicate labels'
assert (l[:, 0].max() < num_cls), 'class label out of range -- invalid' # max label can't be greater than num of labels
# print(l[:, 0])
else:
ne += 1 # label empty
l = np.zeros((0, 5), dtype=np.float32)
else:
nm += 1 # label missing
l = np.zeros((0, 5), dtype=np.float32)
x[im_file] = [l, shape, segments]
except Exception as e:
nc += 1
print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')
pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' images and labels... " \
f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"
pbar.close()
if nf == 0:
print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = nf, nm, ne, nc, i + 1
x['version'] = 0.1 # cache version
torch.save(x, path) # save for next time
logging.info(f'{prefix}New cache created: {path}')
return x
def __len__(self):
return len(self.img_files)
# def __iter__(self):
# self.count = -1
# print('ran dataset iter')
# #self.shuffled_vector = np.random.permutation(self.nF) if self.augment else np.arange(self.nF)
# return self
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
file_type = os.path.basename(self.img_files[index]).split('.')[-1].lower()
        if file_type != 'tiff' and file_type != 'png':
            print('!!!!!!!!!!!!!!!! index : {} {} unrecognized file type'.format(index, self.img_files[index]))
        if self.is_tir_signal:
            if self.scaling_before_mosaic:
                filling_value = 0.5  # on borders or after perspective, fill with 0.5 in [0, 1], which equals 114 in [0, 255]
            else:
                filling_value = 0  # on borders or after perspective, better to fill with a 0 thermal profile: the image is uint16 with an unknown dynamic range TODO: find a more elegant way
        else:
            filling_value = 114
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic'] and not(self.tir_channel_expansion)
if mosaic:
# Load mosaic
if random.random() < 0.8:
img, labels = load_mosaic(self, index, filling_value=filling_value, file_type=file_type)
else:
img, labels = load_mosaic9(self, index, filling_value=filling_value, file_type=file_type)
shapes = None
# MixUp https://arxiv.org/pdf/1710.09412.pdf
if random.random() < hyp['mixup']:
if random.random() < 0.8:
img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1), filling_value=filling_value, file_type=file_type)
else:
img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1), filling_value=filling_value, file_type=file_type)
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
img = (img * r + img2 * (1 - r)).astype(img.dtype)#.astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
else:
# Load image
img, (h0, w0), (h, w) = load_image(self, index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
# img, ratio, pad = letterbox(img, shape, color=(img.mean(), img.mean(), img.mean()), auto=False, scaleup=self.augment)
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment, random_pad=self.random_pad)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
        if self.tir_channel_expansion:  # HK @@ per the paper, this channel expansion (CE) is itself a form of augmentation, so no preliminary augmentation is needed; one channel is an inversion, so avoid the channel-inversion augmentation
img = np.repeat(img[np.newaxis, :, :], 3, axis=0) # convert GL to RGB by replication
img_ce = np.zeros_like(img).astype('float64')
# CH1 hist equalization
img_chan = scaling_image(img[0, :, :], scaling_type=self.scaling_type,
percentile=0, beta=self.beta)
img_ce[0, :, :] = img_chan.astype('float64')
img_chan = scaling_image(img[1, :, :], scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
img_ce[1, :, :] = img_chan.astype('float64')
img_chan = inversion_aug(img_ce[1, :, :]) # invert the DRC one
img_ce[2, :, :] = img_chan.astype('float64')
img = img_ce
if self.augment:
# Augment imagespace
if not mosaic:
if hyp['random_perspective']:
img, labels = random_perspective(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'],
filling_value=filling_value,
is_fill_by_mean_img=self.is_tir_signal,
random_pad=self.random_pad)
                if np.isnan(img).any():
                    print('img is nan: no-mosaic path, after random perspective')
            if random.random() < hyp['inversion']:
                img = inversion_aug(img)
                if np.isnan(img).any():
                    print('img is nan after inversion')
# print("std===",img.std(), img.mean())
# GL gain/attenuation
# Squeeze pdf (x-mu)*scl+mu
#img, labels = self.albumentations(img, labels)
            img = self.albumentations_gamma_contrast(img)  # apply RandomBrightnessContrast only; gamma is applied manually below since the library's gamma response was buggy
            if random.random() < hyp['gamma_liklihood']:
                if img.dtype == np.uint16 or img.dtype == np.uint8:
                    img = img / np.iinfo(img.dtype).max
                if (img > 1.0).any():
                    img[img > 1.0] = 1.0
                if (img < 0).any():
                    img[img < 0] = 0
                gamma = np.random.uniform(hyp['gamma'], 200 - hyp['gamma']) / 100.0
                img = adjust_gamma(img, gamma, gain=1)
if np.isnan(img).any():
print('img is nan gamma')
if hyp['hsv_h'] > 0 or hyp['hsv_s'] > 0 or hyp['hsv_v'] > 0:
# Augment colorspace
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
# Apply cutouts
# if random.random() < 0.9:
# labels = cutout(img, labels)
if random.random() < hyp['paste_in']:
sample_labels, sample_images, sample_masks = [], [], []
                while len(sample_labels) < 30:  # gather candidate samples (up to ~30 labels), each draw from a random 4-image mosaic (anchor + 3 random)
sample_labels_, sample_images_, sample_masks_ = load_samples(self, random.randint(0, len(self.labels) - 1), file_type=file_type)
sample_labels += sample_labels_
sample_images += sample_images_
sample_masks += sample_masks_
#print(len(sample_labels))
if len(sample_labels) == 0:
break
labels = pastein(img, labels, sample_labels, sample_images, sample_masks)
# try:
#
# tag='paste_in'
# import tifffile
# if len(img.shape) == 2:
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded__' + tag +'__' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# img[:,:,np.newaxis])
# else:
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded__' + tag +'__' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# img)
# except Exception as e:
# print(e)
nL = len(labels) # number of labels
if nL:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
if self.augment:
# flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
# flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
labels_out = torch.zeros((nL, 6))
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od', 'img_ce.tiff'), 255*img.transpose(1,2,0).astype('uint8'))
if not self.tir_channel_expansion:
if self.is_tir_signal:
                if len(img.shape) == 2:
                    img = np.repeat(img[np.newaxis, :, :], self.input_channels, axis=0)  # replicate the single GL channel to input_channels (e.g. for an RGB-expecting NN)
                    # print('Warning , TIR image should be 3dim by now (w,h,1)', 100*'*')
                else:
                    img = np.repeat(img.transpose(2, 0, 1), self.input_channels, axis=0)
else:
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
if 0:
import matplotlib.pyplot as plt
plt.figure()
plt.hist(img.ravel(), bins=128)
plt.savefig(os.path.join('/home/hanoch/projects/tir_od/output', os.path.basename(self.img_files[index]).split('.')[0]+ 'pre_' +str(self.scaling_type)))
# import tifffile
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded_before_scaling_' + '_' +str(str(img.max())) + '_' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# (img.transpose(1, 2, 0)))
        # In case of a mosaic mixing PNG and TIFF, the TIFF was pre-scaled while the PNG shouldn't be
        if file_type != 'png':
            # img_size, roi = self.rectangle_res_roi(index)  # HK: tried to normalize the image according to the real roi inside the square
            # img = scaling_image(img, scaling_type=self.scaling_type,
            #                     percentile=self.percentile, beta=self.beta,
            #                     roi=roi, img_size=img_size)
            img = scaling_image(img, scaling_type=self.scaling_type,
                                percentile=self.percentile, beta=self.beta)
        else:
            img = scaling_image(img, scaling_type='single_image_0_to_1')  # safer in case of double standardization: one before mosaic and here the last one, since mosaic occurs at random
# print('ka')
if 0:
import matplotlib.pyplot as plt
# plt.figure()
plt.hist(img.ravel(), bins=128)
plt.savefig(os.path.join('/home/hanoch/projects/tir_od/output', os.path.basename(self.img_files[index]).split('.')[0] + '_hist_post_scaling_'+ str(self.scaling_type)))
# aa1 = np.repeat(img[1,:,:,:].cpu().permute(1,2,0).numpy(), 3, axis=2).astype('float32')
# cv2.imwrite('test/exp40/test_batch88_labels__1.jpg', aa1*255)
# aa1 = np.repeat(img.transpose(1,2,0), 3, axis=2).astype('float32')
# print('\n 1st', img.shape)
        if np.isnan(img).any():
            print('img {} index : {} is nan at the end of __getitem__'.format(self.img_files[index], index))
# raise
# try:
# tag='full_rect'
# import tifffile
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded__' + tag +'__' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# (img.transpose(1, 2, 0)*2**16).astype('uint16'))
# except Exception as e:
# print(f'\nfailed reading: due to {str(e)}')
# #
img = np.ascontiguousarray(img)
# print('\n 2nd', img.shape)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
def rectangle_res_roi(self, index):
img_orig, _, _ = load_image(self, index)
loaded_img_shape = img_orig.shape[:2]
new_shape = self.img_size
if isinstance(self.img_size, int): # if list then the 2d dim is embedded
new_shape = (new_shape, new_shape)
if new_shape != loaded_img_shape:
roi = loaded_img_shape
img_size = new_shape
        else:  # do nothing; normalize over the entire image
roi = ()
img_size = loaded_img_shape
if self.rect:
raise ValueError('not supported')
return img_size, roi
# Labels : When it comes to annotations, YOLOv8 uses relative coordinates rather than absolute pixel values for the
# bounding box positions. This means that the labels are in the range of 0 to 1 relative to the image width and height.
# Consequently, these labels will remain consistent regardless of image resizing. Hence, you do not need to change,
# adjust or resize the annotations or labels when the images are resized during training. The model will handle this
# process automatically.
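    # Worked example (illustrative): a normalized row (cls, x, y, w, h) = (0, 0.5, 0.5, 0.25, 0.25)
    # on a 640x640 image maps via xywhn2xyxy(labels[:, 1:], 640, 640) to pixel
    # xyxy = [240., 240., 400., 400.], and the normalized form survives any resize.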
@staticmethod
def collate_fn(batch):
img, label, path, shapes = zip(*batch) # transposed
for i, l in enumerate(label):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes
@staticmethod
def collate_fn4(batch):
img, label, path, shapes = zip(*batch) # transposed
n = len(shapes) // 4
img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
s = torch.tensor([[1, 1, .5, .5, .5, .5]]) # scale
for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
i *= 4
if random.random() < 0.5:
im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
0].type(img[i].type())
l = label[i]
else:
im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
img4.append(im)
label4.append(l)
for i, l in enumerate(label4):
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
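# Minimal sketch of collate_fn: each label row receives its batch-image index in
# column 0 so build_targets() can route targets; the tensors are illustrative.
def _example_collate_fn():
    img = torch.zeros(1, 640, 640)
    lbl_a = torch.tensor([[0., 0., 0.5, 0.5, 0.2, 0.2]])  # (img_idx, cls, x, y, w, h)
    lbl_b = torch.tensor([[0., 1., 0.3, 0.3, 0.1, 0.1]])
    batch = [(img, lbl_a, 'a.tiff', None), (img, lbl_b, 'b.tiff', None)]
    imgs, labels, paths, shapes = LoadImagesAndLabels.collate_fn(batch)
    return labels[:, 0]  # tensor([0., 1.]): one image index per label row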
class LoadImagesAddingNoiseAndLabels(LoadImagesAndLabels): # for training/testing
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
cache_images=False, single_cls=False, stride=32, pad=0.0, prefix='', rel_path_images='',
scaling_type='standardization', input_channels=3,
num_cls=-1, tir_channel_expansion=False, no_tir_signal=False, scaling_before_mosaic=False,
path_noisy_samples=''):
super(LoadImagesAddingNoiseAndLabels, self).__init__(path, img_size=img_size, batch_size=batch_size, augment=augment, hyp=hyp,
rect=rect, image_weights=image_weights,
cache_images=cache_images, single_cls=single_cls, stride=stride, pad=pad, prefix=prefix, rel_path_images=rel_path_images,
scaling_type=scaling_type, input_channels=input_channels,
                                                             num_cls=num_cls, tir_channel_expansion=tir_channel_expansion, no_tir_signal=no_tir_signal, scaling_before_mosaic=scaling_before_mosaic)  # forward num_cls instead of hard-coding -1
self.path_noisy_samples=path_noisy_samples
self.noise_filenames = [os.path.join(self.path_noisy_samples, x) for x in os.listdir(self.path_noisy_samples)
if x.endswith('tiff')]
self.recorded_noise = False
def __getitem__(self, index):
index = self.indices[index] # linear, shuffled, or image_weights
file_type = os.path.basename(self.img_files[index]).split('.')[-1].lower()
        if file_type != 'tiff' and file_type != 'png':
            print('!!!!!!!!!!!!!!!! index : {} {} unrecognized file type'.format(index, self.img_files[index]))
        if self.is_tir_signal:
            if self.scaling_before_mosaic:
                filling_value = 0.5  # on borders or after perspective, fill with 0.5 in [0, 1], which equals 114 in [0, 255]
            else:
                filling_value = 0  # on borders or after perspective, better to fill with a 0 thermal profile: the image is uint16 with an unknown dynamic range TODO: find a more elegant way
        else:
            filling_value = 114
hyp = self.hyp
mosaic = self.mosaic and random.random() < hyp['mosaic'] and not (self.tir_channel_expansion)
if mosaic:
# Load mosaic
if random.random() < 0.8:
img, labels = load_mosaic(self, index, filling_value=filling_value, file_type=file_type)
else:
img, labels = load_mosaic9(self, index, filling_value=filling_value, file_type=file_type)
shapes = None
# MixUp https://arxiv.org/pdf/1710.09412.pdf
if random.random() < hyp['mixup']:
if random.random() < 0.8:
img2, labels2 = load_mosaic(self, random.randint(0, len(self.labels) - 1),
filling_value=filling_value, file_type=file_type)
else:
img2, labels2 = load_mosaic9(self, random.randint(0, len(self.labels) - 1),
filling_value=filling_value, file_type=file_type)
r = np.random.beta(8.0, 8.0) # mixup ratio, alpha=beta=8.0
img = (img * r + img2 * (1 - r)).astype(img.dtype) # .astype(np.uint8)
labels = np.concatenate((labels, labels2), 0)
else:
# Load image
img, (h0, w0), (h, w) = load_image(self, index)
# Letterbox
shape = self.batch_shapes[self.batch[index]] if self.rect else self.img_size # final letterboxed shape
# img, ratio, pad = letterbox(img, shape, color=(img.mean(), img.mean(), img.mean()), auto=False, scaleup=self.augment)
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment, random_pad=self.random_pad)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling
labels = self.labels[index].copy()
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])
        if self.tir_channel_expansion:  # HK @@ per the paper, this channel expansion (CE) is itself a form of augmentation, so no preliminary augmentation is needed; one channel is an inversion, so avoid the channel-inversion augmentation
img = np.repeat(img[np.newaxis, :, :], 3, axis=0) # convert GL to RGB by replication
img_ce = np.zeros_like(img).astype('float64')
# CH1 hist equalization
img_chan = scaling_image(img[0, :, :], scaling_type=self.scaling_type,
percentile=0, beta=self.beta)
img_ce[0, :, :] = img_chan.astype('float64')
img_chan = scaling_image(img[1, :, :], scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
img_ce[1, :, :] = img_chan.astype('float64')
img_chan = inversion_aug(img_ce[1, :, :]) # invert the DRC one
img_ce[2, :, :] = img_chan.astype('float64')
img = img_ce
if self.augment:
# Augment imagespace
if not mosaic:
if hyp['random_perspective']:
img, labels = random_perspective(img, labels,
degrees=hyp['degrees'],
translate=hyp['translate'],
scale=hyp['scale'],
shear=hyp['shear'],
perspective=hyp['perspective'],
filling_value=filling_value,
is_fill_by_mean_img=self.is_tir_signal,
random_pad=self.random_pad)
                if np.isnan(img).any():
                    print('img is nan: no-mosaic path, after random perspective')
            if random.random() < hyp['inversion']:
                img = inversion_aug(img)
                if np.isnan(img).any():
                    print('img is nan after inversion')
# print("std===",img.std(), img.mean())
# GL gain/attenuation
# Squeeze pdf (x-mu)*scl+mu
# img, labels = self.albumentations(img, labels)
            img = self.albumentations_gamma_contrast(img)  # apply RandomBrightnessContrast only; gamma is applied manually below since the library's gamma response was buggy
            if random.random() < hyp['gamma_liklihood']:
                if img.dtype == np.uint16 or img.dtype == np.uint8:
                    img = img / np.iinfo(img.dtype).max
                if (img > 1.0).any():
                    img[img > 1.0] = 1.0
                if (img < 0).any():
                    img[img < 0] = 0
                gamma = np.random.uniform(hyp['gamma'], 200 - hyp['gamma']) / 100.0
                img = adjust_gamma(img, gamma, gain=1)
if np.isnan(img).any():
print('img is nan gamma')
if hyp['hsv_h'] > 0 or hyp['hsv_s'] > 0 or hyp['hsv_v'] > 0:
# Augment colorspace
augment_hsv(img, hgain=hyp['hsv_h'], sgain=hyp['hsv_s'], vgain=hyp['hsv_v'])
# Apply cutouts
# if random.random() < 0.9:
# labels = cutout(img, labels)
if random.random() < hyp['paste_in']:
sample_labels, sample_images, sample_masks = [], [], []
                while len(sample_labels) < 30:  # gather candidate samples (up to ~30 labels), each draw from a random 4-image mosaic (anchor + 3 random)
sample_labels_, sample_images_, sample_masks_ = load_samples(self, random.randint(0,
len(self.labels) - 1),
file_type=file_type)
sample_labels += sample_labels_
sample_images += sample_images_
sample_masks += sample_masks_
# print(len(sample_labels))
if len(sample_labels) == 0:
break
labels = pastein(img, labels, sample_labels, sample_images, sample_masks)
# try:
#
# tag='paste_in'
# import tifffile
# if len(img.shape) == 2:
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded__' + tag +'__' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# img[:,:,np.newaxis])
# else:
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded__' + tag +'__' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# img)
# except Exception as e:
# print(e)
nL = len(labels) # number of labels
if nL:
labels[:, 1:5] = xyxy2xywh(labels[:, 1:5]) # convert xyxy to xywh
labels[:, [2, 4]] /= img.shape[0] # normalized height 0-1
labels[:, [1, 3]] /= img.shape[1] # normalized width 0-1
if self.augment:
# flip up-down
if random.random() < hyp['flipud']:
img = np.flipud(img)
if nL:
labels[:, 2] = 1 - labels[:, 2]
# flip left-right
if random.random() < hyp['fliplr']:
img = np.fliplr(img)
if nL:
labels[:, 1] = 1 - labels[:, 1]
labels_out = torch.zeros((nL, 6))
if nL:
labels_out[:, 1:] = torch.from_numpy(labels)
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od', 'img_ce.tiff'), 255*img.transpose(1,2,0).astype('uint8'))
if not self.tir_channel_expansion:
if self.is_tir_signal:
if len(img.shape) == 2:
                    img = np.repeat(img[np.newaxis, :, :], self.input_channels,
                                    axis=0)  # replicate the single GL channel to input_channels (e.g. for an RGB-expecting NN)
                    # print('Warning , TIR image should be 3dim by now (w,h,1)', 100*'*')
else:
img = np.repeat(img.transpose(2, 0, 1), self.input_channels, axis=0)
else:
# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
if 0:
import matplotlib.pyplot as plt
plt.figure()
plt.hist(img.ravel(), bins=128)
plt.savefig(os.path.join('/home/hanoch/projects/tir_od/output',
os.path.basename(self.img_files[index]).split('.')[0] + 'pre_' + str(
self.scaling_type)))
# import tifffile
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded_before_scaling_' + '_' +str(str(img.max())) + '_' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# (img.transpose(1, 2, 0)))
        # In case of a mosaic mixing PNG and TIFF, the TIFF was pre-scaled while the PNG shouldn't be
if file_type != 'png':
# img_size, roi = self.rectangle_res_roi(index) # HK tried to normalize the image according to the real roi inside the square
# img = scaling_image(img, scaling_type=self.scaling_type,
# percentile=self.percentile, beta=self.beta,
# roi=roi, img_size=img_size)
if self.recorded_noise:
index_noise_same = np.random.randint(0, len(self.noise_filenames ))
img_noise_path = self.noise_filenames[index_noise_same]
img_noise = cv2.imread(img_noise_path, -1)
if len(img.shape) == 3:
shape_tup = img.shape[1:]
else:
shape_tup = img.shape
                img_noise = letterbox(img_noise, shape_tup, 32)[0]  # reshape the recorded-noise frame to the image shape
img_noise = img_noise[np.newaxis, :, :] # (640,640, 1)
else:
min_val = np.percentile(img.ravel(), 0.5)
max_val = np.percentile(img.ravel(), 100 - 0.5)
density_per_scanline = int(0.5 + img.shape[1]*4/128)
pattern_len = 3
img_noise = np.zeros_like(img).astype('uint16')
for row in range (img.shape[1]):
                    pattern_location = random.choices(range(img.shape[2] - pattern_len), k=density_per_scanline)  # columns along the scanline (img is CHW, so shape[2] is width)
pattern_location.sort()
noise_amp = np.random.randint(0, max_val- min_val, len(pattern_location))
for ix, noise_patt in enumerate(pattern_location):
img_noise[0,row,noise_patt:noise_patt+pattern_len] = np.array(pattern_len*[noise_amp[ix]])
img = img + img_noise
img = scaling_image(img, scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
        else:
            img = scaling_image(img,
                                scaling_type='single_image_0_to_1')  # safer in case of double standardization: one before mosaic and here the last one, since mosaic occurs at random
# print('ka')
if 0:
import matplotlib.pyplot as plt
# plt.figure()
plt.hist(img.ravel(), bins=128)
plt.savefig(os.path.join('/home/hanoch/projects/tir_od/output',
os.path.basename(self.img_files[index]).split('.')[
0] + '_hist_post_scaling_' + str(self.scaling_type)))
# aa1 = np.repeat(img[1,:,:,:].cpu().permute(1,2,0).numpy(), 3, axis=2).astype('float32')
# cv2.imwrite('test/exp40/test_batch88_labels__1.jpg', aa1*255)
# aa1 = np.repeat(img.transpose(1,2,0), 3, axis=2).astype('float32')
# print('\n 1st', img.shape)
        if np.isnan(img).any():
            print('img {} index : {} is nan at the end of __getitem__'.format(self.img_files[index], index))
# raise
# try:
# tag='full_rect'
# import tifffile
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output', 'img_loaded__' + tag +'__' +str(self.img_files[index].split('/')[-1].split('.tiff')[0]) + '.tiff'),
# (img.transpose(1, 2, 0)*2**16).astype('uint16'))
# except Exception as e:
# print(f'\nfailed reading: due to {str(e)}')
# #
img = np.ascontiguousarray(img)
# print('\n 2nd', img.shape)
return torch.from_numpy(img), labels_out, self.img_files[index], shapes
# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
# loads 1 image from dataset, returns img, original hw, resized hw
img = self.imgs[index]
if img is None: # not cached
path = self.img_files[index]
        # 16-bit unsigned
        if os.path.basename(path).split('.')[-1] == 'tiff':
            img = cv2.imread(path, -1)
            assert img is not None, 'Image Not Found ' + path  # assert before indexing, so a missing file reports cleanly
            img = img[:, :, np.newaxis]  # (640, 640, 1)
        else:
            img = cv2.imread(path)  # BGR
            assert img is not None, 'Image Not Found ' + path
            if self.is_tir_signal:
                img = img[:, :, 0]  # channels are duplicated in the source
                img = img[:, :, np.newaxis]
h0, w0 = img.shape[:2] # orig hw
r = self.img_size / max(h0, w0) # resize image to img_size
if r != 1: # always resize down, only resize up if training with augmentation
interp = cv2.INTER_AREA if r < 1 and not self.augment else cv2.INTER_LINEAR
img = cv2.resize(img, (int(w0 * r), int(h0 * r)), interpolation=interp)
return img, (h0, w0), img.shape[:2] # img, hw_original, hw_resized
else:
return self.imgs[index], self.img_hw0[index], self.img_hw[index] # img, hw_original, hw_resized
def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
r = np.random.uniform(-1, 1, 3) * [hgain, sgain, vgain] + 1 # random gains
hue, sat, val = cv2.split(cv2.cvtColor(img, cv2.COLOR_BGR2HSV))
dtype = img.dtype # uint8
x = np.arange(0, 256, dtype=np.int16)
lut_hue = ((x * r[0]) % 180).astype(dtype)
lut_sat = np.clip(x * r[1], 0, 255).astype(dtype)
lut_val = np.clip(x * r[2], 0, 255).astype(dtype)
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed
def inversion_aug(img):
if img.dtype == np.uint16 or img.dtype == np.uint8:
img = np.iinfo(img.dtype).max - img
return img
elif img.dtype == np.float32 or img.dtype == np.float64:
img = 1.0 - img
return img
    else:
        raise ValueError("image dtype is not supported (uint8, uint16, float32, float64): {}".format(img.dtype))
def hist_equalize(img, clahe=True, bgr=False):
# Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
yuv[:, :, 0] = c.apply(yuv[:, :, 0])
else:
yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB
def load_mosaic(self, index, filling_value, file_type='tiff'):
# loads images in a 4-mosaic
labels4, segments4 = [], []
s = self.img_size
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)
if self.scaling_before_mosaic:
if file_type == 'png':
img = scaling_image(img, scaling_type='single_image_0_to_1')
else:
img = scaling_image(img, scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
# import tifffile
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output',
# 'img_projective_' + str(self.img_files[index].split('/')[-1].split('.tiff')[0]) +'.tiff'), img)
# place img in img4
if i == 0: # top left
if self.is_tir_signal:
img4 = init_image_plane(self, img, s, n_div=2)
else:
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
# Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
segments4.extend(segments)
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img4, labels4 = replicate(img4, labels4) # replicate
# Augment
#img4, labels4, segments4 = remove_background(img4, labels4, segments4)
#sample_segments(img4, labels4, segments4, probability=self.hyp['copy_paste'])
img4, labels4, segments4 = copy_paste(img4, labels4, segments4, probability=self.hyp['copy_paste']) # mainly for instance segmentation ??!! #@@HK
img4, labels4 = random_perspective(img4, labels4, segments4,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border,
filling_value=filling_value,
is_fill_by_mean_img=self.is_tir_signal)# mosaic has its own random padding hence no need to support inside perspective (scaling)
# border to remove
# import tifffile
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output',
# 'img_projective_' + str(self.img_files[indices[0]].split('/')[-1].split('.tiff')[0]) +'.tiff'), img)
#
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output',
# 'img_projective_' + str(
# self.img_files[indices[1]].split('/')[-1].split('.tiff')[0]) + '.tiff'), img)
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output',
# 'img_projective_' + str(
# self.img_files[indices[2]].split('/')[-1].split('.tiff')[0]) + '.tiff'), img)
# tifffile.imwrite(os.path.join('/home/hanoch/projects/tir_od/output',
# 'img_projective_' + str(
# self.img_files[indices[3]].split('/')[-1].split('.tiff')[0]) + '.tiff'), img)
return img4, labels4
def load_mosaic9(self, index, filling_value, file_type='tiff'):
# loads images in a 9-mosaic
labels9, segments9 = [], []
s = self.img_size
indices = [index] + random.choices(self.indices, k=8) # 8 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)
if self.scaling_before_mosaic:
if file_type == 'png':
img = scaling_image(img, scaling_type='single_image_0_to_1')
else:
img = scaling_image(img, scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
# place img in img9
if i == 0: # center
if self.is_tir_signal:
img9 = init_image_plane(self, img, s, n_div=3)
else:
                img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
h0, w0 = h, w
c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
elif i == 1: # top
c = s, s - h, s + w, s
elif i == 2: # top right
c = s + wp, s - h, s + wp + w, s
elif i == 3: # right
c = s + w0, s, s + w0 + w, s + h
elif i == 4: # bottom right
c = s + w0, s + hp, s + w0 + w, s + hp + h
elif i == 5: # bottom
c = s + w0 - w, s + h0, s + w0, s + h0 + h
elif i == 6: # bottom left
c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
elif i == 7: # left
c = s - w, s + h0 - h, s, s + h0
elif i == 8: # top left
c = s - w, s + h0 - hp - h, s, s + h0 - hp
padx, pady = c[:2]
x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords
# Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
labels9.append(labels)
segments9.extend(segments)
# Image
img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
hp, wp = h, w # height, width previous
# Offset
yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y
img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
# Concat/clip labels
labels9 = np.concatenate(labels9, 0)
labels9[:, [1, 3]] -= xc
labels9[:, [2, 4]] -= yc
c = np.array([xc, yc]) # centers
segments9 = [x - c for x in segments9]
for x in (labels9[:, 1:], *segments9):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img9, labels9 = replicate(img9, labels9) # replicate
# Augment
#img9, labels9, segments9 = remove_background(img9, labels9, segments9)
img9, labels9, segments9 = copy_paste(img9, labels9, segments9, probability=self.hyp['copy_paste']) # copy-paste augmentation (https://arxiv.org/abs/2012.07177); mainly relevant for instance segmentation #@@HK
# Perspective transformation can create holes; in thermal (TIR) imagery these are better filled without reflection
img9, labels9 = random_perspective(img9, labels9, segments9,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border,
filling_value=filling_value,
is_fill_by_mean_img=self.is_tir_signal)
return img9, labels9
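# Geometry note (derived from the code above): the 9-mosaic is assembled on a 3s x 3s
# canvas and then cropped to 2s x 2s at a random offset (yc, xc) drawn from [0, s];
# e.g. with s = 640 the canvas is 1920 x 1920 and the returned crop is 1280 x 1280.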
def load_samples(self, index, file_type='tiff'):
# builds a 4-mosaic and harvests labeled segment crops from it (see sample_segments / pastein)
labels4, segments4 = [], []
s = self.img_size
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
indices = [index] + random.choices(self.indices, k=3) # 3 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)
if self.scaling_before_mosaic:
if file_type == 'png':
img = scaling_image(img, scaling_type='single_image_0_to_1')
else:
img = scaling_image(img, scaling_type=self.scaling_type,
percentile=self.percentile, beta=self.beta)
# place img in img4
if i == 0: # top left
if self.is_tir_signal:
img4 = init_image_plane(self, img, s, n_div=2)
else:
img4 = np.full((s * 2, s * 2, img.shape[2]), 114, dtype=np.uint8) # base image with 4 tiles
x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc # xmin, ymin, xmax, ymax (large image)
x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h # xmin, ymin, xmax, ymax (small image)
elif i == 1: # top right
x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
elif i == 2: # bottom left
x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
elif i == 3: # bottom right
x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
padw = x1a - x1b
padh = y1a - y1b
# Labels
labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
segments4.extend(segments)
# Concat/clip labels
labels4 = np.concatenate(labels4, 0)
for x in (labels4[:, 1:], *segments4):
np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
# img4, labels4 = replicate(img4, labels4) # replicate
# Augment
#img4, labels4, segments4 = remove_background(img4, labels4, segments4)
sample_labels, sample_images, sample_masks = sample_segments(img4, labels4, segments4, probability=0.5)
return sample_labels, sample_images, sample_masks
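# Hedged usage sketch: the crops harvested here feed pastein() below, e.g.
#   sample_labels, sample_images, sample_masks = load_samples(self, index)
#   labels = pastein(image, labels, sample_labels, sample_images, sample_masks)
# the exact call site is an assumption for illustration; the signatures match the defs in this file.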
def init_random_image_plane(img, s, n_div=1):
if img.dtype == np.uint16:
std_ = 500
lower = 0
upper = 2 ** 16 - 1
filling_value = img.mean()
# img4 = np.random.normal(img.mean(), std_, (s * 2, s * 2, img.shape[2])).astype(img.dtype) # plain normal noise can exceed the uint16 range and wrap around; the wrap is also random, so it was acceptable
elif img.dtype == np.uint8:
std_ = 15
filling_value = 114
lower = 0
upper = 255
else:
std_ = 0.05
filling_value = 0.5
lower = 0
upper = 1
if len(img.shape) == 3:
siz = s * n_div, s * n_div, img.shape[2]
else:
siz = s * n_div, s * n_div
mean_ = img.mean()
img4 = truncnorm.rvs((lower - mean_) / std_, (upper - mean_) / std_,
loc=mean_, scale=std_, size=siz).astype(img.dtype) # bounded random noise around the image mean; truncnorm's standardized bounds must be computed w.r.t. loc for [lower, upper] to actually hold
return img4
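# Worked example: for a uint8 image with mean 114 the truncated normal uses std_ = 15 and
# standardized bounds a = (0 - 114) / 15 ≈ -7.6, b = (255 - 114) / 15 = 9.4, so samples
# stay inside [0, 255] before the cast.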
def init_image_plane(self, img, s, n_div=2):
if self.random_pad:
img4 = init_random_image_plane(img=img, s=s, n_div=n_div)
else:
img4 = np.full((s * n_div, s * n_div, img.shape[2]), img.mean(),
dtype=img.dtype) # base plane filled with the image mean (0.5 in [0, 1] corresponds to 114 in [0, 255])
img4 = img4[:s*n_div, :s*n_div] # crop the padding plane to the final shape (relevant for rectangular targets, AR > 1)
return img4
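# Note: with random_pad the base plane is truncated-normal noise around the image mean;
# otherwise it is a constant plane at the image mean, avoiding the hard 114-gray border
# used for 8-bit RGB mosaics.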
def copy_paste(img, labels, segments, probability=0.5):
# Implement Copy-Paste augmentation https://arxiv.org/abs/2012.07177, labels as nx5 np.array(cls, xyxy)
n = len(segments)
if probability and n:
h, w, c = img.shape # height, width, channels
im_new = np.zeros(img.shape, np.uint8)
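# NOTE: the mask is uint8; cv2.bitwise_and below requires src1/src2 to share size and
# dtype, so this path assumes 8-bit input (see the explicit guard in remove_background for TIR uint16)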
for j in random.sample(range(n), k=round(probability * n)):
l, s = labels[j], segments[j]
box = w - l[3], l[2], w - l[1], l[4]
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
if (ioa < 0.30).all(): # allow 30% obscuration of existing labels
labels = np.concatenate((labels, [[l[0], *box]]), 0)
segments.append(np.concatenate((w - s[:, 0:1], s[:, 1:2]), 1))
cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
result = cv2.bitwise_and(src1=img, src2=im_new)
result = cv2.flip(result, 1) # augment segments (flip left-right)
i = result > 0 # pixels to replace
# i[:, :] = result.max(2).reshape(h, w, 1) # act over ch
img[i] = result[i] # cv2.imwrite('debug.jpg', img) # debug
return img, labels, segments
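# How the flip trick works (from the code above): selected segments are rasterized into a
# mask, the masked pixels are mirrored left-right (cv2.flip(..., 1)), and the mirrored
# pixels overwrite the image; the corresponding boxes are mirrored via x -> w - x.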
def remove_background(img, labels, segments):
# Removes the background outside labeled segments (derived from the Copy-Paste augmentation code, https://arxiv.org/abs/2012.07177)
n = len(segments)
h, w, c = img.shape # height, width, channels
im_new = np.zeros(img.shape, np.uint8)
img_new = np.ones(img.shape, np.uint8) * 114
raise ValueError('uint8 cast does not comply with TIR uint16')
for j in range(n):
cv2.drawContours(im_new, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
result = cv2.bitwise_and(src1=img, src2=im_new)
i = result > 0 # pixels to replace
img_new[i] = result[i] # cv2.imwrite('debug.jpg', img) # debug
return img_new, labels, segments
def sample_segments(img, labels, segments, probability=0.5):
# Samples labeled segment crops and masks (derived from the Copy-Paste augmentation, https://arxiv.org/abs/2012.07177); labels as nx5 np.array(cls, xyxy)
n = len(segments)
sample_labels = []
sample_images = []
sample_masks = []
if probability and n:
h, w, c = img.shape # height, width, channels
for j in random.sample(range(n), k=round(probability * n)):
l, s = labels[j], segments[j]
box = l[1].astype(int).clip(0,w-1), l[2].astype(int).clip(0,h-1), l[3].astype(int).clip(0,w-1), l[4].astype(int).clip(0,h-1)
if (box[2] <= box[0]) or (box[3] <= box[1]):
continue
sample_labels.append(l[0])
mask = np.zeros(img.shape, np.uint8)
cv2.drawContours(mask, [segments[j].astype(np.int32)], -1, (255, 255, 255), cv2.FILLED)
sample_masks.append(mask[box[1]:box[3],box[0]:box[2],:])
result = cv2.bitwise_and(src1=img, src2=mask)
i = result > 0 # pixels to replace
mask[i] = result[i] # cv2.imwrite('debug.jpg', img) # debug
sample_images.append(mask[box[1]:box[3],box[0]:box[2],:])
return sample_labels, sample_images, sample_masks
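# Note (from the code above): with probability=0.5 roughly half of the n segments are
# sampled (k = round(0.5 * n)); each crop is returned with its binary mask so that the
# paste step can blend only object pixels.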
def replicate(img, labels):
# Replicate labels
h, w = img.shape[:2]
boxes = labels[:, 1:].astype(int)
x1, y1, x2, y2 = boxes.T
s = ((x2 - x1) + (y2 - y1)) / 2 # side length (pixels)
for i in s.argsort()[:round(s.size * 0.5)]: # smallest indices
x1b, y1b, x2b, y2b = boxes[i]
bh, bw = y2b - y1b, x2b - x1b
yc, xc = int(random.uniform(0, h - bh)), int(random.uniform(0, w - bw)) # offset x, y
x1a, y1a, x2a, y2a = [xc, yc, xc + bw, yc + bh]
img[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b] # img4[ymin:ymax, xmin:xmax]
labels = np.append(labels, [[labels[i, 0], x1a, y1a, x2a, y2a]], axis=0)
return img, labels
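# Note: replicate() is retained from the upstream repo; in this file it is only referenced
# by the commented-out calls in the mosaic loaders above.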
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114),
auto=True, scaleFill=False, scaleup=True, stride=32, random_pad=False):
# Resize and pad image while meeting stride-multiple constraints (e.g. stride=32)
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better test mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
if random_pad and dh > 0: # a rectangular image with top/bottom padding is expected
img_plane = init_random_image_plane(img, s=max(img.shape), n_div=1)
img_plane[bottom:-top, :] = img
img = img_plane
else:
n_ch = img.shape[-1]
img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
if n_ch == 1 and len(img.shape) == 2: # cv2 drops the channel axis for single-channel images; restore it
img = img[..., None]
return img, ratio, (dw, dh)
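# Worked example: a 512x640 (h x w) frame letterboxed with new_shape=640, auto=False gives
# r = 1.0, new_unpad = (640, 512), dw = 0, dh = 128 -> 64 px of padding on top and bottom,
# and the function returns ratio = (1.0, 1.0) and (dw, dh) = (0.0, 64.0).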
def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
border=(0, 0), filling_value=114, is_fill_by_mean_img=False,
random_pad=False):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
height = img.shape[0] + border[0] * 2 # shape(h,w,c)
width = img.shape[1] + border[1] * 2
# Center
C = np.eye(3)
C[0, 2] = -img.shape[1] / 2 # x translation (pixels)
C[1, 2] = -img.shape[0] / 2 # y translation (pixels)
# Perspective
P = np.eye(3)
P[2, 0] = random.uniform(-perspective, perspective) # x perspective (about y)
P[2, 1] = random.uniform(-perspective, perspective) # y perspective (about x)
# Rotation and Scale
R = np.eye(3)
a = random.uniform(-degrees, degrees)
# a += random.choice([-180, -90, 0, 90]) # add 90deg rotations to small rotations
s = random.uniform(1 - scale, 1.1 + scale) #@@HK TODO: why is the range not symmetric around 1?
# s = 2 ** random.uniform(-scale, scale)
R[:2] = cv2.getRotationMatrix2D(angle=a, center=(0, 0), scale=s)
# Shear
S = np.eye(3)
S[0, 1] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # x shear (deg)
S[1, 0] = math.tan(random.uniform(-shear, shear) * math.pi / 180) # y shear (deg)
# Translation
T = np.eye(3)
T[0, 2] = random.uniform(0.5 - translate, 0.5 + translate) * width # x translation (pixels)
T[1, 2] = random.uniform(0.5 - translate, 0.5 + translate) * height # y translation (pixels)
# Combined rotation matrix
M = T @ S @ R @ P @ C # order of operations (right to left) is IMPORTANT
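# Reading M right-to-left: C moves the origin to the image center, P applies perspective,
# R rotates and scales about the (new) origin, S shears, and T translates back into the
# output canvas; composing once lets a single warp call apply all five transforms.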
if (border[0] != 0) or (border[1] != 0) or (M != np.eye(3)).any(): # image changed
if is_fill_by_mean_img:
filling_value = int(img.mean() + 1) # the filling value must be an integer; when scaling precedes the mosaic the signal is in [0, 1], so the only possible filling values here are 0 or 1
n_ch = img.shape[-1]
if perspective:
img = cv2.warpPerspective(img, M, dsize=(width, height), borderValue=(filling_value, filling_value, filling_value))
else: # affine
img = cv2.warpAffine(img, M[:2], dsize=(width, height), borderValue=(filling_value, filling_value, filling_value))
if n_ch == 1 and len(img.shape) == 2: # cv2 drops the channel axis for single-channel images; restore it
img = img[..., None]
if random_pad:
pad_w = int((width - np.round(width * s)) // 2)
pad_h = int((height - np.round(height * s)) // 2)
img_plane = init_random_image_plane(img, s=max(img.shape), n_div=1)
img_plane = img_plane[:img.shape[0], :img.shape[1]] # for rectangular shapes (AR > 1), crop the padding plane to the final image shape
if pad_w + int(T[0, 2] - width/2) > 0:
# Left padding
img[:, :pad_w + int(T[0, 2] - width / 2)] = img_plane[:, :pad_w + int(T[0, 2] - width / 2)]
# Right padding: valid even when the translation pushes content rightward beyond the canvas width
img[:, width - pad_w + int(T[0, 2] - width/2):] = img_plane[:, width - pad_w + int(T[0, 2] - width/2):]
if pad_h + int(T[1, 2] - height / 2) > 0:
# Top padding
img[:pad_h + int(T[1, 2] - height/2), :] = img_plane[:pad_h + int(T[1, 2] - height/2), :]
# Bottom padding
img[height-pad_h + int(T[1, 2] - height/2):, :] = img_plane[height-pad_h + int(T[1, 2] - height/2):, :]
# Visualize
# import matplotlib.pyplot as plt
# ax = plt.subplots(1, 2, figsize=(12, 6))[1].ravel()
# ax[0].imshow(img[:, :, ::-1]) # base
# ax[1].imshow(img2[:, :, ::-1]) # warped
# Transform label coordinates
n = len(targets)
if n:
use_segments = any(x.any() for x in segments)
new = np.zeros((n, 4))
if use_segments: # warp segments
segments = resample_segments(segments) # upsample
for i, segment in enumerate(segments):
xy = np.ones((len(segment), 3))
xy[:, :2] = segment
xy = xy @ M.T # transform
xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
# clip
new[i] = segment2box(xy, width, height)
else: # warp boxes
xy = np.ones((n * 4, 3))
xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
xy = xy @ M.T # transform
xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
# create new boxes
x = xy[:, [0, 2, 4, 6]]
y = xy[:, [1, 3, 5, 7]]
new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
# clip
new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates
i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i]
targets[:, 1:5] = new[i]
return img, targets
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates
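# Worked example: a 40x20 box that shrinks to 8x3 after augmentation passes wh_thr
# (both sides > 2 px) and ar_thr (max(8/3, 3/8) ≈ 2.7 < 20) but fails area_thr for boxes:
# 8*3 / (40*20) = 0.03 < 0.10, so it is dropped; with segments (area_thr=0.01) it survives.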
def bbox_ioa(box1, box2):
# Returns the intersection over box2 area given box1, box2. box1 is 4, box2 is nx4. boxes are x1y1x2y2
box2 = box2.transpose()
# Get the coordinates of bounding boxes
b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
# Intersection area
inter_area = (np.minimum(b1_x2, b2_x2) - np.maximum(b1_x1, b2_x1)).clip(0) * \
(np.minimum(b1_y2, b2_y2) - np.maximum(b1_y1, b2_y1)).clip(0)
# box2 area
box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + 1e-16
# Intersection over box2 area
return inter_area / box2_area
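# Worked example: box1 = [0, 0, 10, 10] against box2 = [[5, 5, 15, 15]] gives an
# intersection of 5*5 = 25 and a box2 area of 100, so bbox_ioa returns ~0.25.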
def cutout(image, labels):
# Applies image cutout augmentation https://arxiv.org/abs/1708.04552
h, w = image.shape[:2]
# create random masks
scales = [0.5] * 1 + [0.25] * 2 + [0.125] * 4 + [0.0625] * 8 + [0.03125] * 16 # image size fraction
for s in scales:
mask_h = random.randint(1, int(h * s))
mask_w = random.randint(1, int(w * s))
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
# apply random color mask
image[ymin:ymax, xmin:xmax] = [random.randint(64, 191) for _ in range(3)]
# return unobscured labels
if len(labels) and s > 0.03:
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
labels = labels[ioa < 0.60] # remove >60% obscured labels
return labels
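# Note: the scales list above yields 1 + 2 + 4 + 8 + 16 = 31 masks per call, from one
# half-size mask down to sixteen 1/32-size masks; since even the smallest scale
# (0.03125) exceeds the 0.03 gate, the >60% obscuration label filter runs for every mask.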
def pastein(image, labels, sample_labels, sample_images, sample_masks):
# Paste-in augmentation: blends previously sampled object crops (from sample_segments) into the image at random locations
h, w = image.shape[:2]
# create random masks
scales = [0.75] * 2 + [0.5] * 4 + [0.25] * 4 + [0.125] * 4 + [0.0625] * 6 # image size fraction
for s in scales:
if random.random() < 0.2:
continue
mask_h = random.randint(1, int(h * s))
mask_w = random.randint(1, int(w * s))
# box
xmin = max(0, random.randint(0, w) - mask_w // 2)
ymin = max(0, random.randint(0, h) - mask_h // 2)
xmax = min(w, xmin + mask_w)
ymax = min(h, ymin + mask_h)
box = np.array([xmin, ymin, xmax, ymax], dtype=np.float32)
if len(labels):
ioa = bbox_ioa(box, labels[:, 1:5]) # intersection over area
else:
ioa = np.zeros(1)
if (ioa < 0.30).all() and len(sample_labels) and (xmax > xmin+20) and (ymax > ymin+20): # allow 30% obscuration of existing labels
sel_ind = random.randint(0, len(sample_labels)-1)
hs, ws, cs = sample_images[sel_ind].shape
r_scale = min((ymax-ymin)/hs, (xmax-xmin)/ws)
r_w = int(ws*r_scale)
r_h = int(hs*r_scale)
if (r_w > 10) and (r_h > 10):
r_mask = cv2.resize(sample_masks[sel_ind], (r_w, r_h))
r_image = cv2.resize(sample_images[sel_ind], (r_w, r_h))
temp_crop = image[ymin:ymin+r_h, xmin:xmin+r_w]
m_ind = r_mask > 0
if m_ind.astype(np.int32).sum() > 60:
temp_crop[m_ind] = r_image[m_ind]
box = np.array([xmin, ymin, xmin+r_w, ymin+r_h], dtype=np.float32)
if len(labels):
labels = np.concatenate((labels, [[sample_labels[sel_ind], *box]]), 0)
else:
labels = np.array([[sample_labels[sel_ind], *box]])
image[ymin:ymin+r_h, xmin:xmin+r_w] = temp_crop
return labels
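# Design note (from the code above): a candidate region is accepted only if it obscures
# every existing label by less than 30% (bbox_ioa < 0.30), is more than 20 px in each
# dimension, and the resized mask covers more than 60 pixels; otherwise the paste is skipped.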
class Albumentations_gamma_contrast:
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self, alb_prob=0.01, gamma_limit=[80, 120]):
self.transform = None
self.transform = A.Compose([
# A.CLAHE(p=0.01),
A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=alb_prob), # contrast: x' = clip((x - mean) * (1 + alpha) + mean); brightness: x'' = clip(x' * (1 + beta))
]) # A.RandomGamma(gamma_limit=gamma_limit, p=alb_prob) intentionally left out
# A.Blur(p=0.01),
# A.MedianBlur(p=0.01),
# A.ToGray(p=0.01),
# A.ImageCompression(quality_lower=75, p=0.01),],
# bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
#logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
def __call__(self, im, p=1.0):
if self.transform and random.random() < p:
new = self.transform(image=im) # transformed
im = new['image']
return im
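# Hedged usage sketch: this class is image-only (no bbox_params), so it can be applied
# without touching labels, e.g.
#   aug = Albumentations_gamma_contrast(alb_prob=0.5)
#   im = aug(im, p=1.0)
# the probability values are illustrative, not defaults used elsewhere in the repo.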
class Albumentations:
# YOLOv5 Albumentations class (optional, only used if package is installed)
def __init__(self):
self.transform = None
import albumentations as A
self.transform = A.Compose([
A.CLAHE(p=0.01),
A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.01),
A.RandomGamma(gamma_limit=[80, 120], p=0.01),
A.Blur(p=0.01),
A.MedianBlur(p=0.01),
A.ToGray(p=0.01),
A.ImageCompression(quality_lower=75, p=0.01),],
bbox_params=A.BboxParams(format='pascal_voc', label_fields=['class_labels']))
#logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
def __call__(self, im, labels, p=1.0):
if self.transform and random.random() < p:
new = self.transform(image=im, bboxes=labels[:, 1:], class_labels=labels[:, 0]) # transformed
im, labels = new['image'], np.array([[c, *b] for c, b in zip(new['class_labels'], new['bboxes'])])
return im, labels
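# Note: bbox_params uses the 'pascal_voc' format, so labels[:, 1:] must already be in
# pixel xyxy coordinates when this transform is applied; class ids travel through
# 'class_labels' and are re-attached to the boxes on return.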
def create_folder(path='./new'):
# Create folder
if os.path.exists(path):
shutil.rmtree(path) # delete output folder
os.makedirs(path) # make new output folder
def flatten_recursive(path='../coco'):
# Flatten a recursive directory by bringing all files to top level
new_path = Path(path + '_flat')
create_folder(new_path)
for file in tqdm(glob.glob(str(Path(path)) + '/**/*.*', recursive=True)):
shutil.copyfile(file, new_path / Path(file).name)
def extract_boxes(path='../coco/'): # from utils.datasets import *; extract_boxes('../coco128')
# Convert detection dataset into classification dataset, with one directory per class
path = Path(path) # images dir
shutil.rmtree(path / 'classifier') if (path / 'classifier').is_dir() else None # remove existing
files = list(path.rglob('*.*'))
n = len(files) # number of files
for im_file in tqdm(files, total=n):
if im_file.suffix[1:] in img_formats:
# image
raise NotImplementedError('not aligned to single-channel TIR signal') # a bare raise here would fail with "no active exception"
im = cv2.imread(str(im_file))[..., ::-1] # BGR to RGB
h, w = im.shape[:2]
# labels
lb_file = Path(img2label_paths([str(im_file)])[0])
if Path(lb_file).exists():
with open(lb_file, 'r') as f:
lb = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels
for j, x in enumerate(lb):
c = int(x[0]) # class
f = (path / 'classifier') / f'{c}' / f'{path.stem}_{im_file.stem}_{j}.jpg' # new filename
if not f.parent.is_dir():
f.parent.mkdir(parents=True)
b = x[1:] * [w, h, w, h] # box
# b[2:] = b[2:].max() # rectangle to square
b[2:] = b[2:] * 1.2 + 3 # pad
b = xywh2xyxy(b.reshape(-1, 4)).ravel().astype(int) # np.int was removed in NumPy >= 1.24
b[[0, 2]] = np.clip(b[[0, 2]], 0, w) # clip boxes outside of image
b[[1, 3]] = np.clip(b[[1, 3]], 0, h)
assert cv2.imwrite(str(f), im[b[1]:b[3], b[0]:b[2]]), f'box failure in {f}'
def autosplit(path='../coco', weights=(0.9, 0.1, 0.0), annotated_only=False):
""" Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files
Usage: from utils.datasets import *; autosplit('../coco')
Arguments
path: Path to images directory
weights: Train, val, test weights (list)
annotated_only: Only use images with an annotated txt file
"""
path = Path(path) # images dir
files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in img_formats], []) # image files only
n = len(files) # number of files
indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split
txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt'] # 3 txt files
[(path / x).unlink() for x in txt if (path / x).exists()] # remove existing
print(f'Autosplitting images from {path}' + ', using *.txt labeled images only' * annotated_only)
for i, img in tqdm(zip(indices, files), total=n):
if not annotated_only or Path(img2label_paths([str(img)])[0]).exists(): # check label
with open(path / txt[i], 'a') as f:
f.write(str(img) + '\n') # add image to txt file
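# Worked example: weights=(0.9, 0.1, 0.0) assigns each image independently to
# autosplit_train.txt with p=0.9 and autosplit_val.txt with p=0.1, leaving
# autosplit_test.txt empty in expectation.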
def load_segmentations(self, index):
key = '/work/handsomejw66/coco17/' + self.img_files[index]
return self.segs[key]