From db3bbdde0ee6df78e44d9d7b3c17561303f74952 Mon Sep 17 00:00:00 2001 From: jdfr Date: Wed, 20 Oct 2021 19:25:33 +0200 Subject: [PATCH] autosplit: take image files with uppercase extensions into account (#5269) * take image files with uppercase extensions into account in autosplit * case fix * Refactor implementation Removes additional variable (capital variable names are also only for global variables), and uses the same methodology as implemented earlier in datasets.py L409. * Remove redundant rglob characters Co-authored-by: Glenn Jocher --- utils/datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 091d65336..cda69e326 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -396,7 +396,7 @@ class LoadImagesAndLabels(Dataset): p = Path(p) # os-agnostic if p.is_dir(): # dir f += glob.glob(str(p / '**' / '*.*'), recursive=True) - # f = list(p.rglob('**/*.*')) # pathlib + # f = list(p.rglob('*.*')) # pathlib elif p.is_file(): # file with open(p, 'r') as t: t = t.read().strip().splitlines() @@ -406,7 +406,7 @@ class LoadImagesAndLabels(Dataset): else: raise Exception(f'{prefix}{p} does not exist') self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in IMG_FORMATS]) - # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib + # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib assert self.img_files, f'{prefix}No images found' except Exception as e: raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {HELP_URL}') @@ -866,7 +866,7 @@ def autosplit(path='../datasets/coco128/images', weights=(0.9, 0.1, 0.0), annota annotated_only: Only use images with an annotated txt file """ path = Path(path) # images dir - files = sum([list(path.rglob(f"*.{img_ext}")) for img_ext in IMG_FORMATS], []) # image files only + files = sorted([x for x in path.rglob('*.*') if x.suffix[1:].lower() in IMG_FORMATS]) # image files only n = len(files) # number of files random.seed(0) # for reproducibility indices = random.choices([0, 1, 2], weights=weights, k=n) # assign each image to a split