# Provenance: git commit 4798e66fdf624764f669586982fc095346a50315 "Autosplit (#1488)"
# (Glenn Jocher, 2020-11-23), appended to utils/datasets.py after flatten_recursive().


def autosplit(path='../coco128', weights=(0.9, 0.1, 0.0)):  # from utils.datasets import *; autosplit()
    """ Autosplit a dataset into train/val/test splits and save *.txt files

    Every file found under `path` is randomly assigned to one of the three splits
    according to `weights`. Files whose extension is listed in `img_formats` are
    then recorded, one path per line, in autosplit_train.txt / autosplit_val.txt /
    autosplit_test.txt inside `path`. Any of those txt files left over from an
    earlier run is deleted first; a txt file is only created for a split that
    received at least one image.

    # Arguments
        path:       Path to images directory (searched recursively)
        weights:    Train, val, test weights (list), passed to random.choices()
    """
    path = Path(path)  # images dir
    files = list(path.rglob('*.*'))
    # One draw per globbed file (not per image) so the random stream matches earlier behaviour
    indices = random.choices([0, 1, 2], weights=weights, k=len(files))  # assign each file to a split
    txt = ['autosplit_train.txt', 'autosplit_val.txt', 'autosplit_test.txt']  # 3 txt files

    for name in txt:  # remove existing
        if (path / name).exists():
            (path / name).unlink()

    # Collect the entries per split first so each txt file is opened and written only once
    entries = [[] for _ in txt]
    for i, img in tqdm(zip(indices, files), total=len(files)):  # zip() has no len(), so pass total explicitly
        ext = img.suffix[1:]
        # Accept the extension as-is or lowercased so e.g. 'IMG.JPG' is not skipped
        # NOTE(review): assumes img_formats holds lowercase extensions without the dot - confirm
        if ext in img_formats or ext.lower() in img_formats:
            entries[i].append(str(img) + '\n')  # add image to its split

    for name, lines in zip(txt, entries):
        if lines:  # skip empty splits, so no empty txt file is created
            with open(path / name, 'a') as f:
                f.writelines(lines)