From 94342acbe18fd6246183bc833390a6184b8831af Mon Sep 17 00:00:00 2001 From: lorenzomammana Date: Thu, 9 Jul 2020 11:52:12 +0200 Subject: [PATCH 1/6] Handle multiple datasets --- utils/datasets.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 1ebd70948..d96ae41c6 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -280,16 +280,28 @@ class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0): try: - path = str(Path(path)) # os-agnostic - parent = str(Path(path).parent) + os.sep - if os.path.isfile(path): # file - with open(path, 'r') as f: - f = f.read().splitlines() - f = [x.replace('./', parent) if x.startswith('./') else x for x in f] # local to global path - elif os.path.isdir(path): # folder - f = glob.iglob(path + os.sep + '*.*') + if type(path) is list: + # Multiple datasets handler + f = [] + for subpath in path: + with open(subpath, 'r') as t: + subpath = str(Path(subpath)) # os-agnostic + parent = str(Path(subpath).parent) + os.sep + t = t.read().splitlines() + t = [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + f += t + path = str(Path(path[0])) # from now on treat multiple datasets as single else: - raise Exception('%s does not exist' % path) + path = str(Path(path)) # os-agnostic + parent = str(Path(path).parent) + os.sep + if os.path.isfile(path): # file + with open(path, 'r') as f: + f = f.read().splitlines() + f = [x.replace('./', parent) if x.startswith('./') else x for x in f] # local to global path + elif os.path.isdir(path): # folder + f = glob.iglob(path + os.sep + '*.*') + else: + raise Exception('%s does not exist' % path) self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats] except: raise Exception('Error loading data from %s. See %s' % (path, help_url)) From 54a9e4f8764aae695e42d62869563f2a9bbe8ec4 Mon Sep 17 00:00:00 2001 From: lorenzomammana Date: Thu, 9 Jul 2020 19:39:28 +0200 Subject: [PATCH 2/6] Refactor code to reduce duplication --- utils/datasets.py | 33 ++++++++++++++++----------------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index d96ae41c6..d3bc5747d 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -280,32 +280,31 @@ class LoadImagesAndLabels(Dataset): # for training/testing def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False, cache_images=False, single_cls=False, stride=32, pad=0.0): try: - if type(path) is list: - # Multiple datasets handler - f = [] - for subpath in path: + f = [] + for subpath in path if isinstance(path, list) else [path]: + subpath = str(Path(subpath)) # os-agnostic + parent = str(Path(subpath).parent) + os.sep + if os.path.isfile(subpath): # file with open(subpath, 'r') as t: - subpath = str(Path(subpath)) # os-agnostic - parent = str(Path(subpath).parent) + os.sep t = t.read().splitlines() t = [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path f += t - path = str(Path(path[0])) # from now on treat multiple datasets as single - else: - path = str(Path(path)) # os-agnostic - parent = str(Path(path).parent) + os.sep - if os.path.isfile(path): # file - with open(path, 'r') as f: - f = f.read().splitlines() - f = [x.replace('./', parent) if x.startswith('./') else x for x in f] # local to global path - elif os.path.isdir(path): # folder - f = glob.iglob(path + os.sep + '*.*') + elif os.path.isdir(subpath): # folder + f = glob.iglob(subpath + os.sep + '*.*') + # Maybe change this to f += glob.glob, this should allow handling also multiple folders else: - raise Exception('%s does not exist' % path) + raise Exception('%s does not exist' % subpath) self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats] except: + # Maybe avoid handling bare exceptions raise Exception('Error loading data from %s. See %s' % (path, help_url)) + # Still need to do this for compatibility with the .npy and shape file saves + if isinstance(path, list): + path = str(Path(path[0])) + else: + path = str(Path(path)) + n = len(self.img_files) assert n > 0, 'No images found in %s. See %s' % (path, help_url) bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index From f9bab6b12761015b8bda6625c9bb8ec7ac35ec78 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 9 Jul 2020 13:36:23 -0700 Subject: [PATCH 3/6] Update datasets.py --- utils/datasets.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index d3bc5747d..0d8852cc9 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -295,16 +295,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing else: raise Exception('%s does not exist' % subpath) self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats] + path = subpath except: # Maybe avoid handling bare exceptions raise Exception('Error loading data from %s. See %s' % (path, help_url)) - # Still need to do this for compatibility with the .npy and shape file saves - if isinstance(path, list): - path = str(Path(path[0])) - else: - path = str(Path(path)) - n = len(self.img_files) assert n > 0, 'No images found in %s. See %s' % (path, help_url) bi = np.floor(np.arange(n) / batch_size).astype(np.int) # batch index From 47bf1730fa9217a2682ef727347520c4e3cba98b Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 9 Jul 2020 13:45:55 -0700 Subject: [PATCH 4/6] Update datasets.py --- utils/datasets.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index 0d8852cc9..bd42bc091 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -281,21 +281,19 @@ class LoadImagesAndLabels(Dataset): # for training/testing cache_images=False, single_cls=False, stride=32, pad=0.0): try: f = [] - for subpath in path if isinstance(path, list) else [path]: - subpath = str(Path(subpath)) # os-agnostic - parent = str(Path(subpath).parent) + os.sep - if os.path.isfile(subpath): # file - with open(subpath, 'r') as t: + for p in path if isinstance(path, list) else [path]: + p = str(Path(p)) # os-agnostic + parent = str(Path(p).parent) + os.sep + if os.path.isfile(p): # file + with open(p, 'r') as t: t = t.read().splitlines() - t = [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path - f += t - elif os.path.isdir(subpath): # folder - f = glob.iglob(subpath + os.sep + '*.*') - # Maybe change this to f += glob.glob, this should allow handling also multiple folders + f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path + elif os.path.isdir(p): # folder + f += glob.iglob(p + os.sep + '*.*') else: - raise Exception('%s does not exist' % subpath) + raise Exception('%s does not exist' % p) + path = p # *.npy dir self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats] - path = subpath except: # Maybe avoid handling bare exceptions raise Exception('Error loading data from %s. See %s' % (path, help_url)) From 13a09fa68a073dd6504e1f3fd66b7176e5995cf9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 9 Jul 2020 13:47:20 -0700 Subject: [PATCH 5/6] Update datasets.py --- utils/datasets.py | 1 - 1 file changed, 1 deletion(-) diff --git a/utils/datasets.py b/utils/datasets.py index bd42bc091..c8619f9e9 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -295,7 +295,6 @@ class LoadImagesAndLabels(Dataset): # for training/testing path = p # *.npy dir self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats] except: - # Maybe avoid handling bare exceptions raise Exception('Error loading data from %s. See %s' % (path, help_url)) n = len(self.img_files) From dd33d2ab77bb65845bb12a99fba81fabc03a457d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 9 Jul 2020 16:28:20 -0700 Subject: [PATCH 6/6] Update datasets.py --- utils/datasets.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/utils/datasets.py b/utils/datasets.py index c8619f9e9..9777e5c61 100755 --- a/utils/datasets.py +++ b/utils/datasets.py @@ -294,8 +294,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing raise Exception('%s does not exist' % p) path = p # *.npy dir self.img_files = [x.replace('/', os.sep) for x in f if os.path.splitext(x)[-1].lower() in img_formats] - except: - raise Exception('Error loading data from %s. See %s' % (path, help_url)) + except Exception as e: + raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url)) n = len(self.img_files) assert n > 0, 'No images found in %s. See %s' % (path, help_url)