Multi-threaded VisDrone and VOC downloads (#7108)

* Multi-threaded VOC download
* Update VOC.yaml
* Update
* Update general.py
* Update general.py
2025-06-03 14:49:29 +08:00 · 2022-03-23 01:19:37 +01:00 · 2022-03-23 01:19:37 +01:00 · c3ae4e4af6
commit c3ae4e4af6
parent ecc2c7ba73
7 changed files with 13 additions and 6 deletions
--- a/data/GlobalWheat2020.yaml
+++ b/data/GlobalWheat2020.yaml
@@ -34,6 +34,7 @@ names: ['wheat_head']  # class names
 download: |
  from utils.general import download, Path
  # Download
  dir = Path(yaml['path'])  # dataset root dir
  urls = ['https://zenodo.org/record/4298502/files/global-wheat-codalab-official.zip',
--- a/data/Objects365.yaml
+++ b/data/Objects365.yaml
@@ -65,6 +65,7 @@ download: |
  from utils.general import Path, download, np, xyxy2xywhn
  # Make Directories
  dir = Path(yaml['path'])  # dataset root dir
  for p in 'images', 'labels':
--- a/data/SKU-110K.yaml
+++ b/data/SKU-110K.yaml
@@ -24,6 +24,7 @@ download: |
  from tqdm import tqdm
  from utils.general import np, pd, Path, download, xyxy2xywh
  # Download
  dir = Path(yaml['path'])  # dataset root dir
  parent = Path(dir.parent)  # download dir
--- a/data/VOC.yaml
+++ b/data/VOC.yaml
@@ -62,7 +62,7 @@ download: |
  urls = [url + 'VOCtrainval_06-Nov-2007.zip',  # 446MB, 5012 images
          url + 'VOCtest_06-Nov-2007.zip',  # 438MB, 4953 images
          url + 'VOCtrainval_11-May-2012.zip']  # 1.95GB, 17126 images
-  download(urls, dir=dir / 'images', delete=False)
+  download(urls, dir=dir / 'images', delete=False, threads=3)
  # Convert
  path = dir / f'images/VOCdevkit'
--- a/data/VisDrone.yaml
+++ b/data/VisDrone.yaml
@@ -54,7 +54,7 @@ download: |
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-val.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-dev.zip',
          'https://github.com/ultralytics/yolov5/releases/download/v1.0/VisDrone2019-DET-test-challenge.zip']
-  download(urls, dir=dir)
+  download(urls, dir=dir, threads=4)
  # Convert
  for d in 'VisDrone2019-DET-train', 'VisDrone2019-DET-val', 'VisDrone2019-DET-test-dev':
--- a/data/coco.yaml
+++ b/data/coco.yaml
@@ -30,6 +30,7 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
 download: |
  from utils.general import download, Path
  # Download labels
  segments = False  # segment or box labels
  dir = Path(yaml['path'])  # dataset root dir
--- a/utils/general.py
+++ b/utils/general.py
@@ -449,8 +449,9 @@ def check_dataset(data, autodownload=True):
    if val:
        val = [Path(x).resolve() for x in (val if isinstance(val, list) else [val])]  # val path
        if not all(x.exists() for x in val):
-            LOGGER.info('\nDataset not found, missing paths: %s' % [str(x) for x in val if not x.exists()])
+            LOGGER.info(emojis('\nDataset not found ⚠️, missing paths %s' % [str(x) for x in val if not x.exists()]))
            if s and autodownload:  # download script
                t = time.time()
                root = path.parent if 'path' in data else '..'  # unzip directory i.e. '../'
                if s.startswith('http') and s.endswith('.zip'):  # URL
                    f = Path(s).name  # filename
@@ -465,9 +466,11 @@ def check_dataset(data, autodownload=True):
                    r = os.system(s)
                else:  # python script
                    r = exec(s, {'yaml': data})  # return None
-                LOGGER.info(f"Dataset autodownload {f'success, saved to {root}' if r in (0, None) else 'failure'}\n")
+                dt = f'({round(time.time() - t, 1)}s)'
+                s = f"success ✅ {dt}, saved to {colorstr('bold', root)}" if r in (0, None) else f"failure {dt} ❌"
+                LOGGER.info(emojis(f"Dataset download {s}"))
            else:
-                raise Exception('Dataset not found.')
+                raise Exception(emojis('Dataset not found ❌'))
    return data  # dictionary
@@ -491,7 +494,7 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
            if curl:
                os.system(f"curl -L '{url}' -o '{f}' --retry 9 -C -")  # curl download, retry and resume on fail
            else:
-                torch.hub.download_url_to_file(url, f, progress=True)  # torch download
+                torch.hub.download_url_to_file(url, f, progress=threads == 1)  # torch download
        if unzip and f.suffix in ('.zip', '.gz'):
            LOGGER.info(f'Unzipping {f}...')
            if f.suffix == '.zip':