download() ThreadPool update (#3027)
* download() ThreadPool update * update train image count * cid + 1
pull/3030/head
parent
5189b3addb
commit
8cab907f60
|
@ -6,7 +6,7 @@
|
|||
# /yolov5
|
||||
|
||||
# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
|
||||
train: ../datasets/objects365/images/train # 1.7 Million images
|
||||
train: ../datasets/objects365/images/train # 1742289 images
|
||||
val: ../datasets/objects365/images/val # 5570 images
|
||||
|
||||
# number of classes
|
||||
|
@ -72,17 +72,22 @@ download: |
|
|||
|
||||
# Download
|
||||
url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/"
|
||||
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json
|
||||
download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir, delete=False) # annotations json
|
||||
download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train',
|
||||
curl=True, delete=False, threads=8)
|
||||
|
||||
# Move
|
||||
train = dir / 'images' / 'train'
|
||||
for f in tqdm(train.rglob('*.jpg'), desc=f'Moving images'):
|
||||
f.rename(train / f.name) # move to /images/train
|
||||
|
||||
# Labels
|
||||
coco = COCO(dir / 'zhiyuan_objv2_train.json')
|
||||
names = [x["name"] for x in coco.loadCats(coco.getCatIds())]
|
||||
for cid, cat in enumerate(names):
|
||||
catIds = coco.getCatIds(catNms=[cat])
|
||||
imgIds = coco.getImgIds(catIds=catIds)
|
||||
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid}/{len(names)} {cat}'):
|
||||
for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid + 1}/{len(names)} {cat}'):
|
||||
width, height = im["width"], im["height"]
|
||||
path = Path(im["file_name"]) # image filename
|
||||
try:
|
||||
|
|
|
@ -217,7 +217,10 @@ def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1):
|
|||
dir = Path(dir)
|
||||
dir.mkdir(parents=True, exist_ok=True) # make directory
|
||||
if threads > 1:
|
||||
ThreadPool(threads).imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
|
||||
pool = ThreadPool(threads)
|
||||
pool.imap(lambda x: download_one(*x), zip(url, repeat(dir))) # multi-threaded
|
||||
pool.close()
|
||||
pool.join()
|
||||
else:
|
||||
for u in tuple(url) if isinstance(url, str) else url:
|
||||
download_one(u, dir)
|
||||
|
|
Loading…
Reference in New Issue