diff --git a/data/objects365.yaml b/data/objects365.yaml index 14464694f..5d19ab5ca 100644 --- a/data/objects365.yaml +++ b/data/objects365.yaml @@ -55,3 +55,43 @@ names: [ 'Person', 'Sneakers', 'Chair', 'Other Shoes', 'Hat', 'Car', 'Lamp', 'Gl 'Scallop', 'Noddles', 'Comb', 'Dumpling', 'Oyster', 'Table Tennis paddle', 'Cosmetics Brush/Eyeliner Pencil', 'Chainsaw', 'Eraser', 'Lobster', 'Durian', 'Okra', 'Lipstick', 'Cosmetics Mirror', 'Curling', 'Table Tennis' ] + +# download command/URL (optional) -------------------------------------------------------------------------------------- +download: | + from pycocotools.coco import COCO + from tqdm import tqdm + + from utils.general import download, Path + + # Make Directories + dir = Path('../datasets/objects365') # dataset directory + for p in 'images', 'labels': + (dir / p).mkdir(parents=True, exist_ok=True) + for q in 'train', 'val': + (dir / p / q).mkdir(parents=True, exist_ok=True) + + # Download + url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/" + download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json + download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', + curl=True, delete=False, threads=8) + + # Labels + coco = COCO(dir / 'zhiyuan_objv2_train.json') + names = [x["name"] for x in coco.loadCats(coco.getCatIds())] + for cid, cat in enumerate(names): + catIds = coco.getCatIds(catNms=[cat]) + imgIds = coco.getImgIds(catIds=catIds) + for im in tqdm(coco.loadImgs(imgIds), desc=f'Class {cid}/{len(names)} {cat}'): + width, height = im["width"], im["height"] + path = Path(im["file_name"]) # image filename + try: + with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file: + annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) + for a in coco.loadAnns(annIds): + x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) + x, y = x + w / 2, y + h / 2 # xy 
to center + file.write(f"{cid} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") + + except Exception as e: + print(e) diff --git a/data/scripts/get_objects365.py b/data/scripts/get_objects365.py deleted file mode 100644 index d77b26d60..000000000 --- a/data/scripts/get_objects365.py +++ /dev/null @@ -1,45 +0,0 @@ -# Objects365 https://www.objects365.org labels JSON to YOLO script -# 1. Download Object 365 from the Object 365 website And unpack all images in datasets/object365/images -# 2. Place this file and zhiyuan_objv2_train.json file in datasets/objects365 -# 3. Execute this file from datasets/object365 path -# /datasets -# /objects365 -# /images -# /labels - - -from pycocotools.coco import COCO - -from utils.general import download, Path - -# Make Directories -dir = Path('../datasets/objects365') # dataset directory -for p in 'images', 'labels': - (dir / p).mkdir(parents=True, exist_ok=True) - for q in 'train', 'val': - (dir / p / q).mkdir(parents=True, exist_ok=True) - -# Download -url = "https://dorc.ks3-cn-beijing.ksyun.com/data-set/2020Objects365%E6%95%B0%E6%8D%AE%E9%9B%86/train/" -download([url + 'zhiyuan_objv2_train.tar.gz'], dir=dir) # annotations json -download([url + f for f in [f'patch{i}.tar.gz' for i in range(51)]], dir=dir / 'images' / 'train', curl=True, threads=8) - -# Labels -coco = COCO(dir / 'zhiyuan_objv2_train.json') -names = [x["name"] for x in coco.loadCats(coco.getCatIds())] -for categoryId, cat in enumerate(names): - catIds = coco.getCatIds(catNms=[cat]) - imgIds = coco.getImgIds(catIds=catIds) - for im in coco.loadImgs(imgIds): - width, height = im["width"], im["height"] - path = Path(im["file_name"]) # image filename - try: - with open(dir / 'labels' / 'train' / path.with_suffix('.txt').name, 'a') as file: - annIds = coco.getAnnIds(imgIds=im["id"], catIds=catIds, iscrowd=None) - for a in coco.loadAnns(annIds): - x, y, w, h = a['bbox'] # bounding box in xywh (xy top-left corner) - x, y = x + w / 2, y + h / 2 # xy 
to center - file.write(f"{categoryId} {x / width:.5f} {y / height:.5f} {w / width:.5f} {h / height:.5f}\n") - - except Exception as e: - print(e) diff --git a/utils/general.py b/utils/general.py index fd20a7e67..4de240a03 100755 --- a/utils/general.py +++ b/utils/general.py @@ -193,7 +193,7 @@ def check_dataset(dict): raise Exception('Dataset not found.') -def download(url, dir='.', unzip=True, curl=False, threads=1): +def download(url, dir='.', unzip=True, delete=True, curl=False, threads=1): # Multi-threaded file download and unzip function def download_one(url, dir): # Download 1 file @@ -207,9 +207,12 @@ def download(url, dir='.', unzip=True, curl=False, threads=1): if unzip and f.suffix in ('.zip', '.gz'): print(f'Unzipping {f}...') if f.suffix == '.zip': - os.system(f'unzip -qo {f} -d {dir} && rm {f}') # unzip -quiet -overwrite + s = f'unzip -qo {f} -d {dir}' # unzip -quiet -overwrite elif f.suffix == '.gz': - os.system(f'tar xfz {f} --directory {f.parent} && rm {f}') # unzip + s = f'tar xfz {f} --directory {f.parent}' # unzip + if delete: # delete archive after extraction; the only place rm is appended + s += f' && rm {f}' + os.system(s) dir = Path(dir) dir.mkdir(parents=True, exist_ok=True) # make directory