# File viewer metadata: 111 lines, 3.7 KiB, Python.
# Copyright (c) OpenMMLab. All rights reserved.
|
|
import argparse
|
|
import os
|
|
import os.path as osp
|
|
import tempfile
|
|
import zipfile
|
|
|
|
import mmcv
|
|
import numpy as np
|
|
from mmengine.utils import mkdir_or_exist
|
|
|
|
|
|
def parse_args(argv=None):
    """Parse the command-line options of the REFUGE conversion script.

    Args:
        argv (list[str], optional): Argument strings to parse. Defaults to
            None, in which case argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Namespace with the attributes ``raw_data_root``,
            ``tmp_dir`` and ``out_dir``. Each one is None when the
            corresponding option is omitted.
    """
    parser = argparse.ArgumentParser(
        description='Convert REFUGE dataset to mmsegmentation format')
    parser.add_argument('--raw_data_root', help='the root path of raw data')
    parser.add_argument('--tmp_dir', help='path of the temporary directory')
    parser.add_argument('-o', '--out_dir', help='output path')
    return parser.parse_args(argv)
|
|
|
|
|
|
def extract_img(root: str,
                cur_dir: str,
                out_dir: str,
                mode: str = 'train',
                file_type: str = 'img') -> None:
    """Extract one zip archive and export the images it contains as PNG.

    The archive is unpacked into ``root``. Every ``.jpg`` / ``.bmp`` file
    found in a leaf directory of the unpacked tree is then written to
    ``<out_dir>/<file_type>/<mode>/<stem>.png``. When ``file_type`` is
    ``'annotations'`` only the first channel is kept and its pixel values
    are remapped (0 -> 1, 128 -> 2, 255 -> 0) before saving.

    Args:
        root (str): Directory the archive is extracted into.
        cur_dir (str): Path of the zip file to extract.
        out_dir (str): Root directory the converted files are written under.
        mode (str, optional): Name of the split sub-directory.
            Defaults to 'train'.
        file_type (str, optional): Name of the sub-directory directly under
            ``out_dir``. The value 'annotations' additionally enables the
            label remapping. Defaults to 'img'.
    """
    # Use a context manager so the archive handle is closed right after
    # extraction instead of staying open until garbage collection.
    with zipfile.ZipFile(cur_dir) as zip_file:
        zip_file.extractall(root)

    is_annotation = file_type == 'annotations'
    target_dir = osp.join(out_dir, file_type, mode)

    for walk_dir, sub_dirs, files in os.walk(root):
        # Only leaf directories are processed.
        if sub_dirs:
            continue
        # Skip directories whose own name contains "Illustration" and
        # anything located under a "MACOSX" path. basename() is used rather
        # than splitting on '\\' so the check works on every platform.
        leaf_name = osp.basename(osp.normpath(walk_dir))
        if 'Illustration' in leaf_name or 'MACOSX' in walk_dir:
            continue

        image_names = sorted(
            name for name in files if name.endswith(('.jpg', '.bmp')))
        for filename in image_names:
            img = mmcv.imread(osp.join(walk_dir, filename))

            if is_annotation:
                img = img[:, :, 0]
                # The order matters: 255 -> 0 must come last, otherwise the
                # freshly written zeros would be turned into ones.
                img[np.where(img == 0)] = 1
                img[np.where(img == 128)] = 2
                img[np.where(img == 255)] = 0

            mmcv.imwrite(
                img, osp.join(target_dir,
                              osp.splitext(filename)[0] + '.png'))
|
|
|
|
|
|
def main():
    """Convert the REFUGE zip archives into the output directory layout.

    Reads the command-line options, creates
    ``<out_dir>/{images,annotations}/{training,validation,test}``, then
    hands every ``.zip`` file found directly inside ``raw_data_root`` to
    ``extract_img``. The split is taken from the archive name (it must
    contain 'training', 'test' or 'validation', case-insensitively), and an
    archive whose name contains 'Anno' or 'GT' is treated as annotations.

    Raises:
        FileNotFoundError: If ``--raw_data_root`` is missing or is not an
            existing directory.
    """
    args = parse_args()

    raw_data_root = args.raw_data_root
    if args.out_dir is None:
        out_dir = osp.join('./data', 'REFUGE')
    else:
        out_dir = args.out_dir

    # Fail early with a clear message, before any directory is created.
    if raw_data_root is None or not osp.isdir(raw_data_root):
        raise FileNotFoundError(
            '--raw_data_root must point to an existing directory, '
            'got {!r}'.format(raw_data_root))

    print('Making directories...')
    # mkdir_or_exist also creates the missing parent directories.
    for file_type in ('images', 'annotations'):
        for split in ('training', 'validation', 'test'):
            mkdir_or_exist(osp.join(out_dir, file_type, split))

    print('Generating images and annotations...')
    # Only the top level of raw_data_root is inspected; next() stops the
    # walk after the first entry instead of traversing the whole tree.
    cur_dir, _, files = next(
        os.walk(raw_data_root), (raw_data_root, [], []))
    print('====================')

    zip_files = [name for name in files if name.endswith('.zip')]
    splits = ('training', 'test', 'validation')

    with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir:
        for file in zip_files:
            # Find which split (training, test, validation) the archive
            # belongs to from its file name.
            lowered = file.lower()
            mode = next((name for name in splits if name in lowered), None)
            if mode is None:
                # An unrelated archive should not abort the conversion.
                print('Skipping {}: no split name found in the file '
                      'name.'.format(file))
                continue

            file_root = osp.join(tmp_dir, osp.splitext(file)[0])
            is_annotation = 'Anno' in file or 'GT' in file
            file_type = 'annotations' if is_annotation else 'images'
            extract_img(file_root, osp.join(cur_dir, file), out_dir, mode,
                        file_type)

    print('Done!')
|
|
|
|
|
|
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()
|