import argparse
import glob
import os
import os.path as osp
import xml.etree.ElementTree as ET
from functools import partial

import mmcv
import numpy as np
from shapely.geometry import Polygon


def check_ignore_orientation(img_file):
    """Check if the image has orientation information. If yes, ignore it by
    converting the image format to png, otherwise return the original
    filename.

    Args:
        img_file(str): The image path

    Returns:
        The converted image filename with proper postfix
    """
    assert isinstance(img_file, str)
    assert img_file

    # read imgs while ignoring orientations
    img = mmcv.imread(img_file, 'unchanged')
    # read imgs with orientations as dataloader does when training and testing
    img_color = mmcv.imread(img_file, 'color')
    # make sure imgs have no orientation info, or annotation gt is wrong.
    if img.shape[:2] == img_color.shape[:2]:
        return img_file
    else:
        target_file = osp.splitext(img_file)[0] + '.png'
        # read img while ignoring orientation information
        img = mmcv.imread(img_file, 'unchanged')
        mmcv.imwrite(img, target_file)
        os.remove(img_file)
        print(f'{img_file} has orientation info. '
              'Ignore it by converting to png')
        return target_file


def is_not_png(img_file):
    """Check that img_file is not a png image.

    Args:
        img_file(str): The input image file name

    Returns:
        The bool flag indicating whether it is not png
    """
    assert isinstance(img_file, str)
    assert img_file

    suffix = osp.splitext(img_file)[1]

    return suffix not in ['.PNG', '.png']


def collect_files(img_dir, gt_dir, split):
    """Collect all images and their corresponding groundtruth files.

    Args:
        img_dir(str): The image directory
        gt_dir(str): The groundtruth directory
        split(str): The split of dataset. Namely: training or test

    Returns:
        files(list): The list of tuples (img_file, groundtruth_file)
    """
    assert isinstance(img_dir, str)
    assert img_dir
    assert isinstance(gt_dir, str)
    assert gt_dir

    # note that we handle png and jpg only. Please convert other formats
    # such as gif to jpg or png offline
    suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
    # suffixes = ['.png']

    imgs_list = []
    for suffix in suffixes:
        imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))

    imgs_list = [
        check_ignore_orientation(f) if is_not_png(f) else f for f in imgs_list
    ]

    files = []
    if split == 'training':
        for img_file in imgs_list:
            gt_file = gt_dir + '/' + osp.splitext(
                osp.basename(img_file))[0] + '.xml'
            files.append((img_file, gt_file))
        assert len(files), f'No images found in {img_dir}'
        print(f'Loaded {len(files)} images from {img_dir}')
    elif split == 'test':
        for img_file in imgs_list:
            gt_file = gt_dir + '/000' + osp.splitext(
                osp.basename(img_file))[0] + '.txt'
            files.append((img_file, gt_file))
        assert len(files), f'No images found in {img_dir}'
        print(f'Loaded {len(files)} images from {img_dir}')

    return files


def collect_annotations(files, split, nproc=1):
    """Collect the annotation information.

    Args:
        files(list): The list of tuples (image_file, groundtruth_file)
        split(str): The split of dataset.
            Namely: training or test
        nproc(int): The number of processes used to collect annotations

    Returns:
        images(list): The list of image information dicts
    """
    assert isinstance(files, list)
    assert isinstance(split, str)
    assert isinstance(nproc, int)

    load_img_info_with_split = partial(load_img_info, split=split)
    if nproc > 1:
        images = mmcv.track_parallel_progress(
            load_img_info_with_split, files, nproc=nproc)
    else:
        images = mmcv.track_progress(load_img_info_with_split, files)

    return images


def load_txt_info(gt_file, img_info):
    """Collect the annotation information from a txt groundtruth file
    (test split).

    Args:
        gt_file(str): The path of the groundtruth txt file
        img_info(dict): The dict of the image information

    Returns:
        img_info(dict): The image information updated with anno_info
    """
    with open(gt_file) as f:
        gt_list = f.readlines()

    anno_info = []
    for line in gt_list:
        # each line has one polygon (n vertices) and one text,
        # e.g., 695,885,866,888,867,1146,696,1143,####Latin 9
        line = line.strip()
        strs = line.split(',')
        category_id = 1
        assert strs[28][0] == '#'
        xy = [int(x) for x in strs[0:28]]
        assert len(xy) == 28
        coordinates = np.array(xy).reshape(-1, 2)
        polygon = Polygon(coordinates)
        iscrowd = 0
        area = polygon.area
        # convert to COCO style XYWH format
        min_x, min_y, max_x, max_y = polygon.bounds
        bbox = [min_x, min_y, max_x - min_x, max_y - min_y]

        anno = dict(
            iscrowd=iscrowd,
            category_id=category_id,
            bbox=bbox,
            area=area,
            segmentation=[xy])
        anno_info.append(anno)

    img_info.update(anno_info=anno_info)

    return img_info


def load_xml_info(gt_file, img_info):
    """Collect the annotation information from an xml groundtruth file
    (training split).

    Args:
        gt_file(str): The path of the groundtruth xml file
        img_info(dict): The dict of the image information

    Returns:
        img_info(dict): The image information updated with anno_info
    """
    obj = ET.parse(gt_file)
    anno_info = []
    for image in obj.getroot():  # image
        for box in image:  # box
            h = box.attrib['height']
            w = box.attrib['width']
            x = box.attrib['left']
            y = box.attrib['top']
            # label = box[0].text
            segs = box[1].text
            pts = segs.strip().split(',')
            pts = [int(x) for x in pts]
            assert len(pts) == 28
            # pts = []
            # for iter in range(2, len(box)):
            #     pts.extend([int(box[iter].attrib['x']),
            #                 int(box[iter].attrib['y'])])

            iscrowd = 0
            category_id = 1
            bbox = [int(x), int(y), int(w), int(h)]

            coordinates = np.array(pts).reshape(-1, 2)
            polygon = Polygon(coordinates)
            area = polygon.area
            anno = dict(
                iscrowd=iscrowd,
                category_id=category_id,
                bbox=bbox,
                area=area,
                segmentation=[pts])
            anno_info.append(anno)

    img_info.update(anno_info=anno_info)

    return img_info


def load_img_info(files, split):
    """Load the information of one image.

    Args:
        files(tuple): The tuple of (img_file, groundtruth_file)
        split(str): The split of dataset: training or test

    Returns:
        img_info(dict): The dict of the img and annotation information
    """
    assert isinstance(files, tuple)
    assert isinstance(split, str)

    img_file, gt_file = files
    # read imgs while ignoring orientations
    img = mmcv.imread(img_file, 'unchanged')
    # read imgs with orientations as dataloader does when training and testing
    img_color = mmcv.imread(img_file, 'color')
    # make sure imgs have no orientation info, or annotation gt is wrong.
    assert img.shape[0:2] == img_color.shape[0:2]

    split_name = osp.basename(osp.dirname(img_file))
    img_info = dict(
        # remove img_prefix for filename
        file_name=osp.join(split_name, osp.basename(img_file)),
        height=img.shape[0],
        width=img.shape[1],
        # anno_info=anno_info,
        segm_file=osp.join(split_name, osp.basename(gt_file)))

    if split == 'training':
        img_info = load_xml_info(gt_file, img_info)
    elif split == 'test':
        img_info = load_txt_info(gt_file, img_info)
    else:
        raise NotImplementedError

    return img_info


def convert_annotations(image_infos, out_json_name):
    """Convert the annotations into coco style.
    Args:
        image_infos(list): The list of image information dicts
        out_json_name(str): The output json filename

    Returns:
        out_json(dict): The coco style dict
    """
    assert isinstance(image_infos, list)
    assert isinstance(out_json_name, str)
    assert out_json_name

    out_json = dict()
    img_id = 0
    ann_id = 0
    out_json['images'] = []
    out_json['categories'] = []
    out_json['annotations'] = []
    for image_info in image_infos:
        image_info['id'] = img_id
        anno_infos = image_info.pop('anno_info')
        out_json['images'].append(image_info)
        for anno_info in anno_infos:
            anno_info['image_id'] = img_id
            anno_info['id'] = ann_id
            out_json['annotations'].append(anno_info)
            ann_id += 1
            # if image_info['file_name'].find('png'):
            #     img = mmcv.imread(
            #         'data/ctw1500/imgs/' + image_info['file_name'], 'color')
            #     show_img_boundary(img, anno_info['segmentation'])
        img_id += 1
    print(f'Converted {img_id} images')

    cat = dict(id=1, name='text')
    out_json['categories'].append(cat)

    if len(out_json['annotations']) == 0:
        out_json.pop('annotations')

    mmcv.dump(out_json, out_json_name)

    return out_json


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert ctw1500 annotations to COCO format')
    parser.add_argument('root_path', help='ctw1500 root path')
    parser.add_argument('-o', '--out-dir', help='output path')
    parser.add_argument(
        '--split-list',
        nargs='+',
        help='a list of splits, e.g., "--split-list training test"')
    parser.add_argument(
        '--nproc', default=1, type=int, help='number of processes')

    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    root_path = args.root_path
    out_dir = args.out_dir if args.out_dir else root_path
    mmcv.mkdir_or_exist(out_dir)

    img_dir = osp.join(root_path, 'imgs')
    gt_dir = osp.join(root_path, 'annotations')

    set_name = {}
    for split in args.split_list:
        set_name.update({split: 'instances_' + split + '.json'})
        assert osp.exists(osp.join(img_dir, split))

    for split, json_name in set_name.items():
        print(f'Converting {split} into {json_name}')
        with mmcv.Timer(
                print_tmpl='It takes {}s to convert ctw1500 annotation'):
            files = collect_files(
                osp.join(img_dir, split), osp.join(gt_dir, split), split)
            image_infos = collect_annotations(files, split, nproc=args.nproc)
            convert_annotations(image_infos, osp.join(out_dir, json_name))


if __name__ == '__main__':
    main()
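# Example usage (a sketch, not part of the converter itself). The file name
# ctw1500_converter.py and the data paths below are illustrative assumptions;
# the directory layout follows what main() expects:
#
#   <root_path>/imgs/<split>            images (png/jpg)
#   <root_path>/annotations/<split>     xml files for training, txt files for test
#
#   python ctw1500_converter.py data/ctw1500 -o data/ctw1500 \
#       --split-list training test --nproc 4
#
# This writes instances_training.json and instances_test.json to the output
# directory.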