import argparse
import glob
import os
import os.path as osp
import xml.etree.ElementTree as ET
from functools import partial

import mmcv
import numpy as np
from shapely.geometry import Polygon


def check_ignore_orientation(img_file):
    """Check if the image has orientation information. If yes, ignore it by
    converting the image format to png, otherwise return the original
    filename.

    Args:
        img_file(str): The image path

    Returns:
        The converted image filename with proper postfix
    """
    assert isinstance(img_file, str)
    assert img_file

    # read imgs while ignoring orientations
    img = mmcv.imread(img_file, 'unchanged')
    # read imgs with orientations as dataloader does when training and testing
    img_color = mmcv.imread(img_file, 'color')
    # make sure imgs have no orientation info, or annotation gt is wrong.
    if img.shape[:2] == img_color.shape[:2]:
        return img_file
    else:
        target_file = osp.splitext(img_file)[0] + '.png'
        # read img while ignoring orientation information
        img = mmcv.imread(img_file, 'unchanged')
        mmcv.imwrite(img, target_file)
        os.remove(img_file)
        print(f'{img_file} has orientation info. '
              'Ignore it by converting to png')
        return target_file


def is_not_png(img_file):
    """Check that img_file is not a png image.

    Args:
        img_file(str): The input image file name

    Returns:
        The bool flag indicating whether it is not png
    """
    assert isinstance(img_file, str)
    assert img_file

    suffix = osp.splitext(img_file)[1]

    return suffix not in ['.PNG', '.png']


def collect_files(img_dir, gt_dir, split):
    """Collect all images and their corresponding groundtruth files.

    Args:
        img_dir(str): The image directory
        gt_dir(str): The groundtruth directory
        split(str): The split of dataset. Namely: training or test

    Returns:
        files(list): The list of tuples (img_file, groundtruth_file)
    """
    assert isinstance(img_dir, str)
    assert img_dir
    assert isinstance(gt_dir, str)
    assert gt_dir

    # note that we handle png and jpg only. Please convert other formats
    # such as gif to jpg or png offline
    suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
    # suffixes = ['.png']

    imgs_list = []
    for suffix in suffixes:
        imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))

    imgs_list = [
        check_ignore_orientation(f) if is_not_png(f) else f for f in imgs_list
    ]

    files = []
    if split == 'training':
        for img_file in imgs_list:
            gt_file = gt_dir + '/' + osp.splitext(
                osp.basename(img_file))[0] + '.xml'
            files.append((img_file, gt_file))
        assert len(files), f'No images found in {img_dir}'
        print(f'Loaded {len(files)} images from {img_dir}')
    elif split == 'test':
        for img_file in imgs_list:
            gt_file = gt_dir + '/000' + osp.splitext(
                osp.basename(img_file))[0] + '.txt'
            files.append((img_file, gt_file))
        assert len(files), f'No images found in {img_dir}'
        print(f'Loaded {len(files)} images from {img_dir}')

    return files


def collect_annotations(files, split, nproc=1):
    """Collect the annotation information.

    Args:
        files(list): The list of tuples (image_file, groundtruth_file)
        split(str): The split of dataset.
            Namely: training or test
        nproc(int): The number of processes used to collect annotations

    Returns:
        images(list): The list of image information dicts
    """
    assert isinstance(files, list)
    assert isinstance(split, str)
    assert isinstance(nproc, int)

    load_img_info_with_split = partial(load_img_info, split=split)
    if nproc > 1:
        images = mmcv.track_parallel_progress(
            load_img_info_with_split, files, nproc=nproc)
    else:
        images = mmcv.track_progress(load_img_info_with_split, files)

    return images


def load_txt_info(gt_file, img_info):
    """Collect the annotation information from a txt groundtruth file
    (test split).

    Args:
        gt_file(str): The path of the groundtruth txt file
        img_info(dict): The dict of the image information

    Returns:
        img_info(dict): The image information updated with anno_info
    """
    with open(gt_file) as f:
        gt_list = f.readlines()

    anno_info = []
    for line in gt_list:
        # each line has one polygon (n vertices) and one text,
        # e.g., 695,885,866,888,867,1146,696,1143,####Latin 9
        line = line.strip()
        strs = line.split(',')
        category_id = 1
        assert strs[28][0] == '#'
        xy = [int(x) for x in strs[0:28]]
        assert len(xy) == 28
        coordinates = np.array(xy).reshape(-1, 2)
        polygon = Polygon(coordinates)
        iscrowd = 0
        area = polygon.area
        # convert to COCO style XYWH format
        min_x, min_y, max_x, max_y = polygon.bounds
        bbox = [min_x, min_y, max_x - min_x, max_y - min_y]

        anno = dict(
            iscrowd=iscrowd,
            category_id=category_id,
            bbox=bbox,
            area=area,
            segmentation=[xy])
        anno_info.append(anno)

    img_info.update(anno_info=anno_info)

    return img_info


def load_xml_info(gt_file, img_info):
    """Collect the annotation information from an xml groundtruth file
    (training split).

    Args:
        gt_file(str): The path of the groundtruth xml file
        img_info(dict): The dict of the image information

    Returns:
        img_info(dict): The image information updated with anno_info
    """
    obj = ET.parse(gt_file)
    anno_info = []
    for image in obj.getroot():  # image
        for box in image:  # box
            h = box.attrib['height']
            w = box.attrib['width']
            x = box.attrib['left']
            y = box.attrib['top']
            # label = box[0].text
            segs = box[1].text
            pts = segs.strip().split(',')
            pts = [int(x) for x in pts]
            assert len(pts) == 28
            # pts = []
            # for iter in range(2, len(box)):
            #     pts.extend([int(box[iter].attrib['x']),
            #                 int(box[iter].attrib['y'])])

            iscrowd = 0
            category_id = 1
            bbox = [int(x), int(y), int(w), int(h)]

            coordinates = np.array(pts).reshape(-1, 2)
            polygon = Polygon(coordinates)
            area = polygon.area
            anno = dict(
                iscrowd=iscrowd,
                category_id=category_id,
                bbox=bbox,
                area=area,
                segmentation=[pts])
            anno_info.append(anno)

    img_info.update(anno_info=anno_info)

    return img_info


def load_img_info(files, split):
    """Load the information of one image.

    Args:
        files(tuple): The tuple of (img_file, groundtruth_file)
        split(str): The split of dataset: training or test

    Returns:
        img_info(dict): The dict of the img and annotation information
    """
    assert isinstance(files, tuple)
    assert isinstance(split, str)

    img_file, gt_file = files
    # read imgs while ignoring orientations
    img = mmcv.imread(img_file, 'unchanged')
    # read imgs with orientations as dataloader does when training and testing
    img_color = mmcv.imread(img_file, 'color')
    # make sure imgs have no orientation info, or annotation gt is wrong.
    assert img.shape[0:2] == img_color.shape[0:2]

    split_name = osp.basename(osp.dirname(img_file))
    img_info = dict(
        # remove img_prefix for filename
        file_name=osp.join(split_name, osp.basename(img_file)),
        height=img.shape[0],
        width=img.shape[1],
        # anno_info=anno_info,
        segm_file=osp.join(split_name, osp.basename(gt_file)))

    if split == 'training':
        img_info = load_xml_info(gt_file, img_info)
    elif split == 'test':
        img_info = load_txt_info(gt_file, img_info)
    else:
        raise NotImplementedError

    return img_info


def convert_annotations(image_infos, out_json_name):
    """Convert the annotations into coco style.
    Args:
        image_infos(list): The list of image information dicts
        out_json_name(str): The output json filename

    Returns:
        out_json(dict): The coco style dict
    """
    assert isinstance(image_infos, list)
    assert isinstance(out_json_name, str)
    assert out_json_name

    out_json = dict()
    img_id = 0
    ann_id = 0
    out_json['images'] = []
    out_json['categories'] = []
    out_json['annotations'] = []
    for image_info in image_infos:
        image_info['id'] = img_id
        anno_infos = image_info.pop('anno_info')
        out_json['images'].append(image_info)
        for anno_info in anno_infos:
            anno_info['image_id'] = img_id
            anno_info['id'] = ann_id
            out_json['annotations'].append(anno_info)
            ann_id += 1
            # if image_info['file_name'].find('png'):
            #     img = mmcv.imread(
            #         'data/ctw1500/imgs/' + image_info['file_name'], 'color')
            #     show_img_boundary(img, anno_info['segmentation'])
        img_id += 1
    print(f'Converted {img_id} images')

    cat = dict(id=1, name='text')
    out_json['categories'].append(cat)

    if len(out_json['annotations']) == 0:
        out_json.pop('annotations')

    mmcv.dump(out_json, out_json_name)

    return out_json


def parse_args():
    parser = argparse.ArgumentParser(
        description='Convert ctw1500 annotations to COCO format')
    parser.add_argument('root_path', help='ctw1500 root path')
    parser.add_argument('-o', '--out-dir', help='output path')
    parser.add_argument(
        '--split-list',
        nargs='+',
        help='a list of splits, e.g., "--split-list training test"')
    parser.add_argument(
        '--nproc', default=1, type=int, help='number of processes')

    args = parser.parse_args()
    return args


def main():
    args = parse_args()
    root_path = args.root_path
    out_dir = args.out_dir if args.out_dir else root_path
    mmcv.mkdir_or_exist(out_dir)

    img_dir = osp.join(root_path, 'imgs')
    gt_dir = osp.join(root_path, 'annotations')

    set_name = {}
    for split in args.split_list:
        set_name.update({split: 'instances_' + split + '.json'})
        assert osp.exists(osp.join(img_dir, split))

    for split, json_name in set_name.items():
        print(f'Converting {split} into {json_name}')
        with mmcv.Timer(
                print_tmpl='It takes {}s to convert ctw1500 annotation'):
            files = collect_files(
                osp.join(img_dir, split), osp.join(gt_dir, split), split)
            image_infos = collect_annotations(files, split, nproc=args.nproc)
            convert_annotations(image_infos, osp.join(out_dir, json_name))


if __name__ == '__main__':
    main()
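# Example usage (a sketch, not part of the converter itself). The file name
# ctw1500_converter.py and the data paths below are illustrative assumptions;
# the directory layout follows what main() expects:
#
#   <root_path>/imgs/<split>            images (png/jpg)
#   <root_path>/annotations/<split>     xml files for training, txt files for test
#
#   python ctw1500_converter.py data/ctw1500 -o data/ctw1500 \
#       --split-list training test --nproc 4
#
# This writes instances_training.json and instances_test.json to the output
# directory.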