# mmsegmentation/tools/dataset_converters/stare.py

# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import gzip
import os
import os.path as osp
import tarfile
import tempfile

import mmcv
from mmengine.utils import mkdir_or_exist

# Number of files each STARE archive is expected to yield once decompressed;
# the converter checks this count before splitting.
STARE_LEN = 20
# The first TRAINING_LEN files (in sorted file-name order) are written to the
# training split; the remaining files are written to the validation split.
TRAINING_LEN = 10


def un_gz(src, dst):
    """Decompress the gzip file ``src`` into the file ``dst``.

    Args:
        src (str): Path of the gzip-compressed input file.
        dst (str): Path of the decompressed output file. It is created if
            missing and truncated if it already exists.
    """
    chunk_size = 64 * 1024
    # Both handles are managed by ``with`` so they are closed even when
    # opening ``dst``, reading or writing fails. The payload is streamed in
    # fixed-size chunks instead of being loaded into memory as a whole.
    with gzip.GzipFile(src) as g_file, open(dst, 'wb') as f:
        for chunk in iter(lambda: g_file.read(chunk_size), b''):
            f.write(chunk)


def parse_args():
    """Parse the command-line arguments of the STARE converter.

    Returns:
        argparse.Namespace: Namespace with the positional ``image_path``,
        ``labels_ah`` and ``labels_vk`` archive paths plus the optional
        ``tmp_dir`` and ``out_dir`` values (``None`` when not given).
    """
    cli = argparse.ArgumentParser(
        description='Convert STARE dataset to mmsegmentation format')

    # Required positional arguments, registered in command-line order.
    positionals = (
        ('image_path', 'the path of stare-images.tar'),
        ('labels_ah', 'the path of labels-ah.tar'),
        ('labels_vk', 'the path of labels-vk.tar'),
    )
    for arg_name, help_text in positionals:
        cli.add_argument(arg_name, help=help_text)

    # Optional arguments.
    cli.add_argument('--tmp_dir', help='path of the temporary directory')
    cli.add_argument('-o', '--out_dir', help='output path')

    return cli.parse_args()


def _extract_archive(tar_path, dst_dir):
    """Extract every member of the tar archive ``tar_path`` into ``dst_dir``."""
    with tarfile.open(tar_path) as f:
        if hasattr(tarfile, 'data_filter'):
            # The archives are downloaded files: use the 'data' extraction
            # filter where this Python provides it, so that members cannot
            # be written outside of ``dst_dir``.
            f.extractall(dst_dir, filter='data')
        else:
            f.extractall(dst_dir)


def _convert_archive(tar_path,
                     archive_name,
                     out_dir,
                     sub_dir,
                     tmp_root,
                     to_mask=False):
    """Unpack one STARE archive and write its files as split PNG images.

    The archive is extracted into a temporary directory, every extracted
    file is gunzipped, and the resulting files are written (sorted by file
    name) as ``<out_dir>/<sub_dir>/training/*.png`` for the first
    ``TRAINING_LEN`` files and ``<out_dir>/<sub_dir>/validation/*.png`` for
    the rest.

    Args:
        tar_path (str): Path of the tar archive to convert.
        archive_name (str): Archive name shown in the progress and error
            messages.
        out_dir (str): Root of the output dataset directory.
        sub_dir (str): Sub-directory of ``out_dir`` to write into
            (``'images'`` or ``'annotations'``).
        tmp_root (str | None): Directory in which the temporary working
            directory is created; ``None`` uses the system default.
        to_mask (bool): Whether to binarize the first channel of every
            image before writing it. Defaults to False.

    Raises:
        ValueError: If the archive does not yield exactly ``STARE_LEN``
            files.
    """
    with tempfile.TemporaryDirectory(dir=tmp_root) as tmp_dir:
        gz_dir = osp.join(tmp_dir, 'gz')
        files_dir = osp.join(tmp_dir, 'files')
        mkdir_or_exist(gz_dir)
        mkdir_or_exist(files_dir)

        print(f'Extracting {archive_name}...')
        _extract_archive(tar_path, gz_dir)

        # Every extracted member is gzip-compressed; dropping the last
        # extension gives the name of the decompressed file.
        for filename in os.listdir(gz_dir):
            un_gz(
                osp.join(gz_dir, filename),
                osp.join(files_dir,
                         osp.splitext(filename)[0]))

        # Sort once so the training/validation split is deterministic.
        filenames = sorted(os.listdir(files_dir))

        # Explicit check instead of ``assert`` so that it is still
        # enforced when Python runs with ``-O``.
        if len(filenames) != STARE_LEN:
            raise ValueError(
                f'expected {STARE_LEN} files in {archive_name}, '
                f'but found {len(filenames)}')

        for index, filename in enumerate(filenames):
            split = 'training' if index < TRAINING_LEN else 'validation'
            img = mmcv.imread(osp.join(files_dir, filename))
            if to_mask:
                # The annotation img should be divided by 128, because some
                # of the annotation imgs are not standard. We should set a
                # threshold to convert the nonstandard annotation imgs. For
                # 8-bit values, dividing by 128 is equivalent to
                # '1 if value >= 128 else 0'.
                img = img[:, :, 0] // 128
            mmcv.imwrite(
                img,
                osp.join(out_dir, sub_dir, split,
                         osp.splitext(filename)[0] + '.png'))

        # The temporary directory is deleted when the ``with`` block exits.
        print('Removing the temporary files...')


def main():
    """Convert the three STARE archives to the mmsegmentation layout.

    Reads the archive paths from the command line, creates the
    ``images/{training,validation}`` and ``annotations/{training,validation}``
    directories under the output directory (``data/STARE`` by default) and
    fills them from ``stare-images.tar``, ``labels-ah.tar`` and
    ``labels-vk.tar``.
    """
    args = parse_args()
    if args.out_dir is None:
        out_dir = osp.join('data', 'STARE')
    else:
        out_dir = args.out_dir

    print('Making directories...')
    mkdir_or_exist(out_dir)
    for sub_dir in ('images', 'annotations'):
        mkdir_or_exist(osp.join(out_dir, sub_dir))
        for split in ('training', 'validation'):
            mkdir_or_exist(osp.join(out_dir, sub_dir, split))

    # Raw images are copied as-is; both annotation sets are binarized.
    _convert_archive(args.image_path, 'stare-images.tar', out_dir, 'images',
                     args.tmp_dir)
    _convert_archive(args.labels_ah, 'labels-ah.tar', out_dir, 'annotations',
                     args.tmp_dir, to_mask=True)
    _convert_archive(args.labels_vk, 'labels-vk.tar', out_dir, 'annotations',
                     args.tmp_dir, to_mask=True)

    print('Done!')


# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()