diff --git a/dataset_zoo/svt/metafile.yml b/dataset_zoo/svt/metafile.yml new file mode 100644 index 00000000..3636fd9b --- /dev/null +++ b/dataset_zoo/svt/metafile.yml @@ -0,0 +1,29 @@ +Name: 'Street View Text Dataset (SVT)' +Paper: + Title: Word Spotting in the Wild + URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf + Venue: ECCV + Year: '2010' + BibTeX: '@inproceedings{wang2010word, + title={Word spotting in the wild}, + author={Wang, Kai and Belongie, Serge}, + booktitle={European conference on computer vision}, + pages={591--604}, + year={2010}, + organization={Springer}}' +Data: + Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset + Language: + - English + Scene: + - Natural Scene + Granularity: + - Word + Tasks: + - textdet + - textrecog + - textspotting + License: + Type: N/A + Link: N/A + Format: .xml diff --git a/dataset_zoo/svt/sample_anno.md b/dataset_zoo/svt/sample_anno.md new file mode 100644 index 00000000..cc0e40d2 --- /dev/null +++ b/dataset_zoo/svt/sample_anno.md @@ -0,0 +1,23 @@ +**Text Detection/Recognition/Spotting** + +```xml + + img/14_03.jpg +
341 Southwest 10th Avenue Portland OR
+ + LIVING,ROOM,THEATERS,KENNY,ZUKE,DELICATESSEN,CLYDE,COMMON,ACE,HOTEL,PORTLAND,ROSE,CITY,BOOKS,STUMPTOWN,COFFEE,ROASTERS,RED,CAP,GARAGE,FISH,GROTTO,SEAFOOD,RESTAURANT,AURA,RESTAURANT,LOUNGE,ROCCO,PIZZA,PASTA,BUFFALO,EXCHANGE,MARK,SPENCER,LIGHT,FEZ,BALLROOM,READING,FRENZY,ROXY,SCANDALS,MARTINOTTI,CAFE,DELI,CROWSENBERG,HALF + + + + + LIVING + + + ROOM + + + THEATERS + + + +``` diff --git a/dataset_zoo/svt/textdet.py b/dataset_zoo/svt/textdet.py new file mode 100644 index 00000000..ab95ce32 --- /dev/null +++ b/dataset_zoo/svt/textdet.py @@ -0,0 +1,27 @@ +data_root = 'data/svt' +cache_path = 'data/cache' + +data_obtainer = dict( + type='NaiveDataObtainer', + cache_path=cache_path, + data_root=data_root, + files=[ + dict( + url='http://www.iapr-tc11.org/dataset/SVT/svt.zip', + save_name='svt.zip', + md5='42d19160010d990ae6223b14f45eff88', + split=['train', 'test'], + content=['image', 'annotations'], + mapping=[['svt/svt1/train.xml', 'annotations/train.xml'], + ['svt/svt1/test.xml', 'annotations/test.xml'], + ['svt/svt1/img', 'textdet_imgs/img']]), + ]) + +data_converter = dict( + type='TextDetDataConverter', + splits=['train', 'test'], + data_root=data_root, + gatherer=dict(type='mono_gather', mapping="f'{split}.xml'"), + parser=dict(type='SVTTextDetAnnParser', data_root=data_root), + dumper=dict(type='JsonDumper'), + delete=['annotations', 'svt']) diff --git a/dataset_zoo/svt/textrecog.py b/dataset_zoo/svt/textrecog.py new file mode 100644 index 00000000..e18f2f1f --- /dev/null +++ b/dataset_zoo/svt/textrecog.py @@ -0,0 +1,3 @@ +_base_ = ['textdet.py'] + +data_converter = dict(type='TextRecogCropConverter') diff --git a/dataset_zoo/svt/textspotting.py b/dataset_zoo/svt/textspotting.py new file mode 100644 index 00000000..413de5e8 --- /dev/null +++ b/dataset_zoo/svt/textspotting.py @@ -0,0 +1,3 @@ +_base_ = ['textdet.py'] + +data_converter = dict(type='TextSpottingDataConverter') diff --git a/mmocr/datasets/preparers/parsers/__init__.py 
@DATA_PARSERS.register_module()
class SVTTextDetAnnParser(BaseParser):
    """SVT Text Detection Parser.

    Reads the single-file xml annotation of the Street View Text dataset and
    turns every ``image`` element into an ``(image path, instances)`` pair.

    Args:
        data_root (str): The root of the dataset. Defaults to None.
        nproc (int): The number of processes to parse the annotation. Defaults
            to 1.
    """

    def __init__(self, data_root: str = None, nproc: int = 1) -> None:
        super().__init__(data_root=data_root, nproc=nproc)

    def parse_files(self, files: str, split: str) -> List:
        """Parse all annotations stored in one SVT xml file.

        Args:
            files (str): The path of the xml annotation file. Must be a
                single path string, since SVT keeps all annotations of a
                split in one file.
            split (str): The split being parsed. It is not used by this
                parser.

        Returns:
            List[Tuple[str, List]]: One ``(image path, instances)`` tuple
            per ``image`` element, in document order.
        """
        assert isinstance(files, str)
        # ``loader`` is a generator that already yields the final tuples.
        return list(self.loader(files))

    def loader(self, file_path: str) -> Tuple[str, List]:
        """Load annotations from an SVT xml format file.

        See the annotation example in dataset_zoo/svt/sample_anno.md.

        This method is a generator. The return annotation describes each
        yielded item rather than the generator object itself.

        Args:
            file_path (str): The path of the annotation file.

        Yields:
            Tuple[str, List]: The image path (``data_root`` joined with
            ``textdet_imgs`` and the ``imageName`` text) and the list of
            instances of that image. Each instance is a dict with the keys
            ``poly`` (the four rectangle corners as a flat list
            ``[x1, y1, x2, y2, x3, y3, x4, y4]``), ``text`` (lower-cased
            tag) and ``ignore`` (always False).
        """
        root = ET.parse(file_path).getroot()
        for image in root.findall('image'):
            image_name = osp.join(self.data_root, 'textdet_imgs',
                                  image.find('imageName').text)
            rectangles = image.find('taggedRectangles')
            # ``find`` returns None when the element is absent. Compare with
            # ``is None`` explicitly: an Element without children is falsy,
            # so a plain truth test would be ambiguous. An image without
            # tagged rectangles is yielded with an empty instance list
            # instead of raising a TypeError.
            if rectangles is None:
                rectangles = ()
            instances = []
            for rectangle in rectangles:
                x = int(rectangle.get('x'))
                y = int(rectangle.get('y'))
                w = int(rectangle.get('width'))
                h = int(rectangle.get('height'))
                # The text annotation of this dataset is not case sensitive.
                # All of the texts were labeled as upper case. We convert them
                # to lower case for convenience.
                text = rectangle.find('tag').text.lower()
                instances.append(
                    dict(
                        # Corners: top-left, top-right, bottom-right,
                        # bottom-left.
                        poly=[x, y, x + w, y, x + w, y + h, x, y + h],
                        text=text,
                        ignore=False))
            yield image_name, instances
class TestSVTParsers(unittest.TestCase):
    """Unit tests for ``SVTTextDetAnnParser``."""

    def setUp(self) -> None:
        # Every test writes its fixture into a fresh temporary directory.
        self.root = tempfile.TemporaryDirectory()

    def tearDown(self) -> None:
        # Remove the temporary directory so fixtures do not leak between
        # tests or trigger a ResourceWarning at interpreter exit.
        self.root.cleanup()

    def _create_dummy_svt_det(self):
        """Write a minimal SVT-style xml annotation and return its path.

        The file holds one ``image`` element with three tagged rectangles.
        Only the first rectangle's geometry is asserted by the test below.
        """
        fake_anno = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<tagset>',
            '    <image>',
            '        <imageName>img/test.jpg</imageName>',
            '        <Resolution x="1280" y="880"/>',
            '        <taggedRectangles>',
            '            <taggedRectangle height="75" width="236" x="375" y="253">',  # noqa: E501
            '                <tag>LIVING</tag>',
            '            </taggedRectangle>',
            '            <taggedRectangle height="76" width="175" x="639" y="272">',  # noqa: E501
            '                <tag>ROOM</tag>',
            '            </taggedRectangle>',
            '            <taggedRectangle height="87" width="281" x="839" y="283">',  # noqa: E501
            '                <tag>THEATERS</tag>',
            '            </taggedRectangle>',
            '        </taggedRectangles>',
            '    </image>',
            '</tagset>',
        ]
        ann_file = osp.join(self.root.name, 'svt_det.xml')
        list_to_file(ann_file, fake_anno)
        return ann_file

    def test_textdet_parsers(self):
        parser = SVTTextDetAnnParser(self.root.name)
        file = self._create_dummy_svt_det()
        samples = parser.parse_files(file, 'train')
        # One image element -> one sample.
        self.assertEqual(len(samples), 1)
        self.assertEqual(osp.basename(samples[0][0]), 'test.jpg')
        # Three tagged rectangles -> three instances, texts lower-cased.
        self.assertEqual(len(samples[0][1]), 3)
        self.assertEqual(samples[0][1][0]['text'], 'living')
        self.assertEqual(samples[0][1][1]['text'], 'room')
        self.assertEqual(samples[0][1][2]['text'], 'theaters')
        # x=375, y=253, w=236, h=75 -> clockwise corners from top-left.
        self.assertEqual(samples[0][1][0]['poly'],
                         [375, 253, 611, 253, 611, 328, 375, 328])
        self.assertEqual(samples[0][1][0]['ignore'], False)