[Feature] Add SVT to dataset preparer

* add svt to data preparer

* add svt parser test
pull/1532/head
Xinyu Wang 2022-11-15 18:54:30 +08:00 committed by GitHub
parent baa2b4f863
commit 79a778689d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 204 additions and 1 deletions
mmocr/datasets/preparers/parsers
tests/test_datasets/test_preparers/test_parsers

View File

@ -0,0 +1,29 @@
# Metadata describing the Street View Text (SVT) dataset: reference paper,
# data source, supported tasks, license and annotation format.
Name: 'Street View Text Dataset (SVT)'
# Reference paper for the dataset, including its BibTeX entry.
Paper:
Title: Word Spotting in the Wild
URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf
Venue: ECCV
Year: '2010'
BibTeX: '@inproceedings{wang2010word,
title={Word spotting in the wild},
author={Wang, Kai and Belongie, Serge},
booktitle={European conference on computer vision},
pages={591--604},
year={2010},
organization={Springer}}'
# Properties of the data itself.
Data:
Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset
Language:
- English
Scene:
- Natural Scene
Granularity:
- Word
# Tasks listed for this dataset.
Tasks:
- textdet
- textrecog
- textspotting
# No license type or link is recorded here (both are N/A).
License:
Type: N/A
Link: N/A
# File extension of the annotation files.
Format: .xml

View File

@ -0,0 +1,23 @@
**Text Detection/Recognition/Spotting**
```xml
<image>
<imageName>img/14_03.jpg</imageName>
<address>341 Southwest 10th Avenue Portland OR</address>
<lex>
LIVING,ROOM,THEATERS,KENNY,ZUKE,DELICATESSEN,CLYDE,COMMON,ACE,HOTEL,PORTLAND,ROSE,CITY,BOOKS,STUMPTOWN,COFFEE,ROASTERS,RED,CAP,GARAGE,FISH,GROTTO,SEAFOOD,RESTAURANT,AURA,RESTAURANT,LOUNGE,ROCCO,PIZZA,PASTA,BUFFALO,EXCHANGE,MARK,SPENCER,LIGHT,FEZ,BALLROOM,READING,FRENZY,ROXY,SCANDALS,MARTINOTTI,CAFE,DELI,CROWSENBERG,HALF
</lex>
<Resolution x="1280" y="880"/>
<taggedRectangles>
<taggedRectangle height="75" width="236" x="375" y="253">
<tag>LIVING</tag>
</taggedRectangle>
<taggedRectangle height="76" width="175" x="639" y="272">
<tag>ROOM</tag>
</taggedRectangle>
<taggedRectangle height="87" width="281" x="839" y="283">
<tag>THEATERS</tag>
</taggedRectangle>
</taggedRectangles>
</image>
```

View File

@ -0,0 +1,27 @@
# Dataset root and download cache directory; both are handed to the
# obtainer / converter definitions below.
data_root = 'data/svt'
cache_path = 'data/cache'

# Download step: a single archive is listed for both the train and test
# splits.
data_obtainer = {
    'type': 'NaiveDataObtainer',
    'cache_path': cache_path,
    'data_root': data_root,
    'files': [
        {
            'url': 'http://www.iapr-tc11.org/dataset/SVT/svt.zip',
            'save_name': 'svt.zip',
            'md5': '42d19160010d990ae6223b14f45eff88',
            'split': ['train', 'test'],
            'content': ['image', 'annotations'],
            # NOTE(review): each pair looks like [path inside the extracted
            # archive, destination relative to data_root] -- confirm against
            # NaiveDataObtainer.
            'mapping': [
                ['svt/svt1/train.xml', 'annotations/train.xml'],
                ['svt/svt1/test.xml', 'annotations/test.xml'],
                ['svt/svt1/img', 'textdet_imgs/img'],
            ],
        },
    ],
}

# Conversion step: one XML annotation file per split is parsed by
# SVTTextDetAnnParser and dumped with JsonDumper.
data_converter = {
    'type': 'TextDetDataConverter',
    'splits': ['train', 'test'],
    'data_root': data_root,
    # The mapping is a string holding an f-string expression; presumably it
    # is evaluated per split by the gatherer -- TODO confirm.
    'gatherer': {'type': 'mono_gather', 'mapping': "f'{split}.xml'"},
    'parser': {'type': 'SVTTextDetAnnParser', 'data_root': data_root},
    'dumper': {'type': 'JsonDumper'},
    # NOTE(review): presumably paths removed once conversion is done --
    # confirm against the converter.
    'delete': ['annotations', 'svt'],
}

View File

@ -0,0 +1,3 @@
# Base config: textdet.py. This file only sets the data converter type.
_base_ = ['textdet.py']
data_converter = {'type': 'TextRecogCropConverter'}

View File

@ -0,0 +1,3 @@
# Base config: textdet.py. This file only sets the data converter type.
_base_ = ['textdet.py']
data_converter = {'type': 'TextSpottingDataConverter'}

View File

@ -2,11 +2,12 @@
from .coco_parser import COCOTextDetAnnParser
from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
ICDARTxtTextRecogAnnParser)
from .svt_parser import SVTTextDetAnnParser
from .totaltext_parser import TotaltextTextDetAnnParser
from .wildreceipt_parser import WildreceiptKIEAnnParser
# Names exported by the parsers package. Every entry is followed by a comma
# or the closing bracket, so no two adjacent string literals can be merged
# into one bogus name by implicit concatenation.
__all__ = [
    'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
    'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
    'COCOTextDetAnnParser', 'SVTTextDetAnnParser'
]

View File

@ -0,0 +1,65 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import xml.etree.ElementTree as ET
from typing import Iterator, List, Tuple

from ..data_preparer import DATA_PARSERS
from .base import BaseParser
@DATA_PARSERS.register_module()
class SVTTextDetAnnParser(BaseParser):
    """SVT Text Detection Parser.

    Reads an SVT XML annotation file and converts every tagged rectangle into
    a 4-point polygon with its (lower-cased) transcription.

    Args:
        data_root (str): The root of the dataset. Defaults to None.
        nproc (int): The number of processes to parse the annotation. Defaults
            to 1.
    """

    def __init__(self, data_root: str = None, nproc: int = 1) -> None:
        super().__init__(data_root=data_root, nproc=nproc)

    def parse_files(self, files: str, split: str) -> List:
        """Parse a single SVT annotation file.

        Args:
            files (str): Path of the XML annotation file.
            split (str): The split being parsed. It is not used here.

        Returns:
            List: A list of ``(image path, instances)`` tuples, one per
            ``<image>`` element of the annotation file.
        """
        assert isinstance(files, str)
        return list(self.loader(files))

    def loader(self, file_path: str) -> Iterator[Tuple[str, List]]:
        """Load annotation from SVT xml format file. See annotation example in
        dataset_zoo/svt/sample_anno.md.

        Args:
            file_path (str): The path of the annotation file.

        Yields:
            Tuple[str, List]: The image path and the list of instances of
            that image. Each instance is a dict with the keys ``poly``,
            ``text`` and ``ignore``.
        """
        root = ET.parse(file_path).getroot()
        for image in root.findall('image'):
            image_name = osp.join(self.data_root, 'textdet_imgs',
                                  image.find('imageName').text)
            # ``find`` returns None when the element is absent. Compare with
            # ``is None`` explicitly: an Element without children is falsy, so
            # a plain truthiness test would be misleading. An image without a
            # <taggedRectangles> element simply has no instances.
            rectangles = image.find('taggedRectangles')
            if rectangles is None:
                rectangles = ()
            instances = list()
            for rectangle in rectangles:
                x = int(rectangle.get('x'))
                y = int(rectangle.get('y'))
                w = int(rectangle.get('width'))
                h = int(rectangle.get('height'))
                # The text annotation of this dataset is not case sensitive.
                # All of the texts were labeled as upper case. We convert them
                # to lower case for convenience.
                text = rectangle.find('tag').text.lower()
                instances.append(
                    dict(
                        # Corners in the order: top-left, top-right,
                        # bottom-right, bottom-left.
                        poly=[x, y, x + w, y, x + w, y + h, x, y + h],
                        text=text,
                        ignore=False))
            yield image_name, instances

View File

@ -0,0 +1,52 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import unittest
from mmocr.datasets.preparers.parsers.svt_parser import SVTTextDetAnnParser
from mmocr.utils import list_to_file
class TestSVTParsers(unittest.TestCase):
    """Tests for ``SVTTextDetAnnParser`` on a small hand-written XML file."""

    def setUp(self) -> None:
        self.root = tempfile.TemporaryDirectory()
        # Register the cleanup right away so the temporary directory is
        # removed after every test, including failing ones.
        self.addCleanup(self.root.cleanup)

    def _create_dummy_svt_det(self):
        """Write a one-image SVT annotation file and return its path."""
        fake_anno = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<tagset>',
            ' <image>',
            ' <imageName>img/test.jpg</imageName>',
            ' <Resolution x="1280" y="880"/>',
            ' <taggedRectangles>',
            ' <taggedRectangle height="75" width="236" x="375" y="253">',  # noqa: E501
            ' <tag>LIVING</tag>',
            ' </taggedRectangle>',
            ' <taggedRectangle height="76" width="175" x="639" y="272">',  # noqa: E501
            ' <tag>ROOM</tag>',
            ' </taggedRectangle>',
            ' <taggedRectangle height="87" width="281" x="839" y="283">',  # noqa: E501
            ' <tag>THEATERS</tag>',
            ' </taggedRectangle>',
            ' </taggedRectangles>',
            ' </image>',
            '</tagset>',
        ]
        ann_file = osp.join(self.root.name, 'svt_det.xml')
        list_to_file(ann_file, fake_anno)
        return ann_file

    def test_textdet_parsers(self):
        parser = SVTTextDetAnnParser(self.root.name)
        ann_file = self._create_dummy_svt_det()
        samples = parser.parse_files(ann_file, 'train')
        # One <image> element with three tagged rectangles.
        self.assertEqual(len(samples), 1)
        self.assertEqual(osp.basename(samples[0][0]), 'test.jpg')
        self.assertEqual(len(samples[0][1]), 3)
        # Transcriptions are lower-cased by the parser.
        self.assertEqual(samples[0][1][0]['text'], 'living')
        self.assertEqual(samples[0][1][1]['text'], 'room')
        self.assertEqual(samples[0][1][2]['text'], 'theaters')
        # x=375, y=253, width=236, height=75 -> four corners, clockwise.
        self.assertEqual(samples[0][1][0]['poly'],
                         [375, 253, 611, 253, 611, 328, 375, 328])
        self.assertEqual(samples[0][1][0]['ignore'], False)