mirror of https://github.com/open-mmlab/mmocr.git
[Feature] Add SVT to dataset preparer (#1521)
* add svt to data preparer
* add svt parser test
pull/1532/head
parent
baa2b4f863
commit
79a778689d
mmocr/datasets/preparers/parsers
tests/test_datasets/test_preparers/test_parsers
|
@ -0,0 +1,29 @@
|
|||
Name: 'Street View Text Dataset (SVT)'
Paper:
  Title: Word Spotting in the Wild
  URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf
  Venue: ECCV
  Year: '2010'
  BibTeX: '@inproceedings{wang2010word,
  title={Word spotting in the wild},
  author={Wang, Kai and Belongie, Serge},
  booktitle={European conference on computer vision},
  pages={591--604},
  year={2010},
  organization={Springer}}'
Data:
  Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset
  Language:
    - English
  Scene:
    - Natural Scene
  Granularity:
    - Word
  Tasks:
    - textdet
    - textrecog
    - textspotting
  License:
    Type: N/A
    Link: N/A
  Format: .xml
|
|
@ -0,0 +1,23 @@
|
|||
**Text Detection/Recognition/Spotting**

```xml
<image>
    <imageName>img/14_03.jpg</imageName>
    <address>341 Southwest 10th Avenue Portland OR</address>
    <lex>
        LIVING,ROOM,THEATERS,KENNY,ZUKE,DELICATESSEN,CLYDE,COMMON,ACE,HOTEL,PORTLAND,ROSE,CITY,BOOKS,STUMPTOWN,COFFEE,ROASTERS,RED,CAP,GARAGE,FISH,GROTTO,SEAFOOD,RESTAURANT,AURA,RESTAURANT,LOUNGE,ROCCO,PIZZA,PASTA,BUFFALO,EXCHANGE,MARK,SPENCER,LIGHT,FEZ,BALLROOM,READING,FRENZY,ROXY,SCANDALS,MARTINOTTI,CAFE,DELI,CROWSENBERG,HALF
    </lex>
    <Resolution x="1280" y="880"/>
    <taggedRectangles>
        <taggedRectangle height="75" width="236" x="375" y="253">
            <tag>LIVING</tag>
        </taggedRectangle>
        <taggedRectangle height="76" width="175" x="639" y="272">
            <tag>ROOM</tag>
        </taggedRectangle>
        <taggedRectangle height="87" width="281" x="839" y="283">
            <tag>THEATERS</tag>
        </taggedRectangle>
    </taggedRectangles>
</image>
```
|
|
@ -0,0 +1,27 @@
|
|||
# Dataset preparation config for SVT (text detection).
data_root = 'data/svt'
cache_path = 'data/cache'

# Where to fetch the raw archive and how to lay it out.
data_obtainer = dict(
    type='NaiveDataObtainer',
    cache_path=cache_path,
    data_root=data_root,
    files=[
        dict(
            url='http://www.iapr-tc11.org/dataset/SVT/svt.zip',
            save_name='svt.zip',
            md5='42d19160010d990ae6223b14f45eff88',
            split=['train', 'test'],
            content=['image', 'annotations'],
            # Each entry is a [source, destination] pair. The images land in
            # 'textdet_imgs/img' because SVTTextDetAnnParser joins
            # data_root/'textdet_imgs' with the xml's <imageName>, which
            # already carries the 'img/' prefix (e.g. 'img/14_03.jpg').
            mapping=[['svt/svt1/train.xml', 'annotations/train.xml'],
                     ['svt/svt1/test.xml', 'annotations/test.xml'],
                     ['svt/svt1/img', 'textdet_imgs/img']]),
    ])

# How the obtained files are turned into annotation files.
data_converter = dict(
    type='TextDetDataConverter',
    splits=['train', 'test'],
    data_root=data_root,
    # NOTE: the mapping is deliberately a plain string containing an
    # f-string expression, not an f-string itself; presumably it is
    # evaluated per split by the gatherer -- confirm against mono_gather.
    gatherer=dict(type='mono_gather', mapping="f'{split}.xml'"),
    parser=dict(type='SVTTextDetAnnParser', data_root=data_root),
    dumper=dict(type='JsonDumper'),
    delete=['annotations', 'svt'])
|
|
@ -0,0 +1,3 @@
|
|||
# Text recognition preparation config: builds on textdet.py (via _base_)
# and sets the converter type used for recognition.
_base_ = ['textdet.py']

data_converter = dict(type='TextRecogCropConverter')
|
|
@ -0,0 +1,3 @@
|
|||
# Text spotting preparation config: builds on textdet.py (via _base_)
# and sets the converter type used for spotting.
_base_ = ['textdet.py']

data_converter = dict(type='TextSpottingDataConverter')
|
|
@ -2,11 +2,12 @@
|
|||
from .coco_parser import COCOTextDetAnnParser
|
||||
from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
|
||||
ICDARTxtTextRecogAnnParser)
|
||||
from .svt_parser import SVTTextDetAnnParser
|
||||
from .totaltext_parser import TotaltextTextDetAnnParser
|
||||
from .wildreceipt_parser import WildreceiptKIEAnnParser
|
||||
|
||||
# Public parser classes exported by this package.
__all__ = [
    'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
    'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
    'COCOTextDetAnnParser', 'SVTTextDetAnnParser'
]
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
import xml.etree.ElementTree as ET
from typing import Iterator, List, Tuple

from ..data_preparer import DATA_PARSERS
from .base import BaseParser
|
||||
|
||||
|
||||
@DATA_PARSERS.register_module()
class SVTTextDetAnnParser(BaseParser):
    """SVT Text Detection Parser.

    Reads an SVT xml annotation file and converts every tagged rectangle into
    a polygon instance with a lower-cased transcription.

    Args:
        data_root (str): The root of the dataset. Defaults to None.
        nproc (int): The number of processes to parse the annotation. Defaults
            to 1.
    """

    def __init__(self, data_root: str = None, nproc: int = 1) -> None:
        super().__init__(data_root=data_root, nproc=nproc)

    def parse_files(self, files: str, split: str) -> List:
        """Parse annotations.

        Args:
            files (str): The path of the SVT xml annotation file.
            split (str): The split name. It is not used by this parser.

        Returns:
            List: A list of ``(image path, instances)`` tuples, one per
            ``<image>`` element in the annotation file.
        """
        assert isinstance(files, str)
        return list(self.loader(files))

    def loader(self, file_path: str) -> Iterator[Tuple[str, List]]:
        """Load annotation from SVT xml format file. See annotation example in
        dataset_zoo/svt/sample_anno.md.

        Args:
            file_path (str): The path of the annotation file.

        Yields:
            Iterator[Tuple[str, List]]: The image name and the annotation list.
            Each annotation is a dict with keys ``poly`` (eight ints, the
            rectangle corners starting at top-left, clockwise), ``text`` and
            ``ignore``.
        """
        # Using a path query instead of iterating ``find('taggedRectangles')``
        # keeps images without any tagged rectangle from raising: they simply
        # yield an empty instance list.
        rect_path = 'taggedRectangles/taggedRectangle'
        root = ET.parse(file_path).getroot()
        for image in root.findall('image'):
            image_name = osp.join(self.data_root, 'textdet_imgs',
                                  image.find('imageName').text)
            instances = list()
            for rectangle in image.findall(rect_path):
                x = int(rectangle.get('x'))
                y = int(rectangle.get('y'))
                w = int(rectangle.get('width'))
                h = int(rectangle.get('height'))
                # The text annotation of this dataset is not case sensitive.
                # All of the texts were labeled as upper case. We convert them
                # to lower case for convenience. ``findtext`` yields '' for an
                # empty or missing <tag>, so ``lower()`` never sees None.
                text = rectangle.findtext('tag', default='').lower()
                instances.append(
                    dict(
                        poly=[x, y, x + w, y, x + w, y + h, x, y + h],
                        text=text,
                        ignore=False))
            yield image_name, instances
|
|
@ -0,0 +1,52 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
from mmocr.datasets.preparers.parsers.svt_parser import SVTTextDetAnnParser
|
||||
from mmocr.utils import list_to_file
|
||||
|
||||
|
||||
class TestSVTParsers(unittest.TestCase):
    """Unit tests for the SVT annotation parser."""

    def setUp(self) -> None:
        # Every test gets its own scratch directory for the fake annotation.
        self.root = tempfile.TemporaryDirectory()

    def tearDown(self) -> None:
        # Remove the scratch directory explicitly instead of relying on
        # garbage collection of the TemporaryDirectory object.
        self.root.cleanup()

    def _create_dummy_svt_det(self):
        """Write a one-image SVT xml file and return its path."""
        fake_anno = [
            '<?xml version="1.0" encoding="utf-8"?>',
            '<tagset>',
            ' <image>',
            ' <imageName>img/test.jpg</imageName>',
            ' <Resolution x="1280" y="880"/>',
            ' <taggedRectangles>',
            ' <taggedRectangle height="75" width="236" x="375" y="253">',  # noqa: E501
            ' <tag>LIVING</tag>',
            ' </taggedRectangle>',
            ' <taggedRectangle height="76" width="175" x="639" y="272">',  # noqa: E501
            ' <tag>ROOM</tag>',
            ' </taggedRectangle>',
            ' <taggedRectangle height="87" width="281" x="839" y="283">',  # noqa: E501
            ' <tag>THEATERS</tag>',
            ' </taggedRectangle>',
            ' </taggedRectangles>',
            ' </image>',
            '</tagset>',
        ]
        ann_file = osp.join(self.root.name, 'svt_det.xml')
        list_to_file(ann_file, fake_anno)
        return ann_file

    def test_textdet_parsers(self):
        """The parser returns one sample with three lower-cased instances."""
        parser = SVTTextDetAnnParser(self.root.name)
        file = self._create_dummy_svt_det()
        samples = parser.parse_files(file, 'train')
        self.assertEqual(len(samples), 1)
        self.assertEqual(osp.basename(samples[0][0]), 'test.jpg')
        self.assertEqual(len(samples[0][1]), 3)
        self.assertEqual(samples[0][1][0]['text'], 'living')
        self.assertEqual(samples[0][1][1]['text'], 'room')
        self.assertEqual(samples[0][1][2]['text'], 'theaters')
        # x=375, y=253, w=236, h=75 -> corners clockwise from top-left.
        self.assertEqual(samples[0][1][0]['poly'],
                         [375, 253, 611, 253, 611, 328, 375, 328])
        self.assertEqual(samples[0][1][0]['ignore'], False)
|
Loading…
Reference in New Issue