diff --git a/dataset_zoo/svt/metafile.yml b/dataset_zoo/svt/metafile.yml
new file mode 100644
index 00000000..3636fd9b
--- /dev/null
+++ b/dataset_zoo/svt/metafile.yml
@@ -0,0 +1,29 @@
+Name: 'Street View Text Dataset (SVT)'
+Paper:
+  Title: Word Spotting in the Wild
+  URL: https://link.springer.com/content/pdf/10.1007/978-3-642-15549-9_43.pdf
+  Venue: ECCV
+  Year: '2010'
+  BibTeX: '@inproceedings{wang2010word,
+    title={Word spotting in the wild},
+    author={Wang, Kai and Belongie, Serge},
+    booktitle={European conference on computer vision},
+    pages={591--604},
+    year={2010},
+    organization={Springer}}'
+Data:
+  Website: http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset
+  Language:
+    - English
+  Scene:
+    - Natural Scene
+  Granularity:
+    - Word
+  Tasks:
+    - textdet
+    - textrecog
+    - textspotting
+  License:
+    Type: N/A
+    Link: N/A
+  Format: .xml
diff --git a/dataset_zoo/svt/sample_anno.md b/dataset_zoo/svt/sample_anno.md
new file mode 100644
index 00000000..cc0e40d2
--- /dev/null
+++ b/dataset_zoo/svt/sample_anno.md
@@ -0,0 +1,23 @@
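+**Text Detection/Recognition/Spotting**
+
+```xml
+<image>
+    <imageName>img/14_03.jpg</imageName>
+    <address>341 Southwest 10th Avenue Portland OR</address>
+    <lex>
+        LIVING,ROOM,THEATERS,KENNY,ZUKE,DELICATESSEN,CLYDE,COMMON,ACE,HOTEL,PORTLAND,ROSE,CITY,BOOKS,STUMPTOWN,COFFEE,ROASTERS,RED,CAP,GARAGE,FISH,GROTTO,SEAFOOD,RESTAURANT,AURA,RESTAURANT,LOUNGE,ROCCO,PIZZA,PASTA,BUFFALO,EXCHANGE,MARK,SPENCER,LIGHT,FEZ,BALLROOM,READING,FRENZY,ROXY,SCANDALS,MARTINOTTI,CAFE,DELI,CROWSENBERG,HALF
+    </lex>
+    <Resolution x="1280" y="880"/>
+    <taggedRectangles>
+        <taggedRectangle height="75" width="236" x="375" y="253">
+            <tag>LIVING</tag>
+        </taggedRectangle>
+        <taggedRectangle height="76" width="175" x="639" y="272">
+            <tag>ROOM</tag>
+        </taggedRectangle>
+        <taggedRectangle height="87" width="281" x="839" y="283">
+            <tag>THEATERS</tag>
+        </taggedRectangle>
+    </taggedRectangles>
+</image>
+```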
diff --git a/dataset_zoo/svt/textdet.py b/dataset_zoo/svt/textdet.py
new file mode 100644
index 00000000..ab95ce32
--- /dev/null
+++ b/dataset_zoo/svt/textdet.py
@@ -0,0 +1,36 @@
+# Preparation config for the text detection task of SVT.
+data_root = 'data/svt'
+cache_path = 'data/cache'
+
+# One archive is listed for both splits; it carries images and annotations.
+data_obtainer = dict(
+    type='NaiveDataObtainer',
+    cache_path=cache_path,
+    data_root=data_root,
+    files=[
+        dict(
+            url='http://www.iapr-tc11.org/dataset/SVT/svt.zip',
+            save_name='svt.zip',
+            md5='42d19160010d990ae6223b14f45eff88',
+            split=['train', 'test'],
+            content=['image', 'annotations'],
+            # NOTE(review): each pair looks like [path in the extracted
+            # archive, path under data_root] - confirm with the obtainer.
+            # Image names in the xml start with ``img/`` and the parser
+            # joins them onto ``textdet_imgs``, hence ``textdet_imgs/img``.
+            mapping=[['svt/svt1/train.xml', 'annotations/train.xml'],
+                     ['svt/svt1/test.xml', 'annotations/test.xml'],
+                     ['svt/svt1/img', 'textdet_imgs/img']]),
+    ])
+
+data_converter = dict(
+    type='TextDetDataConverter',
+    splits=['train', 'test'],
+    data_root=data_root,
+    # The mapping is the source text of an f-string, not an f-string itself.
+    # NOTE(review): presumably evaluated once per split - confirm.
+    gatherer=dict(type='mono_gather', mapping="f'{split}.xml'"),
+    parser=dict(type='SVTTextDetAnnParser', data_root=data_root),
+    dumper=dict(type='JsonDumper'),
+    # NOTE(review): presumably entries removed after conversion - confirm.
+    delete=['annotations', 'svt'])
diff --git a/dataset_zoo/svt/textrecog.py b/dataset_zoo/svt/textrecog.py
new file mode 100644
index 00000000..e18f2f1f
--- /dev/null
+++ b/dataset_zoo/svt/textrecog.py
@@ -0,0 +1,5 @@
+# Text recognition config for SVT: builds on textdet.py and names a different
+# converter type. NOTE(review): other settings presumably come from the base.
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextRecogCropConverter')
diff --git a/dataset_zoo/svt/textspotting.py b/dataset_zoo/svt/textspotting.py
new file mode 100644
index 00000000..413de5e8
--- /dev/null
+++ b/dataset_zoo/svt/textspotting.py
@@ -0,0 +1,5 @@
+# Text spotting config for SVT: builds on textdet.py and names a different
+# converter type. NOTE(review): other settings presumably come from the base.
+_base_ = ['textdet.py']
+
+data_converter = dict(type='TextSpottingDataConverter')
diff --git a/mmocr/datasets/preparers/parsers/__init__.py b/mmocr/datasets/preparers/parsers/__init__.py
index 83681eab..16ad41a0 100644
--- a/mmocr/datasets/preparers/parsers/__init__.py
+++ b/mmocr/datasets/preparers/parsers/__init__.py
@@ -2,11 +2,12 @@
from .coco_parser import COCOTextDetAnnParser
from .icdar_txt_parser import (ICDARTxtTextDetAnnParser,
ICDARTxtTextRecogAnnParser)
+from .svt_parser import SVTTextDetAnnParser
from .totaltext_parser import TotaltextTextDetAnnParser
from .wildreceipt_parser import WildreceiptKIEAnnParser
__all__ = [
'ICDARTxtTextDetAnnParser', 'ICDARTxtTextRecogAnnParser',
'TotaltextTextDetAnnParser', 'WildreceiptKIEAnnParser',
- 'COCOTextDetAnnParser'
+ 'COCOTextDetAnnParser', 'SVTTextDetAnnParser'
]
diff --git a/mmocr/datasets/preparers/parsers/svt_parser.py b/mmocr/datasets/preparers/parsers/svt_parser.py
new file mode 100644
index 00000000..2cd28522
--- /dev/null
+++ b/mmocr/datasets/preparers/parsers/svt_parser.py
@@ -0,0 +1,79 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import xml.etree.ElementTree as ET
+from typing import Iterator, List, Tuple
+
+from ..data_preparer import DATA_PARSERS
+from .base import BaseParser
+
+
+@DATA_PARSERS.register_module()
+class SVTTextDetAnnParser(BaseParser):
+    """SVT Text Detection Parser.
+
+    Converts every tagged rectangle in an SVT xml annotation file into a
+    four-point polygon with a lower-cased transcription.
+
+    Args:
+        data_root (str): The root of the dataset. Defaults to None.
+        nproc (int): The number of processes to parse the annotation. Defaults
+            to 1.
+    """
+
+    def __init__(self, data_root: str = None, nproc: int = 1) -> None:
+        super().__init__(data_root=data_root, nproc=nproc)
+
+    def parse_files(self, files: str, split: str) -> List:
+        """Parse one SVT annotation file.
+
+        Args:
+            files (str): Path of the xml annotation file. A single path is
+                required; anything else fails the assertion below.
+            split (str): Name of the split being parsed. Not used here.
+
+        Returns:
+            List[Tuple[str, List[dict]]]: One ``(image path, instances)``
+            pair for every image listed in the annotation file.
+        """
+        assert isinstance(files, str)
+        return list(self.loader(files))
+
+    def loader(self, file_path: str) -> Iterator[Tuple[str, List]]:
+        """Load annotation from SVT xml format file. See annotation example in
+        dataset_zoo/svt/sample_anno.md.
+
+        Args:
+            file_path (str): The path of the annotation file.
+
+        Yields:
+            Tuple[str, List]: The image path and its instances. The path is
+            ``self.data_root/textdet_imgs/<imageName>``; each instance is a
+            dict with the keys ``poly``, ``text`` and ``ignore``.
+        """
+        root = ET.parse(file_path).getroot()
+        for image in root.findall('image'):
+            image_name = osp.join(self.data_root, 'textdet_imgs',
+                                  image.find('imageName').text)
+            # ``find`` returns None when the image carries no
+            # ``taggedRectangles`` child; such an image yields no instances.
+            # Test identity, not truthiness: a childless element is falsy.
+            rectangles = image.find('taggedRectangles')
+            if rectangles is None:
+                rectangles = ()
+            instances = list()
+            for rectangle in rectangles:
+                x = int(rectangle.get('x'))
+                y = int(rectangle.get('y'))
+                w = int(rectangle.get('width'))
+                h = int(rectangle.get('height'))
+                # The text annotation of this dataset is not case sensitive.
+                # All of the texts were labeled as upper case. We convert them
+                # to lower case for convenience.
+                text = rectangle.find('tag').text.lower()
+                instances.append(
+                    dict(
+                        # Four corners of the box, starting at (x, y).
+                        poly=[x, y, x + w, y, x + w, y + h, x, y + h],
+                        text=text,
+                        ignore=False))
+            yield image_name, instances
diff --git a/tests/test_datasets/test_preparers/test_parsers/test_svt_parsers.py b/tests/test_datasets/test_preparers/test_parsers/test_svt_parsers.py
new file mode 100644
index 00000000..03a238a5
--- /dev/null
+++ b/tests/test_datasets/test_preparers/test_parsers/test_svt_parsers.py
@@ -0,0 +1,60 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+import os.path as osp
+import tempfile
+import unittest
+
+from mmocr.datasets.preparers.parsers.svt_parser import SVTTextDetAnnParser
+from mmocr.utils import list_to_file
+
+
+class TestSVTParsers(unittest.TestCase):
+    """Tests for ``SVTTextDetAnnParser``."""
+
+    def setUp(self) -> None:
+        self.root = tempfile.TemporaryDirectory()
+
+    def tearDown(self) -> None:
+        # Delete the temporary directory once each test is done instead of
+        # leaving it to the finalizer of the TemporaryDirectory object.
+        self.root.cleanup()
+
+    def _create_dummy_svt_det(self):
+        """Write a one-image SVT annotation file and return its path."""
+        fake_anno = [
+            '<?xml version="1.0" encoding="utf-8"?>',
+            '<tagset>',
+            '    <image>',
+            '        <imageName>img/test.jpg</imageName>',
+            '        <Resolution x="1280" y="880"/>',
+            '        <taggedRectangles>',
+            '            <taggedRectangle height="75" width="236" x="375" y="253">',  # noqa: E501
+            '                <tag>LIVING</tag>',
+            '            </taggedRectangle>',
+            '            <taggedRectangle height="76" width="175" x="639" y="272">',  # noqa: E501
+            '                <tag>ROOM</tag>',
+            '            </taggedRectangle>',
+            '            <taggedRectangle height="87" width="281" x="839" y="283">',  # noqa: E501
+            '                <tag>THEATERS</tag>',
+            '            </taggedRectangle>',
+            '        </taggedRectangles>',
+            '    </image>',
+            '</tagset>',
+        ]
+        ann_file = osp.join(self.root.name, 'svt_det.xml')
+        list_to_file(ann_file, fake_anno)
+        return ann_file
+
+    def test_textdet_parsers(self):
+        parser = SVTTextDetAnnParser(self.root.name)
+        file = self._create_dummy_svt_det()
+        samples = parser.parse_files(file, 'train')
+        self.assertEqual(len(samples), 1)
+        self.assertEqual(osp.basename(samples[0][0]), 'test.jpg')
+        self.assertEqual(len(samples[0][1]), 3)
+        self.assertEqual(samples[0][1][0]['text'], 'living')
+        self.assertEqual(samples[0][1][1]['text'], 'room')
+        self.assertEqual(samples[0][1][2]['text'], 'theaters')
+        # First rectangle: x=375, y=253, width=236, height=75.
+        self.assertEqual(samples[0][1][0]['poly'],
+                         [375, 253, 611, 253, 611, 328, 375, 328])
+        self.assertEqual(samples[0][1][0]['ignore'], False)