[Fix] Clear up some unused scripts (#1798)

pull/1803/head
Tong Gao 2023-03-22 14:00:55 +08:00 committed by GitHub
parent d0dc90253a
commit 1a379f2f1b
15 changed files with 0 additions and 2613 deletions


@@ -9,10 +9,8 @@ This page is a manual preparation guide for datasets not yet supported by [Datas
| Dataset | Images | | Annotation Files | | |
| :---------------: | :------------------------------------------------------: | :------------------------------------------------: | :-----------------------------------------------------------------: | :-----: | :-: |
| | | training | validation | testing | |
| CTW1500 | [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) | - | - | - | |
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - | | |
| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | - | |
| Synthtext | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | instances_training.lmdb ([data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb), [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb)) | - | - | |
| CurvedSynText150k | [homepage](https://github.com/aim-uofa/AdelaiDet/blob/master/datasets/README.md) \| [Part1](https://drive.google.com/file/d/1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj/view?usp=sharing) \| [Part2](https://drive.google.com/file/d/1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY/view?usp=sharing) | [instances_training.json](https://download.openmmlab.com/mmocr/data/curvedsyntext/instances_training.json) | - | - | |
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - | - | |
| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - | - | |
@@ -62,47 +60,6 @@ backend used in MMCV would read them and apply the rotation on the images. Howe
inconsistency results in false examples in the training set. Therefore, users should use `dict(type='LoadImageFromFile', color_type='color_ignore_orientation')` in pipelines to change MMCV's default loading behaviour. (see [DBNet's pipeline config](https://github.com/open-mmlab/mmocr/blob/main/configs/_base_/det_pipelines/dbnet_pipeline.py) for example)
```
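For reference, the fix drops into a training pipeline as sketched below. This is a minimal sketch: only the `LoadImageFromFile` line comes from this guide, while the annotation-loading step and the trailing transforms are placeholders that should follow your chosen model's config.
```python
# Minimal detection pipeline sketch; only the first step is prescribed above.
train_pipeline = [
    # Read images while ignoring EXIF orientation info, as recommended above
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    # Placeholder annotation loading; exact type/args depend on your config
    dict(type='LoadOCRAnnotations', with_bbox=True, with_polygon=True),
    # ... resize / augmentation / packing transforms from your model config
]
```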
## CTW1500
- Step0: Read [Important Note](#important-note)
- Step1: Download `train_images.zip`, `test_images.zip`, `train_labels.zip`, `test_labels.zip` from [github](https://github.com/Yuliang-Liu/Curve-Text-Detector)
```bash
mkdir ctw1500 && cd ctw1500
mkdir imgs && mkdir annotations
# For annotations
cd annotations
wget -O train_labels.zip https://universityofadelaide.box.com/shared/static/jikuazluzyj4lq6umzei7m2ppmt3afyw.zip
wget -O test_labels.zip https://cloudstor.aarnet.edu.au/plus/s/uoeFl0pCN9BOCN5/download
unzip train_labels.zip && mv ctw1500_train_labels training
unzip test_labels.zip -d test
cd ..
# For images
cd imgs
wget -O train_images.zip https://universityofadelaide.box.com/shared/static/py5uwlfyyytbb2pxzq9czvu6fuqbjdh8.zip
wget -O test_images.zip https://universityofadelaide.box.com/shared/static/t4w48ofnqkdw7jyc4t11nsukoeqk9c3d.zip
unzip train_images.zip && mv train_images training
unzip test_images.zip && mv test_images test
```
- Step2: Generate `instances_training.json` and `instances_test.json` with the following command (an optional sanity check is sketched after the directory tree below):
```bash
python tools/dataset_converters/textdet/ctw1500_converter.py /path/to/ctw1500 -o /path/to/ctw1500 --split-list training test
```
- The resulting directory structure looks like the following:
```text
├── ctw1500
│   ├── imgs
│   ├── annotations
│   ├── instances_training.json
│   └── instances_test.json
```
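As an optional sanity check, you can confirm that the generated annotation file parses and eyeball its top-level layout. The snippet below makes no assumption about the exact schema emitted by `ctw1500_converter.py`; it only prints the top-level keys or length.
```python
# Optional sanity check for the converter output; adjust the path as needed
import json

with open('/path/to/ctw1500/instances_training.json') as f:
    data = json.load(f)
# A dict prints its top-level keys; a list prints its length
print(list(data) if isinstance(data, dict) else len(data))
```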
## ICDAR 2011 (Born-Digital Images)
- Step1: Download `Challenge1_Training_Task12_Images.zip`, `Challenge1_Training_Task1_GT.zip`, `Challenge1_Test_Task12_Images.zip`, and `Challenge1_Test_Task1_GT.zip` from [homepage](https://rrc.cvc.uab.es/?ch=1&com=downloads) `Task 1.1: Text Localization (2013 edition)`.
@@ -156,22 +113,6 @@ inconsistency results in false examples in the training set. Therefore, users sh
│   └── instances_val.json
```
## SynthText
- Step1: Download `SynthText.zip` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) and extract its content to `synthtext/imgs`.
- Step2: Download [data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb) and [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb) to `synthtext/instances_training.lmdb/`.
- The resulting directory structure looks like the following:
```text
├── synthtext
│   ├── imgs
│   └── instances_training.lmdb
│   ├── data.mdb
│   └── lock.mdb
```
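The annotation lmdb can be inspected with a few lines of Python. This sketch assumes the hosted file uses the same layout as this repo's SynthText converter (stringified integer keys, JSON values, plus a `total_number` entry); adjust it if the file differs.
```python
# Read one entry back from the annotation lmdb (layout assumed, see above)
import json

import lmdb

env = lmdb.open('synthtext/instances_training.lmdb', readonly=True, lock=False)
with env.begin() as txn:
    total = int(txn.get(b'total_number').decode('utf8'))
    first = json.loads(txn.get(b'0').decode('utf8'))  # annotation of image 0
print(total, first['file_name'], len(first['annotations']))
```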
## CurvedSynText150k
- Step1: Download [syntext1.zip](https://drive.google.com/file/d/1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj/view?usp=sharing) and [syntext2.zip](https://drive.google.com/file/d/1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY/view?usp=sharing) to `CurvedSynText150k/`.


@@ -11,7 +11,6 @@ This page is a manual preparation guide for datasets not yet supported by [Datas
| | | training | test |
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - |
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - |
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - |
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - |
| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - |
@@ -110,44 +109,6 @@ For users in China, these datasets can also be downloaded from [OpenDataLab](htt
│ └── train_words
```
## MJSynth (Syn90k)
- Step1: Download `mjsynth.tar.gz` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/)
- Step2: Download [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) (8,919,273 annotations) and [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) (2,400,000 randomly sampled annotations).
```{note}
Please make sure you're using the right annotation to train the model by checking its dataset specs in Model Zoo.
```
- Step3:
```bash
mkdir Syn90k && cd Syn90k
mv /path/to/mjsynth.tar.gz .
tar -xzf mjsynth.tar.gz
mv /path/to/subset_train_labels.json .
mv /path/to/train_labels.json .
# create soft link
cd /path/to/mmocr/data/recog/
ln -s /path/to/Syn90k Syn90k
```
- After running the above commands, the directory structure should look as follows:
```text
├── Syn90k
│ ├── subset_train_labels.json
│ ├── train_labels.json
│ └── mnt
```
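To verify that you picked the intended annotation file, you can count its samples against the figures above (8,919,273 vs. 2,400,000). The snippet assumes the 1.x annotation JSON keeps its samples under a top-level `data_list`; if the layout differs, it falls back to printing the top-level keys.
```python
# Count samples in the downloaded label file (layout assumption noted above)
import json

with open('Syn90k/train_labels.json') as f:
    labels = json.load(f)
if isinstance(labels, dict) and 'data_list' in labels:
    # Expect 8,919,273 for train_labels.json, 2,400,000 for the subset
    print(len(labels['data_list']))
else:
    print(list(labels) if isinstance(labels, dict) else len(labels))
```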
## SynthAdd
- Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x)


@@ -1,147 +0,0 @@
# Text Detection
```{warning}
This page is outdated relative to the English documentation. Please switch to the English version for the latest content.
```
```{note}
We are working to add more datasets to [Dataset Preparer](./dataset_preparer.md). For datasets that [Dataset Preparer](./dataset_preparer.md) does not yet fully support, this page provides a series of manual download steps for users who need them.
```
## Overview
| Dataset | Images | | Annotation Files | |
| :--------: | :-----------------------------------------------: | :-------------------------------------------: | :------------------------------------------------: | :--------------------------------------------: |
| | | training | validation | testing |
| CTW1500 | [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) | - | - | - |
| ICDAR2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) | - | [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) |
| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | - |
| Synthtext | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | instances_training.lmdb ([data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb), [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb)) | - | - |
| TextOCR | [homepage](https://textvqa.org/textocr/dataset) | - | - | - |
| Totaltext | [homepage](https://github.com/cs-chan/Total-Text-Dataset) | - | - | - |
For users in China, we also recommend the open data platform [OpenDataLab](https://opendatalab.com/) for a better download experience:
- [CTW1500](https://opendatalab.com/SCUT-CTW1500?source=OpenMMLab%20GitHub)
- [ICDAR2013](https://opendatalab.com/ICDAR_2013?source=OpenMMLab%20GitHub)
- [ICDAR2015](https://opendatalab.com/ICDAR2015?source=OpenMMLab%20GitHub)
- [Totaltext](https://opendatalab.com/TotalText?source=OpenMMLab%20GitHub)
- [MSRA-TD500](https://opendatalab.com/MSRA-TD500?source=OpenMMLab%20GitHub)
## Important Note
```{note}
**If you want to train models on CTW1500, ICDAR 2015/2017, or Totaltext**, please note that some images in these datasets carry orientation information in their EXIF metadata. The OpenCV backend used by MMCV rotates such images according to that information by default; since the annotations were made on the original images, this conflict invalidates some training samples. Users should therefore set `dict(type='LoadImageFromFile', color_type='color_ignore_orientation')` in their pipelines to avoid this MMCV behaviour. (See [DBNet's pipeline config](https://github.com/open-mmlab/mmocr/blob/main/configs/_base_/det_pipelines/dbnet_pipeline.py) for a reference config.)
```
## Preparation Steps
### ICDAR 2015
- Step 1: Download `ch4_training_images.zip`, `ch4_test_images.zip`, `ch4_training_localization_transcription_gt.zip`, and `Challenge4_Test_Task1_GT.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads); they correspond to the training images, test images, training annotations, and test annotations, respectively.
- Step 2: Run the following commands to move the data into the corresponding directories
```bash
mkdir icdar2015 && cd icdar2015
mkdir imgs && mkdir annotations
# Move the images into place:
mv ch4_training_images imgs/training
mv ch4_test_images imgs/test
# Move the annotations into place:
mv ch4_training_localization_transcription_gt annotations/training
mv Challenge4_Test_Task1_GT annotations/test
```
- Step 3: Download [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) and [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) and put them in the `icdar2015` folder. Alternatively, generate `instances_training.json` and `instances_test.json` directly with the following command:
```bash
python tools/data/textdet/icdar_converter.py /path/to/icdar2015 -o /path/to/icdar2015 -d icdar2015 --split-list training test
```
### ICDAR 2017
- Similar to the steps above.
### CTW1500
- Step 1: Run the following commands to download `train_images.zip`, `test_images.zip`, `train_labels.zip`, and `test_labels.zip` from the [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) and arrange them into the corresponding directories:
```bash
mkdir ctw1500 && cd ctw1500
mkdir imgs && mkdir annotations
# Download and set up the annotations
cd annotations
wget -O train_labels.zip https://universityofadelaide.box.com/shared/static/jikuazluzyj4lq6umzei7m2ppmt3afyw.zip
wget -O test_labels.zip https://cloudstor.aarnet.edu.au/plus/s/uoeFl0pCN9BOCN5/download
unzip train_labels.zip && mv ctw1500_train_labels training
unzip test_labels.zip -d test
cd ..
# Download and set up the images
cd imgs
wget -O train_images.zip https://universityofadelaide.box.com/shared/static/py5uwlfyyytbb2pxzq9czvu6fuqbjdh8.zip
wget -O test_images.zip https://universityofadelaide.box.com/shared/static/t4w48ofnqkdw7jyc4t11nsukoeqk9c3d.zip
unzip train_images.zip && mv train_images training
unzip test_images.zip && mv test_images test
```
- Step 2: Run the following command to generate `instances_training.json` and `instances_test.json`:
```bash
python tools/data/textdet/ctw1500_converter.py /path/to/ctw1500 -o /path/to/ctw1500 --split-list training test
```
### SynthText
- Download [data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb) and [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb) and place them in `synthtext/instances_training.lmdb/`.
### TextOCR
- Step 1: Download [train_val_images.zip](https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip), [TextOCR_0.1_train.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json), and [TextOCR_0.1_val.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json) into the `textocr` folder.
```bash
mkdir textocr && cd textocr
# Download the TextOCR dataset
wget https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json
# Move the images into the corresponding directory
unzip -q train_val_images.zip
mv train_images train
```
- Step 2: Generate `instances_training.json` and `instances_val.json`:
```bash
python tools/data/textdet/textocr_converter.py /path/to/textocr
```
### Totaltext
- Step 1: Download `totaltext.zip` from [github dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset) and `groundtruth_text.zip` from [github Groundtruth](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Groundtruth/Text). (We recommend downloading the annotations in `.mat` format, since the conversion script we provide, `totaltext_converter.py`, only supports `.mat` files.)
```bash
mkdir totaltext && cd totaltext
mkdir imgs && mkdir annotations
# Images
# Run in ./totaltext
unzip totaltext.zip
mv Images/Train imgs/training
mv Images/Test imgs/test
# Annotations
unzip groundtruth_text.zip
cd Groundtruth
mv Polygon/Train ../annotations/training
mv Polygon/Test ../annotations/test
```
- Step 2: Generate `instances_training.json` and `instances_test.json` with the following command:
```bash
python tools/data/textdet/totaltext_converter.py /path/to/totaltext -o /path/to/totaltext --split-list training test
```


@@ -1,314 +0,0 @@
# Text Recognition
```{warning}
This page is outdated relative to the English documentation. Please switch to the English version for the latest content.
```
```{note}
We are working to add more datasets to [Dataset Preparer](./dataset_preparer.md). For datasets that [Dataset Preparer](./dataset_preparer.md) does not yet fully support, this page provides a series of manual download steps for users who need them.
```
## Overview
**Datasets for text recognition should be arranged in the following directory structure:**
```text
├── mixture
│   ├── coco_text
│ │ ├── train_label.txt
│ │ ├── train_words
│   ├── icdar_2011
│ │ ├── training_label.txt
│ │ ├── Challenge1_Training_Task3_Images_GT
│   ├── icdar_2013
│ │ ├── train_label.txt
│ │ ├── test_label_1015.txt
│ │ ├── test_label_1095.txt
│ │ ├── Challenge2_Training_Task3_Images_GT
│ │ ├── Challenge2_Test_Task3_Images
│   ├── icdar_2015
│ │ ├── train_label.txt
│ │ ├── test_label.txt
│ │ ├── ch4_training_word_images_gt
│ │ ├── ch4_test_word_images_gt
│   ├── III5K
│ │ ├── train_label.txt
│ │ ├── test_label.txt
│ │ ├── train
│ │ ├── test
│   ├── ct80
│ │ ├── test_label.txt
│ │ ├── image
│   ├── svt
│ │ ├── test_label.txt
│ │ ├── image
│   ├── svtp
│ │ ├── test_label.txt
│ │ ├── image
│   ├── Syn90k
│ │ ├── shuffle_labels.txt
│ │ ├── label.txt
│ │ ├── label.lmdb
│ │ ├── mnt
│   ├── SynthText
│ │ ├── alphanumeric_labels.txt
│ │ ├── shuffle_labels.txt
│ │ ├── instances_train.txt
│ │ ├── label.txt
│ │ ├── label.lmdb
│ │ ├── synthtext
│   ├── SynthAdd
│ │ ├── label.txt
│ │ ├── label.lmdb
│ │ ├── SynthText_Add
│   ├── TextOCR
│ │ ├── image
│ │ ├── train_label.txt
│ │ ├── val_label.txt
│   ├── Totaltext
│ │ ├── imgs
│ │ ├── annotations
│ │ ├── train_label.txt
│ │ ├── test_label.txt
│   ├── OpenVINO
│ │ ├── image_1
│ │ ├── image_2
│ │ ├── image_5
│ │ ├── image_f
│ │ ├── image_val
│ │ ├── train_1_label.txt
│ │ ├── train_2_label.txt
│ │ ├── train_5_label.txt
│ │ ├── train_f_label.txt
│ │ ├── val_label.txt
```
| Dataset | Images | Annotation Files | |
| :-------------------: | :---------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| | | training | test |
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/coco_text/train_label.txt) | - |
| icdar_2011 | [homepage](http://www.cvc.uab.es/icdar2011competition/?com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) | - |
| icdar_2013 | [homepage](https://rrc.cvc.uab.es/?ch=2&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/train_label.txt) | [test_label_1015.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/test_label_1015.txt) |
| icdar_2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/test_label.txt) |
| IIIT5K | [homepage](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/train_label.txt) | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/test_label.txt) |
| ct80 | [homepage](http://cs-chan.com/downloads_CUTE80_dataset.html) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/ct80/test_label.txt) |
| svt | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svt/test_label.txt) |
| svtp | [unofficial homepage\*](https://github.com/Jyouhou/Case-Sensitive-Scene-Text-Recognition-Datasets) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svtp/test_label.txt) |
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/Syn90k/shuffle_labels.txt) \| [label.txt](https://download.openmmlab.com/mmocr/data/mixture/Syn90k/label.txt) | - |
| SynthText (Synth800k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [alphanumeric_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/alphanumeric_labels.txt) \| [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/shuffle_labels.txt) \| [instances_train.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/instances_train.txt) \| [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/label.txt) | - |
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x) | [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthAdd/label.txt) | - |
| TextOCR | [homepage](https://textvqa.org/textocr/dataset) | - | - |
| Totaltext | [homepage](https://github.com/cs-chan/Total-Text-Dataset) | - | - |
| OpenVINO | [homepage](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
(\*) Note: Since the official download link is no longer accessible, we provide an unofficial mirror for reference; however, we cannot guarantee the accuracy of its data.
For users in China, we also recommend the open data platform [OpenDataLab](https://opendatalab.com/) for a better download experience:
- [icdar_2013](https://opendatalab.com/ICDAR_2013?source=OpenMMLab%20GitHub)
- [icdar_2015](https://opendatalab.com/ICDAR2015?source=OpenMMLab%20GitHub)
- [IIIT5K](https://opendatalab.com/IIIT_5K?source=OpenMMLab%20GitHub)
- [ct80](https://opendatalab.com/CUTE_80?source=OpenMMLab%20GitHub)
- [svt](https://opendatalab.com/SVT?source=OpenMMLab%20GitHub)
- [Totaltext](https://opendatalab.com/TotalText?source=OpenMMLab%20GitHub)
- [IAM](https://opendatalab.com/IAM_Handwriting?source=OpenMMLab%20GitHub)
## Preparation Steps
### ICDAR 2013
- Step 1: Download `Challenge2_Test_Task3_Images.zip` and `Challenge2_Training_Task3_Images_GT.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=2&com=downloads)
- Step 2: Download [test_label_1015.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/test_label_1015.txt) and [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/train_label.txt)
### ICDAR 2015
- Step 1: Download `ch4_training_word_images_gt.zip` and `ch4_test_word_images_gt.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads)
- Step 2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) and [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/test_label.txt)
### IIIT5K
- Step 1: Download `IIIT5K-Word_V3.0.tar.gz` from the [homepage](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html)
- Step 2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/train_label.txt) and [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/test_label.txt)
### svt
- Step 1: Download `svt.zip` from the [homepage](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset)
- Step 2: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svt/test_label.txt)
- Step 3:
```bash
python tools/data/textrecog/svt_converter.py <download_svt_dir_path>
```
### ct80
- Step 1: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/ct80/test_label.txt)
### svtp
- Step 1: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svtp/test_label.txt)
### coco_text
- Step 1: Download the files from the [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads)
- Step 2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/coco_text/train_label.txt)
### MJSynth (Syn90k)
- Step 1: Download `mjsynth.tar.gz` from the [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/)
- Step 2: Download [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/Syn90k/shuffle_labels.txt)
- Step 3:
```bash
mkdir Syn90k && cd Syn90k
mv /path/to/mjsynth.tar.gz .
tar -xzf mjsynth.tar.gz
mv /path/to/shuffle_labels.txt .
mv /path/to/label.txt .
# Create a soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/Syn90k Syn90k
```
### SynthText (Synth800k)
- Step 1: Download `SynthText.zip` from the [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/)
- Step 2: Depending on your needs, download the most suitable annotation files: [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/label.txt) (7,266,686 annotations), [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/shuffle_labels.txt) (2,400,000 randomly sampled annotations), [alphanumeric_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/alphanumeric_labels.txt) (7,239,272 annotations containing only digits and letters), or [instances_train.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/instances_train.txt) (7,266,686 character-level annotations)
- Step 3:
```bash
mkdir SynthText && cd SynthText
mv /path/to/SynthText.zip .
unzip SynthText.zip
mv SynthText synthtext
mv /path/to/shuffle_labels.txt .
mv /path/to/label.txt .
mv /path/to/alphanumeric_labels.txt .
mv /path/to/instances_train.txt .
# Create a soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/SynthText SynthText
```
- Step 4: Generate the cropped images and annotations:
```bash
cd /path/to/mmocr
python tools/data/textrecog/synthtext_converter.py data/mixture/SynthText/gt.mat data/mixture/SynthText/ data/mixture/SynthText/synthtext/SynthText_patch_horizontal --n_proc 8
```
### SynthAdd
- Step 1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code: 627x)
- Step 2: Download [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthAdd/label.txt)
- Step 3:
```bash
mkdir SynthAdd && cd SynthAdd
mv /path/to/SynthText_Add.zip .
unzip SynthText_Add.zip
mv /path/to/label.txt .
# Create a soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/SynthAdd SynthAdd
```
````{tip}
To convert annotation files from `.txt` to `.lmdb` format, run:
```bash
python tools/data/utils/txt2lmdb.py -i <txt_label_path> -o <lmdb_label_path>
```
For example:
```bash
python tools/data/utils/txt2lmdb.py -i data/mixture/Syn90k/label.txt -o data/mixture/Syn90k/label.lmdb
```
````
### TextOCR
- Step 1: Download [train_val_images.zip](https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip), [TextOCR_0.1_train.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json), and [TextOCR_0.1_val.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json) into the `textocr/` directory.
```bash
mkdir textocr && cd textocr
# Download the TextOCR dataset
wget https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json
# For the images
unzip -q train_val_images.zip
mv train_images train
```
- Step 2: Crop the images and generate `train_label.txt` and `val_label.txt` with four parallel processes, using the following command:
```bash
python tools/data/textrecog/textocr_converter.py /path/to/textocr 4
```
### Totaltext
- Step 1: Download `totaltext.zip` from [github dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset), then download `groundtruth_text.zip` from [github Groundtruth](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Groundtruth/Text) (we recommend downloading the annotations in `.mat` format, since the conversion tool we provide, `totaltext_converter.py`, only supports `.mat` files)
```bash
mkdir totaltext && cd totaltext
mkdir imgs && mkdir annotations
# For the images
# Run in the ./totaltext directory
unzip totaltext.zip
mv Images/Train imgs/training
mv Images/Test imgs/test
# For the annotations
unzip groundtruth_text.zip
cd Groundtruth
mv Polygon/Train ../annotations/training
mv Polygon/Test ../annotations/test
```
- Step 2: Generate the cropped annotation files `train_label.txt` and `test_label.txt` with the following command (the cropped images will be saved to `data/totaltext/dst_imgs/`):
```bash
python tools/data/textrecog/totaltext_converter.py /path/to/totaltext -o /path/to/totaltext --split-list training test
```
### OpenVINO
- Step 0: Install [awscli](https://aws.amazon.com/cli/).
- Step 1: Download the [Open Images](https://github.com/cvdfoundation/open-images-dataset#download-images-with-bounding-boxes-annotations) subsets `train_1`, `train_2`, `train_5`, `train_f`, and `validation` into `openvino/`
```bash
mkdir openvino && cd openvino
# Download the Open Images subsets
for s in 1 2 5 f; do
aws s3 --no-sign-request cp s3://open-images-dataset/tar/train_${s}.tar.gz .
done
aws s3 --no-sign-request cp s3://open-images-dataset/tar/validation.tar.gz .
# Download the annotation files
for s in 1 2 5 f; do
wget https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text/text_spotting_openimages_v5_train_${s}.json
done
wget https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text/text_spotting_openimages_v5_validation.json
# Extract the datasets
mkdir -p openimages_v5/val
for s in 1 2 5 f; do
tar zxf train_${s}.tar.gz -C openimages_v5
done
tar zxf validation.tar.gz -C openimages_v5/val
```
- Step 2: Run the following command to generate the annotations `train_{1,2,5,f}_label.txt` and `val_label.txt` with 4 processes and crop the original images:
```bash
python tools/data/textrecog/openvino_converter.py /path/to/openvino 4
```


@@ -1,233 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os.path as osp
import xml.etree.ElementTree as ET
from functools import partial
import mmcv
import mmengine
import numpy as np
from shapely.geometry import Polygon
from mmocr.utils import dump_ocr_data, list_from_file
def collect_files(img_dir, gt_dir, split):
"""Collect all images and their corresponding groundtruth files.
Args:
img_dir(str): The image directory
gt_dir(str): The groundtruth directory
split(str): The split of dataset. Namely: training or test
Returns:
files(list): The list of tuples (img_file, groundtruth_file)
"""
assert isinstance(img_dir, str)
assert img_dir
assert isinstance(gt_dir, str)
assert gt_dir
# note that we handle png and jpg only. Pls convert others such as gif to
# jpg or png offline
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
imgs_list = []
for suffix in suffixes:
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
files = []
if split == 'training':
for img_file in imgs_list:
gt_file = gt_dir + '/' + osp.splitext(
osp.basename(img_file))[0] + '.xml'
files.append((img_file, gt_file))
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
elif split == 'test':
for img_file in imgs_list:
gt_file = gt_dir + '/000' + osp.splitext(
osp.basename(img_file))[0] + '.txt'
files.append((img_file, gt_file))
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
return files
def collect_annotations(files, split, nproc=1):
"""Collect the annotation information.
Args:
files(list): The list of tuples (image_file, groundtruth_file)
split(str): The split of dataset. Namely: training or test
nproc(int): The number of process to collect annotations
Returns:
images(list): The list of image information dicts
"""
assert isinstance(files, list)
assert isinstance(split, str)
assert isinstance(nproc, int)
load_img_info_with_split = partial(load_img_info, split=split)
if nproc > 1:
images = mmengine.track_parallel_progress(
load_img_info_with_split, files, nproc=nproc)
else:
images = mmengine.track_progress(load_img_info_with_split, files)
return images
def load_txt_info(gt_file, img_info):
anno_info = []
for line in list_from_file(gt_file):
# each line has one polygon (n vertices) and one text.
# e.g., 695,885,866,888,867,1146,696,1143,####Latin 9
line = line.strip()
strs = line.split(',')
category_id = 1
assert strs[28][0] == '#'
xy = [int(x) for x in strs[0:28]]
assert len(xy) == 28
coordinates = np.array(xy).reshape(-1, 2)
polygon = Polygon(coordinates)
iscrowd = 0
area = polygon.area
# convert to COCO style XYWH format
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
text = strs[28][4:]
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox,
area=area,
text=text,
segmentation=[xy])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def load_xml_info(gt_file, img_info):
obj = ET.parse(gt_file)
anno_info = []
for image in obj.getroot(): # image
for box in image: # box
h = box.attrib['height']
w = box.attrib['width']
x = box.attrib['left']
y = box.attrib['top']
text = box[0].text
segs = box[1].text
pts = segs.strip().split(',')
pts = [int(x) for x in pts]
assert len(pts) == 28
# pts = []
# for iter in range(2,len(box)):
# pts.extend([int(box[iter].attrib['x']),
# int(box[iter].attrib['y'])])
iscrowd = 0
category_id = 1
bbox = [int(x), int(y), int(w), int(h)]
coordinates = np.array(pts).reshape(-1, 2)
polygon = Polygon(coordinates)
area = polygon.area
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox,
area=area,
text=text,
segmentation=[pts])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def load_img_info(files, split):
"""Load the information of one image.
Args:
files(tuple): The tuple of (img_file, groundtruth_file)
split(str): The split of dataset: training or test
Returns:
img_info(dict): The dict of the img and annotation information
"""
assert isinstance(files, tuple)
assert isinstance(split, str)
img_file, gt_file = files
# read imgs with ignoring orientations
img = mmcv.imread(img_file, 'unchanged')
split_name = osp.basename(osp.dirname(img_file))
img_info = dict(
# remove img_prefix for filename
file_name=osp.join(split_name, osp.basename(img_file)),
height=img.shape[0],
width=img.shape[1],
# anno_info=anno_info,
segm_file=osp.join(split_name, osp.basename(gt_file)))
if split == 'training':
img_info = load_xml_info(gt_file, img_info)
elif split == 'test':
img_info = load_txt_info(gt_file, img_info)
else:
raise NotImplementedError
return img_info
def parse_args():
parser = argparse.ArgumentParser(
description='Convert ctw1500 annotations to COCO format')
parser.add_argument('root_path', help='ctw1500 root path')
parser.add_argument('-o', '--out-dir', help='output path')
parser.add_argument(
'--split-list',
nargs='+',
help='a list of splits. e.g., "--split-list training test"')
parser.add_argument(
'--nproc', default=1, type=int, help='number of process')
args = parser.parse_args()
return args
def main():
args = parse_args()
root_path = args.root_path
out_dir = args.out_dir if args.out_dir else root_path
mmengine.mkdir_or_exist(out_dir)
img_dir = osp.join(root_path, 'imgs')
gt_dir = osp.join(root_path, 'annotations')
set_name = {}
for split in args.split_list:
set_name.update({split: 'instances_' + split + '.json'})
assert osp.exists(osp.join(img_dir, split))
for split, json_name in set_name.items():
print(f'Converting {split} into {json_name}')
with mmengine.Timer(
print_tmpl='It takes {}s to convert ctw1500 annotation'):
files = collect_files(
osp.join(img_dir, split), osp.join(gt_dir, split), split)
image_infos = collect_annotations(files, split, nproc=args.nproc)
dump_ocr_data(image_infos, osp.join(out_dir, json_name), 'textdet')
if __name__ == '__main__':
main()
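# Example invocation, as given in the dataset preparation guide above:
#   python tools/dataset_converters/textdet/ctw1500_converter.py \
#       /path/to/ctw1500 -o /path/to/ctw1500 --split-list training test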


@@ -1,167 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import mmcv
import mmengine
from mmocr.utils import dump_ocr_data
def collect_files(img_dir, gt_dir, split):
"""Collect all images and their corresponding groundtruth files.
Args:
img_dir (str): The image directory
gt_dir (str): The groundtruth directory
split (str): The split of dataset, namely training or test
Returns:
files (list): The list of tuples (img_file, groundtruth_file)
"""
assert isinstance(img_dir, str)
assert img_dir
assert isinstance(gt_dir, str)
assert gt_dir
ann_list, imgs_list, splits = [], [], []
for img in os.listdir(img_dir):
img_path = osp.join(img_dir, img)
imgs_list.append(img_path)
ann_list.append(osp.join(gt_dir, 'gt_' + img.split('.')[0] + '.txt'))
splits.append(split)
files = list(zip(sorted(imgs_list), sorted(ann_list), splits))
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
return files
def collect_annotations(files, nproc=1):
"""Collect the annotation information.
Args:
files (list): The list of tuples (image_file, groundtruth_file)
nproc (int): The number of process to collect annotations
Returns:
images (list): The list of image information dicts
"""
assert isinstance(files, list)
assert isinstance(nproc, int)
if nproc > 1:
images = mmengine.track_parallel_progress(
load_img_info, files, nproc=nproc)
else:
images = mmengine.track_progress(load_img_info, files)
return images
def load_img_info(files):
"""Load the information of one image.
Args:
files (tuple): The tuple of (img_file, groundtruth_file, split)
Returns:
img_info (dict): The dict of the img and annotation information
"""
assert isinstance(files, tuple)
img_file, gt_file, split = files
# read imgs while ignoring orientations
img = mmcv.imread(img_file, 'unchanged')
img_info = dict(
file_name=osp.join(osp.basename(img_file)),
height=img.shape[0],
width=img.shape[1],
segm_file=osp.join(osp.basename(gt_file)))
# IC13 uses different separators in training and test gt files
if split == 'training':
separator = ' '
elif split == 'test':
separator = ','
else:
raise NotImplementedError
if osp.splitext(gt_file)[1] == '.txt':
img_info = load_txt_info(gt_file, img_info, separator)
else:
raise NotImplementedError
return img_info
def load_txt_info(gt_file, img_info, separator):
"""Collect the annotation information.
The annotation format is as the following:
[train]
left top right bottom "transcription"
[test]
left, top, right, bottom, "transcription"
Args:
gt_file (str): The path to ground-truth
img_info (dict): The dict of the img and annotation information
Returns:
img_info (dict): The dict of the img and annotation information
"""
anno_info = []
with open(gt_file) as f:
lines = f.readlines()
for line in lines:
xmin, ymin, xmax, ymax = line.split(separator)[0:4]
x = max(0, int(xmin))
y = max(0, int(ymin))
w = int(xmax) - x
h = int(ymax) - y
bbox = [x, y, w, h]
segmentation = [x, y, x + w, y, x + w, y + h, x, y + h]
anno = dict(
iscrowd=0,
category_id=1,
bbox=bbox,
area=w * h,
segmentation=[segmentation])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def parse_args():
parser = argparse.ArgumentParser(
description='Generate training and test set of IC13')
parser.add_argument('root_path', help='Root dir path of IC13')
parser.add_argument(
'--nproc', default=1, type=int, help='Number of process')
args = parser.parse_args()
return args
def main():
args = parse_args()
root_path = args.root_path
for split in ['training', 'test']:
print(f'Processing {split} set...')
with mmengine.Timer(
print_tmpl='It takes {}s to convert IC13 annotation'):
files = collect_files(
osp.join(root_path, 'imgs', split),
osp.join(root_path, 'annotations', split), split)
image_infos = collect_annotations(files, nproc=args.nproc)
dump_ocr_data(image_infos,
osp.join(root_path, 'instances_' + split + '.json'),
'textdet')
if __name__ == '__main__':
main()
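# Example invocation (script path illustrative; arguments per parse_args above):
#   python ic13_converter.py /path/to/icdar2013 --nproc 4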


@@ -1,185 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os.path as osp
from functools import partial
import mmcv
import mmengine
import numpy as np
from shapely.geometry import Polygon
from mmocr.utils import dump_ocr_data, list_from_file
def collect_files(img_dir, gt_dir):
"""Collect all images and their corresponding groundtruth files.
Args:
img_dir(str): The image directory
gt_dir(str): The groundtruth directory
Returns:
files(list): The list of tuples (img_file, groundtruth_file)
"""
assert isinstance(img_dir, str)
assert img_dir
assert isinstance(gt_dir, str)
assert gt_dir
# note that we handle png and jpg only. Pls convert others such as gif to
# jpg or png offline
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
imgs_list = []
for suffix in suffixes:
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
files = []
for img_file in imgs_list:
gt_file = gt_dir + '/gt_' + osp.splitext(
osp.basename(img_file))[0] + '.txt'
files.append((img_file, gt_file))
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
return files
def collect_annotations(files, dataset, nproc=1):
"""Collect the annotation information.
Args:
files(list): The list of tuples (image_file, groundtruth_file)
dataset(str): The dataset name, icdar2015 or icdar2017
nproc(int): The number of process to collect annotations
Returns:
images(list): The list of image information dicts
"""
assert isinstance(files, list)
assert isinstance(dataset, str)
assert dataset
assert isinstance(nproc, int)
load_img_info_with_dataset = partial(load_img_info, dataset=dataset)
if nproc > 1:
images = mmengine.track_parallel_progress(
load_img_info_with_dataset, files, nproc=nproc)
else:
images = mmengine.track_progress(load_img_info_with_dataset, files)
return images
def load_img_info(files, dataset):
"""Load the information of one image.
Args:
files(tuple): The tuple of (img_file, groundtruth_file)
dataset(str): Dataset name, icdar2015 or icdar2017
Returns:
img_info(dict): The dict of the img and annotation information
"""
assert isinstance(files, tuple)
assert isinstance(dataset, str)
assert dataset
img_file, gt_file = files
# read imgs with ignoring orientations
img = mmcv.imread(img_file, 'unchanged')
if dataset == 'icdar2017':
gt_list = list_from_file(gt_file)
elif dataset == 'icdar2015':
gt_list = list_from_file(gt_file, encoding='utf-8-sig')
else:
raise NotImplementedError(f'Not support {dataset}')
anno_info = []
for line in gt_list:
# each line has one polygon (4 vertices), and other fields.
# e.g., 695,885,866,888,867,1146,696,1143,Latin,9
line = line.strip()
strs = line.split(',')
category_id = 1
xy = [int(x) for x in strs[0:8]]
coordinates = np.array(xy).reshape(-1, 2)
polygon = Polygon(coordinates)
iscrowd = 0
# set iscrowd to 1 to ignore it.
if (dataset == 'icdar2015'
and strs[8] == '###') or (dataset == 'icdar2017'
and strs[9] == '###'):
iscrowd = 1
print('ignore text')
area = polygon.area
# convert to COCO style XYWH format
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox,
area=area,
segmentation=[xy])
anno_info.append(anno)
split_name = osp.basename(osp.dirname(img_file))
img_info = dict(
# remove img_prefix for filename
file_name=osp.join(split_name, osp.basename(img_file)),
height=img.shape[0],
width=img.shape[1],
anno_info=anno_info,
segm_file=osp.join(split_name, osp.basename(gt_file)))
return img_info
def parse_args():
parser = argparse.ArgumentParser(
description='Convert Icdar2015 or Icdar2017 annotations to COCO format'
)
parser.add_argument('icdar_path', help='icdar root path')
parser.add_argument('-o', '--out-dir', help='output path')
parser.add_argument(
'-d', '--dataset', required=True, help='icdar2017 or icdar2015')
parser.add_argument(
'--split-list',
nargs='+',
help='a list of splits. e.g., "--split-list training test"')
parser.add_argument(
'--nproc', default=1, type=int, help='number of process')
args = parser.parse_args()
return args
def main():
args = parse_args()
icdar_path = args.icdar_path
out_dir = args.out_dir if args.out_dir else icdar_path
mmengine.mkdir_or_exist(out_dir)
img_dir = osp.join(icdar_path, 'imgs')
gt_dir = osp.join(icdar_path, 'annotations')
set_name = {}
for split in args.split_list:
set_name.update({split: 'instances_' + split + '.json'})
assert osp.exists(osp.join(img_dir, split))
for split, json_name in set_name.items():
print(f'Converting {split} into {json_name}')
with mmengine.Timer(
print_tmpl='It takes {}s to convert icdar annotation'):
files = collect_files(
osp.join(img_dir, split), osp.join(gt_dir, split))
image_infos = collect_annotations(
files, args.dataset, nproc=args.nproc)
dump_ocr_data(image_infos, osp.join(out_dir, json_name), 'textdet')
if __name__ == '__main__':
main()
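# Example invocation, as given in the dataset preparation guide above:
#   python tools/data/textdet/icdar_converter.py /path/to/icdar2015 \
#       -o /path/to/icdar2015 -d icdar2015 --split-list training test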


@@ -1,181 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import json
import os.path as osp
import time
import lmdb
import mmcv
import mmengine
import numpy as np
from scipy.io import loadmat
from shapely.geometry import Polygon
from mmocr.utils import check_argument
def trace_boundary(char_boxes):
"""Trace the boundary point of text.
Args:
char_boxes (list[ndarray]): The char boxes for one text. Each element
is 4x2 ndarray.
Returns:
boundary (ndarray): The boundary point sets with size nx2.
"""
assert check_argument.is_type_list(char_boxes, np.ndarray)
# from top left to top right
p_top = [box[0:2] for box in char_boxes]
# from bottom right to bottom left
p_bottom = [
char_boxes[idx][[2, 3], :]
for idx in range(len(char_boxes) - 1, -1, -1)
]
p = p_top + p_bottom
boundary = np.concatenate(p).astype(int)
return boundary
def match_bbox_char_str(bboxes, char_bboxes, strs):
"""match the bboxes, char bboxes, and strs.
Args:
bboxes (ndarray): The text boxes of size (2, 4, num_box).
char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
strs (ndarray): The string of size (num_strs,)
"""
assert isinstance(bboxes, np.ndarray)
assert isinstance(char_bboxes, np.ndarray)
assert isinstance(strs, np.ndarray)
bboxes = bboxes.astype(np.int32)
char_bboxes = char_bboxes.astype(np.int32)
if len(char_bboxes.shape) == 2:
char_bboxes = np.expand_dims(char_bboxes, axis=2)
char_bboxes = np.transpose(char_bboxes, (2, 1, 0))
if len(bboxes.shape) == 2:
bboxes = np.expand_dims(bboxes, axis=2)
bboxes = np.transpose(bboxes, (2, 1, 0))
chars = ''.join(strs).replace('\n', '').replace(' ', '')
num_boxes = bboxes.shape[0]
poly_list = [Polygon(bboxes[iter]) for iter in range(num_boxes)]
poly_box_list = [bboxes[iter] for iter in range(num_boxes)]
poly_char_list = [[] for iter in range(num_boxes)]
poly_char_idx_list = [[] for iter in range(num_boxes)]
poly_charbox_list = [[] for iter in range(num_boxes)]
words = []
for s in strs:
words += s.split()
words_len = [len(w) for w in words]
words_end_inx = np.cumsum(words_len)
start_inx = 0
for word_inx, end_inx in enumerate(words_end_inx):
for char_inx in range(start_inx, end_inx):
poly_char_idx_list[word_inx].append(char_inx)
poly_char_list[word_inx].append(chars[char_inx])
poly_charbox_list[word_inx].append(char_bboxes[char_inx])
start_inx = end_inx
for box_inx in range(num_boxes):
assert len(poly_charbox_list[box_inx]) > 0
poly_boundary_list = []
for item in poly_charbox_list:
boundary = np.ndarray((0, 2))
if len(item) > 0:
boundary = trace_boundary(item)
poly_boundary_list.append(boundary)
return (poly_list, poly_box_list, poly_boundary_list, poly_charbox_list,
poly_char_idx_list, poly_char_list)
def convert_annotations(root_path, gt_name, lmdb_name):
"""Convert the annotation into lmdb dataset.
Args:
root_path (str): The root path of dataset.
gt_name (str): The ground truth filename.
lmdb_name (str): The output lmdb filename.
"""
assert isinstance(root_path, str)
assert isinstance(gt_name, str)
assert isinstance(lmdb_name, str)
start_time = time.time()
gt = loadmat(gt_name)
img_num = len(gt['imnames'][0])
env = lmdb.open(lmdb_name, map_size=int(1e9 * 40))
with env.begin(write=True) as txn:
for img_id in range(img_num):
if img_id % 1000 == 0 and img_id > 0:
total_time_sec = time.time() - start_time
avg_time_sec = total_time_sec / img_id
eta_mins = (avg_time_sec * (img_num - img_id)) / 60
print(f'\ncurrent_img/total_imgs {img_id}/{img_num} | '
f'eta: {eta_mins:.3f} mins')
# for each img
img_file = osp.join(root_path, 'imgs', gt['imnames'][0][img_id][0])
img = mmcv.imread(img_file, 'unchanged')
height, width = img.shape[0:2]
img_json = {}
img_json['file_name'] = gt['imnames'][0][img_id][0]
img_json['height'] = height
img_json['width'] = width
img_json['annotations'] = []
wordBB = gt['wordBB'][0][img_id]
charBB = gt['charBB'][0][img_id]
txt = gt['txt'][0][img_id]
poly_list, _, poly_boundary_list, _, _, _ = match_bbox_char_str(
wordBB, charBB, txt)
for poly_inx in range(len(poly_list)):
polygon = poly_list[poly_inx]
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
anno_info = dict()
anno_info['iscrowd'] = 0
anno_info['category_id'] = 1
anno_info['bbox'] = bbox
anno_info['segmentation'] = [
poly_boundary_list[poly_inx].flatten().tolist()
]
img_json['annotations'].append(anno_info)
string = json.dumps(img_json)
txn.put(str(img_id).encode('utf8'), string.encode('utf8'))
key = b'total_number'
value = str(img_num).encode('utf8')
txn.put(key, value)
def parse_args():
parser = argparse.ArgumentParser(
description='Convert synthtext to lmdb dataset')
parser.add_argument('synthtext_path', help='synthetic root path')
parser.add_argument('-o', '--out-dir', help='output path')
args = parser.parse_args()
return args
# TODO: Refactor synthtext
def main():
args = parse_args()
synthtext_path = args.synthtext_path
out_dir = args.out_dir if args.out_dir else synthtext_path
mmengine.mkdir_or_exist(out_dir)
gt_name = osp.join(synthtext_path, 'gt.mat')
lmdb_name = 'synthtext.lmdb'
convert_annotations(synthtext_path, gt_name, osp.join(out_dir, lmdb_name))
if __name__ == '__main__':
main()
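# Example invocation (script path illustrative; arguments per parse_args above):
#   python synthtext_converter.py /path/to/synthtext -o /path/to/synthtext
# The annotations are written to <out-dir>/synthtext.lmdb.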


@@ -1,76 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import math
import os.path as osp
import mmengine
from mmocr.utils import dump_ocr_data
def parse_args():
parser = argparse.ArgumentParser(
description='Generate training and validation set of TextOCR ')
parser.add_argument('root_path', help='Root dir path of TextOCR')
args = parser.parse_args()
return args
def collect_textocr_info(root_path, annotation_filename, print_every=1000):
annotation_path = osp.join(root_path, annotation_filename)
if not osp.exists(annotation_path):
raise Exception(
f'{annotation_path} does not exist, please check and try again.')
annotation = mmengine.load(annotation_path)
# img_idx = img_start_idx
img_infos = []
for i, img_info in enumerate(annotation['imgs'].values()):
if i > 0 and i % print_every == 0:
print(f'{i}/{len(annotation["imgs"].values())}')
img_info['segm_file'] = annotation_path
ann_ids = annotation['imgToAnns'][img_info['id']]
anno_info = []
for ann_id in ann_ids:
ann = annotation['anns'][ann_id]
# Ignore illegible or non-English words
text_label = ann['utf8_string']
iscrowd = 1 if text_label == '.' else 0
x, y, w, h = ann['bbox']
x, y = max(0, math.floor(x)), max(0, math.floor(y))
w, h = math.ceil(w), math.ceil(h)
bbox = [x, y, w, h]
segmentation = [max(0, int(x)) for x in ann['points']]
anno = dict(
iscrowd=iscrowd,
category_id=1,
bbox=bbox,
area=ann['area'],
segmentation=[segmentation])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
img_infos.append(img_info)
return img_infos
def main():
args = parse_args()
root_path = args.root_path
print('Processing training set...')
training_infos = collect_textocr_info(root_path, 'TextOCR_0.1_train.json')
dump_ocr_data(training_infos,
osp.join(root_path, 'instances_training.json'), 'textdet')
print('Processing validation set...')
val_infos = collect_textocr_info(root_path, 'TextOCR_0.1_val.json')
dump_ocr_data(val_infos, osp.join(root_path, 'instances_val.json'),
'textdet')
print('Finish')
if __name__ == '__main__':
main()
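# Example invocation, as given in the dataset preparation guide above:
#   python tools/data/textdet/textocr_converter.py /path/to/textocr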


@@ -1,410 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import re
import cv2
import mmcv
import mmengine
import numpy as np
import scipy.io as scio
import yaml
from shapely.geometry import Polygon
from mmocr.utils import dump_ocr_data
def collect_files(img_dir, gt_dir):
"""Collect all images and their corresponding groundtruth files.
Args:
img_dir (str): The image directory
gt_dir (str): The groundtruth directory
Returns:
files (list): The list of tuples (img_file, groundtruth_file)
"""
assert isinstance(img_dir, str)
assert img_dir
assert isinstance(gt_dir, str)
assert gt_dir
# note that we handle png and jpg only. Pls convert others such as gif to
# jpg or png offline
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
# suffixes = ['.png']
imgs_list = []
for suffix in suffixes:
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
imgs_list = sorted(imgs_list)
ann_list = sorted(
osp.join(gt_dir, gt_file) for gt_file in os.listdir(gt_dir))
files = list(zip(imgs_list, ann_list))
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
return files
def collect_annotations(files, nproc=1):
"""Collect the annotation information.
Args:
files (list): The list of tuples (image_file, groundtruth_file)
nproc (int): The number of process to collect annotations
Returns:
images (list): The list of image information dicts
"""
assert isinstance(files, list)
assert isinstance(nproc, int)
if nproc > 1:
images = mmengine.track_parallel_progress(
load_img_info, files, nproc=nproc)
else:
images = mmengine.track_progress(load_img_info, files)
return images
def get_contours_mat(gt_path):
"""Get the contours and words for each ground_truth mat file.
Args:
gt_path (str): The relative path of the ground_truth mat file
Returns:
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
"""
assert isinstance(gt_path, str)
contours = []
words = []
data = scio.loadmat(gt_path)
# 'gt' for the latest version; 'polygt' for the legacy version
keys = data.keys()
if 'gt' in keys:
data_polygt = data.get('gt')
elif 'polygt' in keys:
data_polygt = data.get('polygt')
else:
raise NotImplementedError
for i, lines in enumerate(data_polygt):
X = np.array(lines[1])
Y = np.array(lines[3])
point_num = len(X[0])
word = lines[4]
if len(word) == 0 or word == '#':
word = '###'
else:
word = word[0]
words.append(word)
arr = np.concatenate([X, Y]).T
contour = []
for i in range(point_num):
contour.append(arr[i][0])
contour.append(arr[i][1])
contours.append(np.asarray(contour))
return contours, words
def load_mat_info(img_info, gt_file):
"""Load the information of one ground truth in .mat format.
Args:
img_info (dict): The dict of only the image information
gt_file (str): The relative path of the ground_truth mat
file for one image
Returns:
img_info(dict): The dict of the img and annotation information
"""
assert isinstance(img_info, dict)
assert isinstance(gt_file, str)
contours, texts = get_contours_mat(gt_file)
anno_info = []
for contour, text in zip(contours, texts):
if contour.shape[0] == 2:
continue
category_id = 1
coordinates = np.array(contour).reshape(-1, 2)
polygon = Polygon(coordinates)
iscrowd = 1 if text == '###' else 0
area = polygon.area
# convert to COCO style XYWH format
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox,
area=area,
text=text,
segmentation=[contour])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def process_line(line, contours, words):
"""Get the contours and words by processing each line in the gt file.
Args:
line(str): The line in gt file containing annotation info
contours(list[lists]): A list of lists of contours
for the text instances
words(list[list]): A list of lists of words (string)
for the text instances
Returns:
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
"""
line = '{' + line.replace('[[', '[').replace(']]', ']') + '}'
ann_dict = re.sub('([0-9]) +([0-9])', r'\1,\2', line)
ann_dict = re.sub('([0-9]) +([ 0-9])', r'\1,\2', ann_dict)
ann_dict = re.sub('([0-9]) -([0-9])', r'\1,-\2', ann_dict)
ann_dict = ann_dict.replace("[u',']", "[u'#']")
ann_dict = yaml.safe_load(ann_dict)
X = np.array([ann_dict['x']])
Y = np.array([ann_dict['y']])
if len(ann_dict['transcriptions']) == 0:
word = '###'
else:
word = ann_dict['transcriptions'][0]
if len(ann_dict['transcriptions']) > 1:
for ann_word in ann_dict['transcriptions'][1:]:
word += ',' + ann_word
word = str(eval(word))
words.append(word)
point_num = len(X[0])
arr = np.concatenate([X, Y]).T
contour = []
for i in range(point_num):
contour.append(arr[i][0])
contour.append(arr[i][1])
contours.append(np.asarray(contour))
return contours, words
def get_contours_txt(gt_path):
"""Get the contours and words for each ground_truth txt file.
Args:
gt_path (str): The relative path of the ground_truth mat file
Returns:
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
"""
assert isinstance(gt_path, str)
contours = []
words = []
with open(gt_path) as f:
tmp_line = ''
for idx, line in enumerate(f):
line = line.strip()
if idx == 0:
tmp_line = line
continue
if not line.startswith('x:'):
tmp_line += ' ' + line
continue
else:
complete_line = tmp_line
tmp_line = line
contours, words = process_line(complete_line, contours, words)
if tmp_line != '':
contours, words = process_line(tmp_line, contours, words)
words = ['###' if word == '#' else word for word in words]
return contours, words
def load_txt_info(gt_file, img_info):
"""Load the information of one ground truth in .txt format.
Args:
img_info (dict): The dict of only the image information
gt_file (str): The relative path of the ground_truth mat
file for one image
Returns:
img_info(dict): The dict of the img and annotation information
"""
contours, texts = get_contours_txt(gt_file)
anno_info = []
for contour, text in zip(contours, texts):
if contour.shape[0] == 2:
continue
category_id = 1
coordinates = np.array(contour).reshape(-1, 2)
polygon = Polygon(coordinates)
iscrowd = 1 if text == '###' else 0
area = polygon.area
# convert to COCO style XYWH format
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox,
area=area,
text=text,
segmentation=[contour])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def load_png_info(gt_file, img_info):
"""Load the information of one ground truth in .png format.
Args:
gt_file (str): The relative path of the ground_truth file for one image
img_info (dict): The dict of only the image information
Returns:
img_info (dict): The dict of the img and annotation information
"""
assert isinstance(gt_file, str)
assert isinstance(img_info, dict)
gt_img = cv2.imread(gt_file, 0)
contours, _ = cv2.findContours(gt_img, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
anno_info = []
for contour in contours:
if contour.shape[0] == 2:
continue
category_id = 1
xy = np.array(contour).flatten().tolist()
coordinates = np.array(contour).reshape(-1, 2)
polygon = Polygon(coordinates)
iscrowd = 0
area = polygon.area
# convert to COCO style XYWH format
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
anno = dict(
iscrowd=iscrowd,
category_id=category_id,
bbox=bbox,
area=area,
segmentation=[xy])
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def load_img_info(files):
"""Load the information of one image.
Args:
files (tuple): The tuple of (img_file, groundtruth_file)
Returns:
img_info (dict): The dict of the img and annotation information
"""
assert isinstance(files, tuple)
img_file, gt_file = files
# read imgs while ignoring orientations
img = mmcv.imread(img_file, 'unchanged')
split_name = osp.basename(osp.dirname(img_file))
img_info = dict(
# remove img_prefix for filename
file_name=osp.join(split_name, osp.basename(img_file)),
height=img.shape[0],
width=img.shape[1],
# anno_info=anno_info,
segm_file=osp.join(split_name, osp.basename(gt_file)))
if osp.splitext(gt_file)[1] == '.mat':
img_info = load_mat_info(img_info, gt_file)
elif osp.splitext(gt_file)[1] == '.txt':
img_info = load_txt_info(gt_file, img_info)
else:
raise NotImplementedError
return img_info
def parse_args():
parser = argparse.ArgumentParser(
description='Convert totaltext annotations to COCO format')
parser.add_argument('root_path', help='Totaltext root path')
parser.add_argument(
'--nproc', default=1, type=int, help='Number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
root_path = args.root_path
img_dir = osp.join(root_path, 'imgs')
gt_dir = osp.join(root_path, 'annotations')
set_name = {}
for split in ['training', 'test']:
set_name.update({split: 'instances_' + split + '.json'})
assert osp.exists(osp.join(img_dir, split))
for split, json_name in set_name.items():
print(f'Converting {split} into {json_name}')
with mmengine.Timer(
print_tmpl='It takes {}s to convert totaltext annotation'):
files = collect_files(
osp.join(img_dir, split), osp.join(gt_dir, split))
image_infos = collect_annotations(files, nproc=args.nproc)
dump_ocr_data(image_infos, osp.join(root_path, json_name),
'textdet')
if __name__ == '__main__':
main()

View File

@ -1,67 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os.path as osp
from mmocr.utils import dump_ocr_data
def convert_annotations(root_path, split):
"""Convert original annotations to mmocr format.
The annotation format is as follows:
word_1.png, "flying"
word_2.png, "today"
word_3.png, "means"
See the format of converted annotation in mmocr.utils.dump_ocr_data.
Args:
root_path (str): The root path of the dataset
split (str): The split of the dataset. Namely: Train, Test or Test1015
"""
assert isinstance(root_path, str)
assert isinstance(split, str)
img_info = []
with open(
osp.join(root_path, 'annotations',
f'Challenge2_{split}_Task3_GT.txt'),
encoding='"utf-8-sig') as f:
annos = f.readlines()
for anno in annos:
seg = ' ' if split == 'Test1015' else ', "'
# text may contain comma ',', so split only on the first separator
dst_img_name, word = anno.split(seg, 1)
word = word.replace('"\n', '')
img_info.append({
'file_name': osp.basename(dst_img_name),
'anno_info': [{
'text': word
}]
})
return img_info
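# A minimal trace of the parsing above on an IC13-style line (hypothetical):
#   anno = 'word_1.png, "flying"\n'
#   dst_img_name, word = anno.split(', "', 1)  # 'word_1.png', 'flying"\n'
#   word = word.replace('"\n', '')             # 'flying'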
def parse_args():
parser = argparse.ArgumentParser(
description='Generate training and test sets of IC13')
parser.add_argument('root_path', help='Root dir path of IC13')
args = parser.parse_args()
return args
def main():
args = parse_args()
root_path = args.root_path
for split in ['Train', 'Test', 'Test1015']:
img_info = convert_annotations(root_path, split)
dump_ocr_data(img_info,
osp.join(root_path, f'{split.lower()}_label.json'),
'textrecog')
print(f'{split} split converted.')
if __name__ == '__main__':
main()

View File

@ -1,88 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
import os.path as osp
import xml.etree.ElementTree as ET
import cv2
from mmocr.utils import dump_ocr_data
def parse_args():
parser = argparse.ArgumentParser(
description='Generate the test set of SVT by cropping box images.')
parser.add_argument(
'root_path',
help='Root dir path of SVT, where test.xml is located, '
'for example, "data/mixture/svt/svt1/"')
parser.add_argument(
'--resize',
action='store_true',
help='Whether resize cropped image to certain size.')
parser.add_argument('--height', default=32, type=int, help='Resize height.')
parser.add_argument('--width', default=100, type=int, help='Resize width.')
args = parser.parse_args()
return args
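# The test.xml layout assumed by main() below (a sketch of SVT's format):
#   <tagset>
#     <image>
#       <imageName>img/14_03.jpg</imageName>
#       <taggedRectangles>
#         <taggedRectangle x="100" y="50" width="60" height="30">
#           <tag>LIVING</tag>
#         </taggedRectangle>
#       </taggedRectangles>
#     </image>
#   </tagset>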
def main():
args = parse_args()
root_path = args.root_path
# inputs
src_label_file = osp.join(root_path, 'test.xml')
if not osp.exists(src_label_file):
raise Exception(
f'{src_label_file} does not exist, please check and try again.')
src_image_root = root_path
# outputs
dst_label_file = osp.join(root_path, 'test_label.json')
dst_image_root = osp.join(root_path, 'image')
os.makedirs(dst_image_root, exist_ok=True)
tree = ET.parse(src_label_file)
root = tree.getroot()
index = 1
img_info = []
total_img_num = len(root)
i = 1
for image_node in root.findall('image'):
image_name = image_node.find('imageName').text
print(f'[{i}/{total_img_num}] Process image: {image_name}')
i += 1
# lexicon = image_node.find('lex').text.lower()
# lexicon_list = lexicon.split(',')
# lex_size = len(lexicon_list)
src_img = cv2.imread(osp.join(src_image_root, image_name))
for rectangle in image_node.find('taggedRectangles'):
x = int(rectangle.get('x'))
y = int(rectangle.get('y'))
w = int(rectangle.get('width'))
h = int(rectangle.get('height'))
rb, re = max(0, y), max(0, y + h)
cb, ce = max(0, x), max(0, x + w)
dst_img = src_img[rb:re, cb:ce]
text_label = rectangle.find('tag').text.lower()
if args.resize:
dst_img = cv2.resize(dst_img, (args.width, args.height))
dst_img_name = f'img_{index:04}' + '.jpg'
index += 1
dst_img_path = osp.join(dst_image_root, dst_img_name)
cv2.imwrite(dst_img_path, dst_img)
img_info.append({
'file_name': dst_img_name,
'anno_info': [{
'text': text_label
}]
})
dump_ocr_data(img_info, dst_label_file, 'textrecog')
print(f'Finished generating the SVT test set, '
f'with label file {dst_label_file}')
if __name__ == '__main__':
main()

View File

@ -1,146 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import os
from functools import partial
import mmcv
import mmengine
import numpy as np
from scipy.io import loadmat
def parse_args():
parser = argparse.ArgumentParser(
description='Crop images in Synthtext-style dataset in '
'preparation for MMOCR\'s use')
parser.add_argument(
'anno_path', help='Path to gold annotation data (gt.mat)')
parser.add_argument('img_path', help='Path to images')
parser.add_argument('out_dir', help='Path of output images and labels')
parser.add_argument(
'--n_proc',
default=1,
type=int,
help='Number of processes to run with')
args = parser.parse_args()
return args
def load_gt_datum(datum):
img_path, txt, wordBB, charBB = datum
words = []
word_bboxes = []
char_bboxes = []
# when there's only one word in txt
# scipy will load it as a string
if isinstance(txt, str):
words = txt.split()
else:
for line in txt:
words += line.split()
# From (2, 4, num_boxes) to (num_boxes, 4, 2)
if len(wordBB.shape) == 2:
wordBB = wordBB[:, :, np.newaxis]
cur_wordBB = wordBB.transpose(2, 1, 0)
for box in cur_wordBB:
word_bboxes.append(
[max(round(coord), 0) for pt in box for coord in pt])
# Validate word bboxes.
if len(words) != len(word_bboxes):
return
# From (2, 4, num_boxes) to (num_boxes, 4, 2)
cur_charBB = charBB.transpose(2, 1, 0)
for box in cur_charBB:
char_bboxes.append(
[max(round(coord), 0) for pt in box for coord in pt])
char_bbox_idx = 0
char_bbox_grps = []
for word in words:
temp_bbox = char_bboxes[char_bbox_idx:char_bbox_idx + len(word)]
char_bbox_idx += len(word)
char_bbox_grps.append(temp_bbox)
# Validate char bboxes.
# If the length of the last char bbox is correct, then
# all the previous bboxes are also valid
if len(char_bbox_grps[len(words) - 1]) != len(words[-1]):
return
return img_path, words, word_bboxes, char_bbox_grps
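# Shape handling above, in miniature: SynthText stores boxes as
# (2, 4, num_boxes) arrays of corner coordinates, and a lone box loads as
# (2, 4), so it is first promoted and then transposed to (num_boxes, 4, 2):
#   wordBB = np.zeros((2, 4))          # a single box
#   wordBB = wordBB[:, :, np.newaxis]  # (2, 4, 1)
#   wordBB.transpose(2, 1, 0).shape    # -> (1, 4, 2): four (x, y) corners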
def load_gt_data(filename, n_proc):
mat_data = loadmat(filename, simplify_cells=True)
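# simplify_cells=True (available since SciPy 1.5) unwraps MATLAB cell
# arrays into plain Python lists/strings, which load_gt_datum relies on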
imnames = mat_data['imnames']
txt = mat_data['txt']
wordBB = mat_data['wordBB']
charBB = mat_data['charBB']
return mmengine.track_parallel_progress(
load_gt_datum, list(zip(imnames, txt, wordBB, charBB)), nproc=n_proc)
def process(data, img_path_prefix, out_dir):
if data is None:
return
# Dirty hack for multi-processing
img_path, words, word_bboxes, char_bbox_grps = data
img_dir, img_name = os.path.split(img_path)
img_name = os.path.splitext(img_name)[0]
input_img = mmcv.imread(os.path.join(img_path_prefix, img_path))
output_sub_dir = os.path.join(out_dir, img_dir)
if not os.path.exists(output_sub_dir):
try:
os.makedirs(output_sub_dir)
except FileExistsError:
pass  # may occur during multi-processing
for i, word in enumerate(words):
output_image_patch_name = f'{img_name}_{i}.png'
output_label_name = f'{img_name}_{i}.txt'
output_image_patch_path = os.path.join(output_sub_dir,
output_image_patch_name)
output_label_path = os.path.join(output_sub_dir, output_label_name)
if os.path.exists(output_image_patch_path) and os.path.exists(
output_label_path):
continue
word_bbox = word_bboxes[i]
min_x, max_x = int(min(word_bbox[::2])), int(max(word_bbox[::2]))
min_y, max_y = int(min(word_bbox[1::2])), int(max(word_bbox[1::2]))
cropped_img = input_img[min_y:max_y, min_x:max_x]
if cropped_img.shape[0] <= 0 or cropped_img.shape[1] <= 0:
continue
char_bbox_grp = np.array(char_bbox_grps[i])
char_bbox_grp[:, ::2] -= min_x
char_bbox_grp[:, 1::2] -= min_y
mmcv.imwrite(cropped_img, output_image_patch_path)
with open(output_label_path, 'w') as output_label_file:
output_label_file.write(word + '\n')
for cbox in char_bbox_grp:
output_label_file.write('%d %d %d %d %d %d %d %d\n' %
tuple(cbox.tolist()))
def main():
args = parse_args()
print('Loading annotation data...')
data = load_gt_data(args.anno_path, args.n_proc)
process_with_outdir = partial(
process, img_path_prefix=args.img_path, out_dir=args.out_dir)
print('Creating cropped images and gold labels...')
mmengine.track_parallel_progress(
process_with_outdir, data, nproc=args.n_proc)
print('Done')
if __name__ == '__main__':
main()

View File

@ -1,113 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import math
import os
import os.path as osp
from functools import partial
import mmcv
import mmengine
from mmocr.utils import dump_ocr_data
def parse_args():
parser = argparse.ArgumentParser(
description='Generate training and validation sets of TextOCR '
'by cropping box image.')
parser.add_argument('root_path', help='Root dir path of TextOCR')
parser.add_argument(
'n_proc', nargs='?', default=1, type=int, help='Number of processes to run')
args = parser.parse_args()
return args
def process_img(args, src_image_root, dst_image_root):
# Dirty hack for multi-processing
img_idx, img_info, anns = args
src_img = mmcv.imread(osp.join(src_image_root, img_info['file_name']))
labels = []
for ann_idx, ann in enumerate(anns):
text_label = ann['utf8_string']
# Ignore illegible or non-English words
if text_label == '.':
continue
x, y, w, h = ann['bbox']
x, y = max(0, math.floor(x)), max(0, math.floor(y))
w, h = math.ceil(w), math.ceil(h)
dst_img = src_img[y:y + h, x:x + w]
dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
dst_img_path = osp.join(dst_image_root, dst_img_name)
mmcv.imwrite(dst_img, dst_img_path)
labels.append({
'file_name': dst_img_name,
'anno_info': [{
'text': text_label
}]
})
return labels
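# Shape of the TextOCR annotation JSON assumed by convert_textocr below
# (a sketch inferred from the fields this script accesses):
#   {
#     "imgs":      {"<img_id>": {"id": "<img_id>", "file_name": "..."}},
#     "imgToAnns": {"<img_id>": ["<ann_id>", ...]},
#     "anns":      {"<ann_id>": {"utf8_string": "word",
#                                "bbox": [x, y, w, h]}}
#   }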
def convert_textocr(root_path,
dst_image_path,
dst_label_filename,
annotation_filename,
img_start_idx=0,
nproc=1):
annotation_path = osp.join(root_path, annotation_filename)
if not osp.exists(annotation_path):
raise Exception(
f'{annotation_path} does not exist, please check and try again.')
src_image_root = root_path
# outputs
dst_label_file = osp.join(root_path, dst_label_filename)
dst_image_root = osp.join(root_path, dst_image_path)
os.makedirs(dst_image_root, exist_ok=True)
annotation = mmengine.load(annotation_path)
process_img_with_path = partial(
process_img,
src_image_root=src_image_root,
dst_image_root=dst_image_root)
tasks = []
for img_idx, img_info in enumerate(annotation['imgs'].values()):
ann_ids = annotation['imgToAnns'][img_info['id']]
anns = [annotation['anns'][ann_id] for ann_id in ann_ids]
tasks.append((img_idx + img_start_idx, img_info, anns))
labels_list = mmengine.track_parallel_progress(
process_img_with_path, tasks, keep_order=True, nproc=nproc)
final_labels = []
for label_list in labels_list:
final_labels += label_list
dump_ocr_data(final_labels, dst_label_file, 'textrecog')
return len(annotation['imgs'])
def main():
args = parse_args()
root_path = args.root_path
print('Processing training set...')
num_train_imgs = convert_textocr(
root_path=root_path,
dst_image_path='image',
dst_label_filename='train_label.json',
annotation_filename='TextOCR_0.1_train.json',
nproc=args.n_proc)
print('Processing validation set...')
convert_textocr(
root_path=root_path,
dst_image_path='image',
dst_label_filename='val_label.json',
annotation_filename='TextOCR_0.1_val.json',
img_start_idx=num_train_imgs,
nproc=args.n_proc)
print('Finished')
if __name__ == '__main__':
main()

View File

@ -1,388 +0,0 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import re
import mmcv
import mmengine
import numpy as np
import scipy.io as scio
import yaml
from shapely.geometry import Polygon
from mmocr.utils import crop_img, dump_ocr_data
def collect_files(img_dir, gt_dir):
"""Collect all images and their corresponding groundtruth files.
Args:
img_dir (str): The image directory
gt_dir (str): The groundtruth directory
Returns:
files (list): The list of tuples (img_file, groundtruth_file)
"""
assert isinstance(img_dir, str)
assert img_dir
assert isinstance(gt_dir, str)
assert gt_dir
# Note that only png and jpg are handled here. Please convert other
# formats such as gif to jpg or png offline
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
# suffixes = ['.png']
imgs_list = []
for suffix in suffixes:
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
imgs_list = sorted(imgs_list)
ann_list = sorted(
osp.join(gt_dir, gt_file) for gt_file in os.listdir(gt_dir))
files = [(img_file, gt_file)
for (img_file, gt_file) in zip(imgs_list, ann_list)]
assert len(files), f'No images found in {img_dir}'
print(f'Loaded {len(files)} images from {img_dir}')
return files
def collect_annotations(files, nproc=1):
"""Collect the annotation information.
Args:
files (list): The list of tuples (image_file, groundtruth_file)
nproc (int): The number of processes to collect annotations
Returns:
images (list): The list of image information dicts
"""
assert isinstance(files, list)
assert isinstance(nproc, int)
if nproc > 1:
images = mmengine.track_parallel_progress(
load_img_info, files, nproc=nproc)
else:
images = mmengine.track_progress(load_img_info, files)
return images
def get_contours_mat(gt_path):
"""Get the contours and words for each ground_truth mat file.
Args:
gt_path (str): The relative path of the ground_truth mat file
Returns:
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
"""
assert isinstance(gt_path, str)
contours = []
words = []
data = scio.loadmat(gt_path)
# 'gt' for the latest version; 'polygt' for the legacy version
keys = data.keys()
if 'gt' in keys:
data_polygt = data.get('gt')
elif 'polygt' in keys:
data_polygt = data.get('polygt')
else:
raise ValueError(f'Neither "gt" nor "polygt" found in {gt_path}')
for lines in data_polygt:
X = np.array(lines[1])
Y = np.array(lines[3])
point_num = len(X[0])
word = lines[4]
if len(word) == 0 or word == '#':
word = '###'
else:
word = word[0]
words.append(word)
arr = np.concatenate([X, Y]).T
contour = []
for i in range(point_num):
contour.append(arr[i][0])
contour.append(arr[i][1])
contours.append(np.asarray(contour))
return contours, words
def load_mat_info(img_info, gt_file):
"""Load the information of one ground truth in .mat format.
Args:
img_info (dict): The dict of only the image information
gt_file (str): The relative path of the ground_truth mat
file for one image
Returns:
img_info (dict): The dict of the img and annotation information
"""
assert isinstance(img_info, dict)
assert isinstance(gt_file, str)
contours, words = get_contours_mat(gt_file)
anno_info = []
for contour, word in zip(contours, words):
if contour.shape[0] == 2 or word == '###':
continue
coordinates = np.array(contour).reshape(-1, 2)
polygon = Polygon(coordinates)
# represent the polygon's bounds as a 4-point clockwise rectangle
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
anno = dict(word=word, bbox=bbox)
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def process_line(line, contours, words):
"""Get the contours and words by processing each line in the gt file.
Args:
line (str): The line in gt file containing annotation info
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
Returns:
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
"""
line = '{' + line.replace('[[', '[').replace(']]', ']') + '}'
ann_dict = re.sub('([0-9]) +([0-9])', r'\1,\2', line)
ann_dict = re.sub('([0-9]) +([ 0-9])', r'\1,\2', ann_dict)
ann_dict = re.sub('([0-9]) -([0-9])', r'\1,-\2', ann_dict)
ann_dict = ann_dict.replace("[u',']", "[u'#']")
ann_dict = yaml.safe_load(ann_dict)
X = np.array([ann_dict['x']])
Y = np.array([ann_dict['y']])
if len(ann_dict['transcriptions']) == 0:
word = '###'
else:
word = ann_dict['transcriptions'][0]
if len(ann_dict['transcriptions']) > 1:
for ann_word in ann_dict['transcriptions'][1:]:
word += ',' + ann_word
word = str(eval(word))
words.append(word)
point_num = len(X[0])
arr = np.concatenate([X, Y]).T
contour = []
for i in range(point_num):
contour.append(arr[i][0])
contour.append(arr[i][1])
contours.append(np.asarray(contour))
return contours, words
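# process_line above, traced on a hypothetical Total-Text style record:
#   x: [[115 503 494 115]], y: [[322 346 426 404]], transcriptions: [u'the']
# The replace/regex passes normalize it into parseable YAML:
#   {x: [115,503,494,115], y: [322,346,426,404], transcriptions: [u'the']}
# yaml.safe_load() then yields a dict, and eval() strips the u'...' wrapper
# from each transcription, leaving word == 'the'.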
def get_contours_txt(gt_path):
"""Get the contours and words for each ground_truth txt file.
Args:
gt_path (str): The relative path of the ground_truth txt file
Returns:
contours (list[lists]): A list of lists of contours
for the text instances
words (list[list]): A list of lists of words (string)
for the text instances
"""
assert isinstance(gt_path, str)
contours = []
words = []
with open(gt_path) as f:
tmp_line = ''
for idx, line in enumerate(f):
line = line.strip()
if idx == 0:
tmp_line = line
continue
if not line.startswith('x:'):
tmp_line += ' ' + line
continue
else:
complete_line = tmp_line
tmp_line = line
contours, words = process_line(complete_line, contours, words)
if tmp_line != '':
contours, words = process_line(tmp_line, contours, words)
words = ['###' if word == '#' else word for word in words]
return contours, words
def load_txt_info(gt_file, img_info):
"""Load the information of one ground truth in .txt format.
Args:
img_info (dict): The dict of only the image information
gt_file (str): The relative path of the ground_truth txt
file for one image
Returns:
img_info (dict): The dict of the img and annotation information
"""
contours, words = get_contours_txt(gt_file)
anno_info = []
for contour, word in zip(contours, words):
if contour.shape[0] == 2 or word == '###':
continue
coordinates = np.array(contour).reshape(-1, 2)
polygon = Polygon(coordinates)
# represent the polygon's bounds as a 4-point clockwise rectangle
min_x, min_y, max_x, max_y = polygon.bounds
bbox = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
anno = dict(word=word, bbox=bbox)
anno_info.append(anno)
img_info.update(anno_info=anno_info)
return img_info
def generate_ann(root_path, split, image_infos):
"""Generate cropped annotations and label txt file.
Args:
root_path (str): The root path of the totaltext dataset
split (str): The split of dataset. Namely: training or test
image_infos (list[dict]): A list of dicts of the img and
annotation information
"""
dst_image_root = osp.join(root_path, 'dst_imgs', split)
if split == 'training':
dst_label_file = osp.join(root_path, 'train_label.json')
elif split == 'test':
dst_label_file = osp.join(root_path, 'test_label.json')
os.makedirs(dst_image_root, exist_ok=True)
img_info = []
for image_info in image_infos:
index = 1
src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
image = mmcv.imread(src_img_path)
src_img_root = osp.splitext(image_info['file_name'])[0].split('/')[1]
for anno in image_info['anno_info']:
word = anno['word']
dst_img = crop_img(image, anno['bbox'])
# Skip invalid annotations
if min(dst_img.shape) == 0 or word == '###':
continue
dst_img_name = f'{src_img_root}_{index}.png'
index += 1
dst_img_path = osp.join(dst_image_root, dst_img_name)
mmcv.imwrite(dst_img, dst_img_path)
img_info.append({
'file_name': dst_img_name,
'anno_info': [{
'text': word
}]
})
dump_ocr_data(img_info, dst_label_file, 'textrecog')
def load_img_info(files):
"""Load the information of one image.
Args:
files (tuple): The tuple of (img_file, groundtruth_file)
Returns:
img_info (dict): The dict of the img and annotation information
"""
assert isinstance(files, tuple)
img_file, gt_file = files
# read imgs while ignoring orientations
img = mmcv.imread(img_file, 'unchanged')
split_name = osp.basename(osp.dirname(img_file))
img_info = dict(
# remove img_prefix for filename
file_name=osp.join(split_name, osp.basename(img_file)),
height=img.shape[0],
width=img.shape[1],
# anno_info=anno_info,
segm_file=osp.join(split_name, osp.basename(gt_file)))
if osp.splitext(gt_file)[1] == '.mat':
img_info = load_mat_info(img_info, gt_file)
elif osp.splitext(gt_file)[1] == '.txt':
img_info = load_txt_info(gt_file, img_info)
else:
raise NotImplementedError
return img_info
def parse_args():
parser = argparse.ArgumentParser(
description='Convert totaltext annotations to COCO format')
parser.add_argument('root_path', help='Totaltext root path')
parser.add_argument(
'--nproc', default=1, type=int, help='Number of processes')
args = parser.parse_args()
return args
def main():
args = parse_args()
root_path = args.root_path
img_dir = osp.join(root_path, 'imgs')
gt_dir = osp.join(root_path, 'annotations')
# map each split to the label file that generate_ann() actually writes
set_name = {'training': 'train_label.json', 'test': 'test_label.json'}
for split in set_name:
assert osp.exists(osp.join(img_dir, split))
for split, ann_name in set_name.items():
print(f'Converting {split} into {ann_name}')
with mmengine.Timer(
print_tmpl='It takes {}s to convert totaltext annotation'):
files = collect_files(
osp.join(img_dir, split), osp.join(gt_dir, split))
image_infos = collect_annotations(files, nproc=args.nproc)
generate_ann(root_path, split, image_infos)
if __name__ == '__main__':
main()