mirror of https://github.com/open-mmlab/mmocr.git
[Fix] Clear up some unused scripts (#1798)
parent
d0dc90253a
commit
1a379f2f1b
@ -9,10 +9,8 @@ This page is a manual preparation guide for datasets not yet supported by [Datas
| Dataset | Images | | Annotation Files | | |
| :---------------: | :------------------------------------------------------: | :------------------------------------------------: | :-----------------------------------------------------------------: | :-----: | :-: |
| | | training | validation | testing | |
| CTW1500 | [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) | - | - | - | |
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - | - | |
| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | - | |
| SynthText | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | instances_training.lmdb ([data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb), [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb)) | - | - | |
| CurvedSynText150k | [homepage](https://github.com/aim-uofa/AdelaiDet/blob/master/datasets/README.md) \| [Part1](https://drive.google.com/file/d/1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj/view?usp=sharing) \| [Part2](https://drive.google.com/file/d/1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY/view?usp=sharing) | [instances_training.json](https://download.openmmlab.com/mmocr/data/curvedsyntext/instances_training.json) | - | - | |
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - | - | |
| Lecture Video DB | [homepage](https://cvit.iiit.ac.in/research/projects/cvit-projects/lecturevideodb) | - | - | - | |
@ -62,47 +60,6 @@ backend used in MMCV would read them and apply the rotation on the images. Howe
inconsistency results in false examples in the training set. Therefore, users should use `dict(type='LoadImageFromFile', color_type='color_ignore_orientation')` in pipelines to change MMCV's default loading behaviour. (see [DBNet's pipeline config](https://github.com/open-mmlab/mmocr/blob/main/configs/_base_/det_pipelines/dbnet_pipeline.py) for example)
```
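
Whether a particular image carries such an EXIF orientation tag can be checked from the command line. This is only an illustrative sketch and assumes ImageMagick's `identify` tool is installed:

```bash
# List images whose EXIF orientation differs from the default (1);
# these are the ones MMCV's default loader would rotate.
for img in /path/to/dataset/imgs/training/*.jpg; do
  o=$(identify -format '%[EXIF:Orientation]' "$img" 2>/dev/null)
  if [ -n "$o" ] && [ "$o" != "1" ]; then
    echo "$img: EXIF orientation $o"
  fi
done
```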

## CTW1500

- Step0: Read [Important Note](#important-note)

- Step1: Download `train_images.zip`, `test_images.zip`, `train_labels.zip`, `test_labels.zip` from [github](https://github.com/Yuliang-Liu/Curve-Text-Detector)

```bash
mkdir ctw1500 && cd ctw1500
mkdir imgs && mkdir annotations

# For annotations
cd annotations
wget -O train_labels.zip https://universityofadelaide.box.com/shared/static/jikuazluzyj4lq6umzei7m2ppmt3afyw.zip
wget -O test_labels.zip https://cloudstor.aarnet.edu.au/plus/s/uoeFl0pCN9BOCN5/download
unzip train_labels.zip && mv ctw1500_train_labels training
unzip test_labels.zip -d test
cd ..
# For images
cd imgs
wget -O train_images.zip https://universityofadelaide.box.com/shared/static/py5uwlfyyytbb2pxzq9czvu6fuqbjdh8.zip
wget -O test_images.zip https://universityofadelaide.box.com/shared/static/t4w48ofnqkdw7jyc4t11nsukoeqk9c3d.zip
unzip train_images.zip && mv train_images training
unzip test_images.zip && mv test_images test
```

- Step2: Generate `instances_training.json` and `instances_test.json` with the following command:

```bash
python tools/dataset_converters/textdet/ctw1500_converter.py /path/to/ctw1500 -o /path/to/ctw1500 --split-list training test
```

- The resulting directory structure looks like the following:

```text
├── ctw1500
│   ├── imgs
│   ├── annotations
│   ├── instances_training.json
│   └── instances_test.json
```
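
As a quick sanity check after Step2, the layout and image counts can be inspected. This is purely illustrative; the counts below reflect the usual CTW1500 release of 1000 training and 500 test images:

```bash
cd /path/to/ctw1500
ls                           # expect: imgs  annotations  instances_training.json  instances_test.json
ls imgs/training | wc -l     # typically 1000
ls imgs/test | wc -l         # typically 500
```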

## ICDAR 2011 (Born-Digital Images)

- Step1: Download `Challenge1_Training_Task12_Images.zip`, `Challenge1_Training_Task1_GT.zip`, `Challenge1_Test_Task12_Images.zip`, and `Challenge1_Test_Task1_GT.zip` from [homepage](https://rrc.cvc.uab.es/?ch=1&com=downloads) `Task 1.1: Text Localization (2013 edition)`.
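
Only the download step of this section is visible here. Purely as an illustration (the target directory names below are an assumption modeled on the other sections of this guide, not prescribed by it), the archives could be unpacked like this:

```bash
mkdir icdar2011 && cd icdar2011
mkdir imgs && mkdir annotations

# images (hypothetical layout)
unzip Challenge1_Training_Task12_Images.zip -d imgs/training
unzip Challenge1_Test_Task12_Images.zip -d imgs/test

# annotations (hypothetical layout)
unzip Challenge1_Training_Task1_GT.zip -d annotations/training
unzip Challenge1_Test_Task1_GT.zip -d annotations/test
```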
@ -156,22 +113,6 @@ inconsistency results in false examples in the training set. Therefore, users sh
│   └── instances_val.json
```

## SynthText

- Step1: Download `SynthText.zip` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) and extract its content to `synthtext/imgs`.

- Step2: Download [data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb) and [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb) to `synthtext/instances_training.lmdb/`.

- The resulting directory structure looks like the following:

```text
├── synthtext
│   ├── imgs
│   └── instances_training.lmdb
│       ├── data.mdb
│       └── lock.mdb
```
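
Assuming `wget` is available, Step2 can be carried out directly on the command line using the URLs listed above:

```bash
mkdir -p synthtext/instances_training.lmdb
cd synthtext/instances_training.lmdb
wget https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb
wget https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb
```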

## CurvedSynText150k

- Step1: Download [syntext1.zip](https://drive.google.com/file/d/1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj/view?usp=sharing) and [syntext2.zip](https://drive.google.com/file/d/1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY/view?usp=sharing) to `CurvedSynText150k/`.
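
If a browser download from Google Drive is inconvenient, the two archives can also be fetched on the command line. This is only a sketch and assumes the third-party `gdown` utility (`pip install gdown`); the file IDs are taken from the links above:

```bash
mkdir -p CurvedSynText150k && cd CurvedSynText150k
gdown --id 1OSJ-zId2h3t_-I7g_wUkrK-VqQy153Kj -O syntext1.zip
gdown --id 1EzkcOlIgEp5wmEubvHb7-J5EImHExYgY -O syntext2.zip
```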
@ -11,7 +11,6 @@ This page is a manual preparation guide for datasets not yet supported by [Datas
| | | training | test |
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_labels.json](#TODO) | - |
| ICDAR2011 | [homepage](https://rrc.cvc.uab.es/?ch=1) | - | - |
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) \| [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) | - |
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/synthtext_add/train_labels.json) | - |
| OpenVINO | [Open Images](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |
| DeText | [homepage](https://rrc.cvc.uab.es/?ch=9) | - | - |
@ -110,44 +109,6 @@ For users in China, these datasets can also be downloaded from [OpenDataLab](htt
│   └── train_words
```

## MJSynth (Syn90k)

- Step1: Download `mjsynth.tar.gz` from [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/)
- Step2: Download [train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/train_labels.json) (8,919,273 annotations) and [subset_train_labels.json](https://download.openmmlab.com/mmocr/data/1.x/recog/Syn90k/subset_train_labels.json) (2,400,000 randomly sampled annotations).

```{note}
Please make sure you're using the right annotation to train the model by checking its dataset specs in Model Zoo.
```

- Step3:

```bash
mkdir Syn90k && cd Syn90k

mv /path/to/mjsynth.tar.gz .

tar -xzf mjsynth.tar.gz

mv /path/to/subset_train_labels.json .
mv /path/to/train_labels.json .

# create soft link
cd /path/to/mmocr/data/recog/

ln -s /path/to/Syn90k Syn90k
```

- After running the above commands, the directory structure should be as follows:

```text
├── Syn90k
│   ├── subset_train_labels.json
│   ├── train_labels.json
│   └── mnt
```
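
A quick way to confirm that the extraction and the soft link are in place (purely illustrative):

```bash
cd /path/to/mmocr/data/recog
ls -l Syn90k                                # expect subset_train_labels.json, train_labels.json, mnt
find Syn90k/mnt -name '*.jpg' | head -n 3   # a few cropped word images should be listed
```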

## SynthAdd

- Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x)
@ -1,147 +0,0 @@

# Text Detection

```{warning}
This page is out of date compared with its English counterpart. Please switch to the English version for the latest documentation.
```

```{note}
We are working on adding more datasets to [Dataset Preparer](./dataset_preparer.md). For datasets that Dataset Preparer does not yet fully support, this page provides a series of manual preparation steps for users who need them.
```

## Overview

| Dataset | Images | | Annotation Files | |
| :--------: | :-----------------------------------------------: | :-------------------------------------------: | :------------------------------------------------: | :--------------------------------------------: |
| | | training | validation | testing |
| CTW1500 | [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) | - | - | - |
| ICDAR2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) | - | [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) |
| ICDAR2017 | [homepage](https://rrc.cvc.uab.es/?ch=8&com=downloads) | [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_training.json) | [instances_val.json](https://download.openmmlab.com/mmocr/data/icdar2017/instances_val.json) | - |
| SynthText | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | instances_training.lmdb ([data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb), [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb)) | - | - |
| TextOCR | [homepage](https://textvqa.org/textocr/dataset) | - | - | - |
| Totaltext | [homepage](https://github.com/cs-chan/Total-Text-Dataset) | - | - | - |

For users in China, we also recommend the open data platform [OpenDataLab](https://opendatalab.com/) for a better download experience:

- [CTW1500](https://opendatalab.com/SCUT-CTW1500?source=OpenMMLab%20GitHub)
- [ICDAR2013](https://opendatalab.com/ICDAR_2013?source=OpenMMLab%20GitHub)
- [ICDAR2015](https://opendatalab.com/ICDAR2015?source=OpenMMLab%20GitHub)
- [Totaltext](https://opendatalab.com/TotalText?source=OpenMMLab%20GitHub)
- [MSRA-TD500](https://opendatalab.com/MSRA-TD500?source=OpenMMLab%20GitHub)

## Important Note

```{note}
**If you want to train models on CTW1500, ICDAR 2015/2017 or Totaltext**, note that some images in these datasets carry orientation information in their EXIF data. MMCV's OpenCV backend rotates such images according to that information by default. Since the annotations were made on the original images, this mismatch invalidates part of the training samples. Users should therefore use `dict(type='LoadImageFromFile', color_type='color_ignore_orientation')` in their pipelines to disable this behaviour of MMCV. (See [DBNet's pipeline config](https://github.com/open-mmlab/mmocr/blob/main/configs/_base_/det_pipelines/dbnet_pipeline.py) for an example.)
```

## Preparation Steps

### ICDAR 2015

- Step1: Download `ch4_training_images.zip`, `ch4_test_images.zip`, `ch4_training_localization_transcription_gt.zip` and `Challenge4_Test_Task1_GT.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads). They correspond to the training images, test images, training annotations and test annotations, respectively.
- Step2: Move the data into the corresponding folders with the following commands:

```bash
mkdir icdar2015 && cd icdar2015
mkdir imgs && mkdir annotations
# move the images:
mv ch4_training_images imgs/training
mv ch4_test_images imgs/test
# move the annotations:
mv ch4_training_localization_transcription_gt annotations/training
mv Challenge4_Test_Task1_GT annotations/test
```

- Step3: Download [instances_training.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_training.json) and [instances_test.json](https://download.openmmlab.com/mmocr/data/icdar2015/instances_test.json) and put them into the `icdar2015` folder. Alternatively, generate `instances_training.json` and `instances_test.json` directly with the following command:

```bash
python tools/data/textdet/icdar_converter.py /path/to/icdar2015 -o /path/to/icdar2015 -d icdar2015 --split-list training test
```

### ICDAR 2017

- Follow similar steps as above.

### CTW1500

- Step1: Download `train_images.zip`, `test_images.zip`, `train_labels.zip` and `test_labels.zip` from the [homepage](https://github.com/Yuliang-Liu/Curve-Text-Detector) and set them up with the following commands:

```bash
mkdir ctw1500 && cd ctw1500
mkdir imgs && mkdir annotations

# download and set up the annotations
cd annotations
wget -O train_labels.zip https://universityofadelaide.box.com/shared/static/jikuazluzyj4lq6umzei7m2ppmt3afyw.zip
wget -O test_labels.zip https://cloudstor.aarnet.edu.au/plus/s/uoeFl0pCN9BOCN5/download
unzip train_labels.zip && mv ctw1500_train_labels training
unzip test_labels.zip -d test
cd ..
# download and set up the images
cd imgs
wget -O train_images.zip https://universityofadelaide.box.com/shared/static/py5uwlfyyytbb2pxzq9czvu6fuqbjdh8.zip
wget -O test_images.zip https://universityofadelaide.box.com/shared/static/t4w48ofnqkdw7jyc4t11nsukoeqk9c3d.zip
unzip train_images.zip && mv train_images training
unzip test_images.zip && mv test_images test
```

- Step2: Generate `instances_training.json` and `instances_test.json` with the following command:

```bash
python tools/data/textdet/ctw1500_converter.py /path/to/ctw1500 -o /path/to/ctw1500 --split-list training test
```

### SynthText

- Download [data.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/data.mdb) and [lock.mdb](https://download.openmmlab.com/mmocr/data/synthtext/instances_training.lmdb/lock.mdb) and place them in `synthtext/instances_training.lmdb/`.

### TextOCR

- Step1: Download [train_val_images.zip](https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip), [TextOCR_0.1_train.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json) and [TextOCR_0.1_val.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json) into the `textocr` folder.

```bash
mkdir textocr && cd textocr

# download the TextOCR dataset
wget https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json

# move the images into place
unzip -q train_val_images.zip
mv train_images train
```

- Step2: Generate `instances_training.json` and `instances_val.json`:

```bash
python tools/data/textdet/textocr_converter.py /path/to/textocr
```

### Totaltext

- Step1: Download `totaltext.zip` from [github dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset) and `groundtruth_text.zip` from [github Groundtruth](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Groundtruth/Text). (We recommend downloading the `.mat` annotation format, since our conversion script `totaltext_converter.py` only supports `.mat` files.)

```bash
mkdir totaltext && cd totaltext
mkdir imgs && mkdir annotations

# images
# run these inside ./totaltext
unzip totaltext.zip
mv Images/Train imgs/training
mv Images/Test imgs/test

# annotations
unzip groundtruth_text.zip
cd Groundtruth
mv Polygon/Train ../annotations/training
mv Polygon/Test ../annotations/test
```

- Step2: Generate `instances_training.json` and `instances_test.json` with the following command:

```bash
python tools/data/textdet/totaltext_converter.py /path/to/totaltext -o /path/to/totaltext --split-list training test
```
@ -1,314 +0,0 @@

# Text Recognition

```{warning}
This page is out of date compared with its English counterpart. Please switch to the English version for the latest documentation.
```

```{note}
We are working on adding more datasets to [Dataset Preparer](./dataset_preparer.md). For datasets that Dataset Preparer does not yet fully support, this page provides a series of manual preparation steps for users who need them.
```

## Overview

**Datasets for the text recognition task should be organized in the following directory layout:**

```text
├── mixture
│   ├── coco_text
│   │   ├── train_label.txt
│   │   ├── train_words
│   ├── icdar_2011
│   │   ├── training_label.txt
│   │   ├── Challenge1_Training_Task3_Images_GT
│   ├── icdar_2013
│   │   ├── train_label.txt
│   │   ├── test_label_1015.txt
│   │   ├── test_label_1095.txt
│   │   ├── Challenge2_Training_Task3_Images_GT
│   │   ├── Challenge2_Test_Task3_Images
│   ├── icdar_2015
│   │   ├── train_label.txt
│   │   ├── test_label.txt
│   │   ├── ch4_training_word_images_gt
│   │   ├── ch4_test_word_images_gt
│   ├── IIIT5K
│   │   ├── train_label.txt
│   │   ├── test_label.txt
│   │   ├── train
│   │   ├── test
│   ├── ct80
│   │   ├── test_label.txt
│   │   ├── image
│   ├── svt
│   │   ├── test_label.txt
│   │   ├── image
│   ├── svtp
│   │   ├── test_label.txt
│   │   ├── image
│   ├── Syn90k
│   │   ├── shuffle_labels.txt
│   │   ├── label.txt
│   │   ├── label.lmdb
│   │   ├── mnt
│   ├── SynthText
│   │   ├── alphanumeric_labels.txt
│   │   ├── shuffle_labels.txt
│   │   ├── instances_train.txt
│   │   ├── label.txt
│   │   ├── label.lmdb
│   │   ├── synthtext
│   ├── SynthAdd
│   │   ├── label.txt
│   │   ├── label.lmdb
│   │   ├── SynthText_Add
│   ├── TextOCR
│   │   ├── image
│   │   ├── train_label.txt
│   │   ├── val_label.txt
│   ├── Totaltext
│   │   ├── imgs
│   │   ├── annotations
│   │   ├── train_label.txt
│   │   ├── test_label.txt
│   ├── OpenVINO
│   │   ├── image_1
│   │   ├── image_2
│   │   ├── image_5
│   │   ├── image_f
│   │   ├── image_val
│   │   ├── train_1_label.txt
│   │   ├── train_2_label.txt
│   │   ├── train_5_label.txt
│   │   ├── train_f_label.txt
│   │   ├── val_label.txt
```
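
As the preparation steps below show, each dataset is usually prepared somewhere on disk and then linked into `data/mixture`. Purely as an illustration (all paths are placeholders):

```bash
cd /path/to/mmocr
mkdir -p data/mixture
# link prepared datasets into the layout shown above
ln -s /path/to/Syn90k data/mixture/Syn90k
ln -s /path/to/SynthText data/mixture/SynthText
```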

| Dataset | Images | Annotation Files | Annotation Files |
| :-------------------: | :---------------------------------------------------------: | :----------------------------------------------------------: | :----------------------------------------------------------: |
| | | training | test |
| coco_text | [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/coco_text/train_label.txt) | - |
| icdar_2011 | [homepage](http://www.cvc.uab.es/icdar2011competition/?com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) | - |
| icdar_2013 | [homepage](https://rrc.cvc.uab.es/?ch=2&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/train_label.txt) | [test_label_1015.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/test_label_1015.txt) |
| icdar_2015 | [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/test_label.txt) |
| IIIT5K | [homepage](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html) | [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/train_label.txt) | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/test_label.txt) |
| ct80 | [homepage](http://cs-chan.com/downloads_CUTE80_dataset.html) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/ct80/test_label.txt) |
| svt | [homepage](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svt/test_label.txt) |
| svtp | [unofficial link\*](https://github.com/Jyouhou/Case-Sensitive-Scene-Text-Recognition-Datasets) | - | [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svtp/test_label.txt) |
| MJSynth (Syn90k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/) | [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/Syn90k/shuffle_labels.txt) \| [label.txt](https://download.openmmlab.com/mmocr/data/mixture/Syn90k/label.txt) | - |
| SynthText (Synth800k) | [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/) | [alphanumeric_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/alphanumeric_labels.txt) \| [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/shuffle_labels.txt) \| [instances_train.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/instances_train.txt) \| [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/label.txt) | - |
| SynthAdd | [SynthText_Add.zip](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x) | [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthAdd/label.txt) | - |
| TextOCR | [homepage](https://textvqa.org/textocr/dataset) | - | - |
| Totaltext | [homepage](https://github.com/cs-chan/Total-Text-Dataset) | - | - |
| OpenVINO | [homepage](https://github.com/cvdfoundation/open-images-dataset) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) | [annotations](https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text) |

(\*) Note: Since the official download link is no longer accessible, we provide an unofficial mirror for reference, but we cannot guarantee the correctness of the data.

For users in China, we also recommend the open data platform [OpenDataLab](https://opendatalab.com/) for a better download experience:

- [icdar_2013](https://opendatalab.com/ICDAR_2013?source=OpenMMLab%20GitHub)
- [icdar_2015](https://opendatalab.com/ICDAR2015?source=OpenMMLab%20GitHub)
- [IIIT5K](https://opendatalab.com/IIIT_5K?source=OpenMMLab%20GitHub)
- [ct80](https://opendatalab.com/CUTE_80?source=OpenMMLab%20GitHub)
- [svt](https://opendatalab.com/SVT?source=OpenMMLab%20GitHub)
- [Totaltext](https://opendatalab.com/TotalText?source=OpenMMLab%20GitHub)
- [IAM](https://opendatalab.com/IAM_Handwriting?source=OpenMMLab%20GitHub)

## Preparation Steps

### ICDAR 2013

- Step1: Download `Challenge2_Test_Task3_Images.zip` and `Challenge2_Training_Task3_Images_GT.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=2&com=downloads)
- Step2: Download [test_label_1015.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/test_label_1015.txt) and [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2013/train_label.txt)

### ICDAR 2015

- Step1: Download `ch4_training_word_images_gt.zip` and `ch4_test_word_images_gt.zip` from the [homepage](https://rrc.cvc.uab.es/?ch=4&com=downloads)
- Step2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/train_label.txt) and [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/icdar_2015/test_label.txt)

### IIIT5K

- Step1: Download `IIIT5K-Word_V3.0.tar.gz` from the [homepage](http://cvit.iiit.ac.in/projects/SceneTextUnderstanding/IIIT5K.html)
- Step2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/train_label.txt) and [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/IIIT5K/test_label.txt)

### svt

- Step1: Download `svt.zip` from the [homepage](http://www.iapr-tc11.org/mediawiki/index.php/The_Street_View_Text_Dataset)
- Step2: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svt/test_label.txt)
- Step3:

```bash
python tools/data/textrecog/svt_converter.py <download_svt_dir_path>
```

### ct80

- Step1: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/ct80/test_label.txt)

### svtp

- Step1: Download [test_label.txt](https://download.openmmlab.com/mmocr/data/mixture/svtp/test_label.txt)

### coco_text

- Step1: Download the files from the [homepage](https://rrc.cvc.uab.es/?ch=5&com=downloads)
- Step2: Download [train_label.txt](https://download.openmmlab.com/mmocr/data/mixture/coco_text/train_label.txt)

### MJSynth (Syn90k)

- Step1: Download `mjsynth.tar.gz` from the [homepage](https://www.robots.ox.ac.uk/~vgg/data/text/)
- Step2: Download [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/Syn90k/shuffle_labels.txt)
- Step3:

```bash
mkdir Syn90k && cd Syn90k

mv /path/to/mjsynth.tar.gz .

tar -xzf mjsynth.tar.gz

mv /path/to/shuffle_labels.txt .
mv /path/to/label.txt .

# create soft link
cd /path/to/mmocr/data/mixture

ln -s /path/to/Syn90k Syn90k
```

### SynthText (Synth800k)

- Step1: Download `SynthText.zip` from the [homepage](https://www.robots.ox.ac.uk/~vgg/data/scenetext/)

- Step2: Depending on your needs, download the most suitable annotation file among: [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/label.txt) (7,266,686 annotations), [shuffle_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/shuffle_labels.txt) (2,400,000 randomly sampled annotations), [alphanumeric_labels.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/alphanumeric_labels.txt) (7,239,272 annotations containing digits and letters only) and [instances_train.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthText/instances_train.txt) (7,266,686 character-level annotations).

- Step3:

```bash
mkdir SynthText && cd SynthText
mv /path/to/SynthText.zip .
unzip SynthText.zip
mv SynthText synthtext

mv /path/to/shuffle_labels.txt .
mv /path/to/label.txt .
mv /path/to/alphanumeric_labels.txt .
mv /path/to/instances_train.txt .

# create soft link
cd /path/to/mmocr/data/mixture
ln -s /path/to/SynthText SynthText
```

- Step4: Generate the cropped images and annotations:

```bash
cd /path/to/mmocr

python tools/data/textrecog/synthtext_converter.py data/mixture/SynthText/gt.mat data/mixture/SynthText/ data/mixture/SynthText/synthtext/SynthText_patch_horizontal --n_proc 8
```

### SynthAdd

- Step1: Download `SynthText_Add.zip` from [SynthAdd](https://pan.baidu.com/s/1uV0LtoNmcxbO-0YA7Ch4dg) (code:627x)
- Step2: Download [label.txt](https://download.openmmlab.com/mmocr/data/mixture/SynthAdd/label.txt)
- Step3:

```bash
mkdir SynthAdd && cd SynthAdd

mv /path/to/SynthText_Add.zip .

unzip SynthText_Add.zip

mv /path/to/label.txt .

# create soft link
cd /path/to/mmocr/data/mixture

ln -s /path/to/SynthAdd SynthAdd
```

````{tip}
To convert a `.txt` annotation file into the `.lmdb` format, run:

```bash
python tools/data/utils/txt2lmdb.py -i <txt_label_path> -o <lmdb_label_path>
```

For example:

```bash
python tools/data/utils/txt2lmdb.py -i data/mixture/Syn90k/label.txt -o data/mixture/Syn90k/label.lmdb
```
````

### TextOCR

- Step1: Download [train_val_images.zip](https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip), [TextOCR_0.1_train.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json) and [TextOCR_0.1_val.json](https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json) into the `textocr/` directory.

```bash
mkdir textocr && cd textocr

# download the TextOCR dataset
wget https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_train.json
wget https://dl.fbaipublicfiles.com/textvqa/data/textocr/TextOCR_0.1_val.json

# move the images into place
unzip -q train_val_images.zip
mv train_images train
```

- Step2: Crop the images and generate `train_label.txt` and `val_label.txt` with four parallel processes:

```bash
python tools/data/textrecog/textocr_converter.py /path/to/textocr 4
```

### Totaltext

- Step1: Download `totaltext.zip` from [github dataset](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Dataset) and `groundtruth_text.zip` from [github Groundtruth](https://github.com/cs-chan/Total-Text-Dataset/tree/master/Groundtruth/Text). (We recommend downloading the `.mat` annotation format, since our conversion script `totaltext_converter.py` only supports `.mat` files.)

```bash
mkdir totaltext && cd totaltext
mkdir imgs && mkdir annotations

# images
# run these inside ./totaltext
unzip totaltext.zip
mv Images/Train imgs/training
mv Images/Test imgs/test

# annotations
unzip groundtruth_text.zip
cd Groundtruth
mv Polygon/Train ../annotations/training
mv Polygon/Test ../annotations/test
```

- Step2: Generate the cropped annotation files `train_label.txt` and `test_label.txt` with the following command (the cropped images will be saved to `data/totaltext/dst_imgs/`):

```bash
python tools/data/textrecog/totaltext_converter.py /path/to/totaltext -o /path/to/totaltext --split-list training test
```

### OpenVINO

- Step0: Install [awscli](https://aws.amazon.com/cli/).
- Step1: Download the [Open Images](https://github.com/cvdfoundation/open-images-dataset#download-images-with-bounding-boxes-annotations) subsets `train_1`, `train_2`, `train_5`, `train_f` and `validation` to `openvino/`.

```bash
mkdir openvino && cd openvino

# download the Open Images subsets
for s in 1 2 5 f; do
  aws s3 --no-sign-request cp s3://open-images-dataset/tar/train_${s}.tar.gz .
done
aws s3 --no-sign-request cp s3://open-images-dataset/tar/validation.tar.gz .

# download the annotations
for s in 1 2 5 f; do
  wget https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text/text_spotting_openimages_v5_train_${s}.json
done
wget https://storage.openvinotoolkit.org/repositories/openvino_training_extensions/datasets/open_images_v5_text/text_spotting_openimages_v5_validation.json

# extract the archives
mkdir -p openimages_v5/val
for s in 1 2 5 f; do
  tar zxf train_${s}.tar.gz -C openimages_v5
done
tar zxf validation.tar.gz -C openimages_v5/val
```

- Step2: Generate the annotation files `train_{1,2,5,f}_label.txt` and `val_label.txt` and crop the original images using 4 processes:

```bash
python tools/data/textrecog/openvino_converter.py /path/to/openvino 4
```
@ -1,233 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import glob
|
||||
import os.path as osp
|
||||
import xml.etree.ElementTree as ET
|
||||
from functools import partial
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
from mmocr.utils import dump_ocr_data, list_from_file
|
||||
|
||||
|
||||
def collect_files(img_dir, gt_dir, split):
|
||||
"""Collect all images and their corresponding groundtruth files.
|
||||
|
||||
Args:
|
||||
img_dir(str): The image directory
|
||||
gt_dir(str): The groundtruth directory
|
||||
split(str): The split of dataset. Namely: training or test
|
||||
|
||||
Returns:
|
||||
files(list): The list of tuples (img_file, groundtruth_file)
|
||||
"""
|
||||
assert isinstance(img_dir, str)
|
||||
assert img_dir
|
||||
assert isinstance(gt_dir, str)
|
||||
assert gt_dir
|
||||
|
||||
# note that we handle png and jpg only. Pls convert others such as gif to
|
||||
# jpg or png offline
|
||||
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
|
||||
|
||||
imgs_list = []
|
||||
for suffix in suffixes:
|
||||
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
|
||||
|
||||
files = []
|
||||
if split == 'training':
|
||||
for img_file in imgs_list:
|
||||
gt_file = gt_dir + '/' + osp.splitext(
|
||||
osp.basename(img_file))[0] + '.xml'
|
||||
files.append((img_file, gt_file))
|
||||
assert len(files), f'No images found in {img_dir}'
|
||||
print(f'Loaded {len(files)} images from {img_dir}')
|
||||
elif split == 'test':
|
||||
for img_file in imgs_list:
|
||||
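# CTW1500 test ground truths are plain-text files named '000<image name>.txt'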
gt_file = gt_dir + '/000' + osp.splitext(
|
||||
osp.basename(img_file))[0] + '.txt'
|
||||
files.append((img_file, gt_file))
|
||||
assert len(files), f'No images found in {img_dir}'
|
||||
print(f'Loaded {len(files)} images from {img_dir}')
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def collect_annotations(files, split, nproc=1):
|
||||
"""Collect the annotation information.
|
||||
|
||||
Args:
|
||||
files(list): The list of tuples (image_file, groundtruth_file)
|
||||
split(str): The split of dataset. Namely: training or test
|
||||
nproc(int): The number of process to collect annotations
|
||||
|
||||
Returns:
|
||||
images(list): The list of image information dicts
|
||||
"""
|
||||
assert isinstance(files, list)
|
||||
assert isinstance(split, str)
|
||||
assert isinstance(nproc, int)
|
||||
|
||||
load_img_info_with_split = partial(load_img_info, split=split)
|
||||
if nproc > 1:
|
||||
images = mmengine.track_parallel_progress(
|
||||
load_img_info_with_split, files, nproc=nproc)
|
||||
else:
|
||||
images = mmengine.track_progress(load_img_info_with_split, files)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def load_txt_info(gt_file, img_info):
|
||||
anno_info = []
|
||||
for line in list_from_file(gt_file):
|
||||
# each line has one polygon (14 vertices, i.e. 28 coordinates) and one text,
# e.g., x1,y1,x2,y2,...,x14,y14,####<transcription>
|
||||
line = line.strip()
|
||||
strs = line.split(',')
|
||||
category_id = 1
|
||||
assert strs[28][0] == '#'
|
||||
xy = [int(x) for x in strs[0:28]]
|
||||
assert len(xy) == 28
|
||||
coordinates = np.array(xy).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
iscrowd = 0
|
||||
area = polygon.area
|
||||
# convert to COCO style XYWH format
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
|
||||
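# strs[28] has the form '####<transcription>'; strip the 4-character prefix below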
text = strs[28][4:]
|
||||
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=category_id,
|
||||
bbox=bbox,
|
||||
area=area,
|
||||
text=text,
|
||||
segmentation=[xy])
|
||||
anno_info.append(anno)
|
||||
img_info.update(anno_info=anno_info)
|
||||
return img_info
|
||||
|
||||
|
||||
def load_xml_info(gt_file, img_info):
|
||||
|
||||
obj = ET.parse(gt_file)
|
||||
anno_info = []
|
||||
for image in obj.getroot(): # image
|
||||
for box in image:  # box
|
||||
h = box.attrib['height']
|
||||
w = box.attrib['width']
|
||||
x = box.attrib['left']
|
||||
y = box.attrib['top']
|
||||
text = box[0].text
|
||||
segs = box[1].text
|
||||
pts = segs.strip().split(',')
|
||||
pts = [int(x) for x in pts]
|
||||
assert len(pts) == 28
|
||||
# pts = []
|
||||
# for iter in range(2,len(box)):
|
||||
# pts.extend([int(box[iter].attrib['x']),
|
||||
# int(box[iter].attrib['y'])])
|
||||
iscrowd = 0
|
||||
category_id = 1
|
||||
bbox = [int(x), int(y), int(w), int(h)]
|
||||
|
||||
coordinates = np.array(pts).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
area = polygon.area
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=category_id,
|
||||
bbox=bbox,
|
||||
area=area,
|
||||
text=text,
|
||||
segmentation=[pts])
|
||||
anno_info.append(anno)
|
||||
|
||||
img_info.update(anno_info=anno_info)
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def load_img_info(files, split):
|
||||
"""Load the information of one image.
|
||||
|
||||
Args:
|
||||
files(tuple): The tuple of (img_file, groundtruth_file)
|
||||
split(str): The split of dataset: training or test
|
||||
|
||||
Returns:
|
||||
img_info(dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(files, tuple)
|
||||
assert isinstance(split, str)
|
||||
|
||||
img_file, gt_file = files
|
||||
# read imgs with ignoring orientations
|
||||
img = mmcv.imread(img_file, 'unchanged')
|
||||
|
||||
split_name = osp.basename(osp.dirname(img_file))
|
||||
img_info = dict(
|
||||
# remove img_prefix for filename
|
||||
file_name=osp.join(split_name, osp.basename(img_file)),
|
||||
height=img.shape[0],
|
||||
width=img.shape[1],
|
||||
# anno_info=anno_info,
|
||||
segm_file=osp.join(split_name, osp.basename(gt_file)))
|
||||
|
||||
if split == 'training':
|
||||
img_info = load_xml_info(gt_file, img_info)
|
||||
elif split == 'test':
|
||||
img_info = load_txt_info(gt_file, img_info)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Convert ctw1500 annotations to COCO format')
|
||||
parser.add_argument('root_path', help='ctw1500 root path')
|
||||
parser.add_argument('-o', '--out-dir', help='output path')
|
||||
parser.add_argument(
|
||||
'--split-list',
|
||||
nargs='+',
|
||||
help='a list of splits. e.g., "--split-list training test"')
|
||||
|
||||
parser.add_argument(
|
||||
'--nproc', default=1, type=int, help='number of process')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
out_dir = args.out_dir if args.out_dir else root_path
|
||||
mmengine.mkdir_or_exist(out_dir)
|
||||
|
||||
img_dir = osp.join(root_path, 'imgs')
|
||||
gt_dir = osp.join(root_path, 'annotations')
|
||||
|
||||
set_name = {}
|
||||
for split in args.split_list:
|
||||
set_name.update({split: 'instances_' + split + '.json'})
|
||||
assert osp.exists(osp.join(img_dir, split))
|
||||
|
||||
for split, json_name in set_name.items():
|
||||
print(f'Converting {split} into {json_name}')
|
||||
with mmengine.Timer(
|
||||
print_tmpl='It takes {}s to convert icdar annotation'):
|
||||
files = collect_files(
|
||||
osp.join(img_dir, split), osp.join(gt_dir, split), split)
|
||||
image_infos = collect_annotations(files, split, nproc=args.nproc)
|
||||
dump_ocr_data(image_infos, osp.join(out_dir, json_name), 'textdet')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,167 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import os
|
||||
import os.path as osp
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
|
||||
from mmocr.utils import dump_ocr_data
|
||||
|
||||
|
||||
def collect_files(img_dir, gt_dir, split):
|
||||
"""Collect all images and their corresponding groundtruth files.
|
||||
|
||||
Args:
|
||||
img_dir (str): The image directory
|
||||
gt_dir (str): The groundtruth directory
|
||||
|
||||
Returns:
|
||||
files (list): The list of tuples (img_file, groundtruth_file)
|
||||
"""
|
||||
assert isinstance(img_dir, str)
|
||||
assert img_dir
|
||||
assert isinstance(gt_dir, str)
|
||||
assert gt_dir
|
||||
|
||||
ann_list, imgs_list, splits = [], [], []
|
||||
for img in os.listdir(img_dir):
|
||||
img_path = osp.join(img_dir, img)
|
||||
imgs_list.append(img_path)
|
||||
ann_list.append(osp.join(gt_dir, 'gt_' + img.split('.')[0] + '.txt'))
|
||||
splits.append(split)
|
||||
|
||||
files = list(zip(sorted(imgs_list), sorted(ann_list), splits))
|
||||
assert len(files), f'No images found in {img_dir}'
|
||||
print(f'Loaded {len(files)} images from {img_dir}')
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def collect_annotations(files, nproc=1):
|
||||
"""Collect the annotation information.
|
||||
|
||||
Args:
|
||||
files (list): The list of tuples (image_file, groundtruth_file)
|
||||
nproc (int): The number of process to collect annotations
|
||||
|
||||
Returns:
|
||||
images (list): The list of image information dicts
|
||||
"""
|
||||
assert isinstance(files, list)
|
||||
assert isinstance(nproc, int)
|
||||
|
||||
if nproc > 1:
|
||||
images = mmengine.track_parallel_progress(
|
||||
load_img_info, files, nproc=nproc)
|
||||
else:
|
||||
images = mmengine.track_progress(load_img_info, files)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def load_img_info(files):
|
||||
"""Load the information of one image.
|
||||
|
||||
Args:
|
||||
files (tuple): The tuple of (img_file, groundtruth_file, split)
|
||||
|
||||
Returns:
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(files, tuple)
|
||||
|
||||
img_file, gt_file, split = files
|
||||
# read imgs while ignoring orientations
|
||||
img = mmcv.imread(img_file, 'unchanged')
|
||||
|
||||
img_info = dict(
|
||||
file_name=osp.join(osp.basename(img_file)),
|
||||
height=img.shape[0],
|
||||
width=img.shape[1],
|
||||
segm_file=osp.join(osp.basename(gt_file)))
|
||||
|
||||
# IC13 uses different separator in gt files
|
||||
if split == 'training':
|
||||
separator = ' '
|
||||
elif split == 'test':
|
||||
separator = ','
|
||||
else:
|
||||
raise NotImplementedError
|
||||
if osp.splitext(gt_file)[1] == '.txt':
|
||||
img_info = load_txt_info(gt_file, img_info, separator)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def load_txt_info(gt_file, img_info, separator):
|
||||
"""Collect the annotation information.
|
||||
|
||||
The annotation format is as the following:
|
||||
[train]
|
||||
left top right bottom "transcription"
|
||||
[test]
|
||||
left, top, right, bottom, "transcription"
|
||||
|
||||
Args:
|
||||
gt_file (str): The path to ground-truth
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
|
||||
Returns:
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
"""
|
||||
anno_info = []
|
||||
with open(gt_file) as f:
|
||||
lines = f.readlines()
|
||||
for line in lines:
|
||||
xmin, ymin, xmax, ymax = line.split(separator)[0:4]
|
||||
x = max(0, int(xmin))
|
||||
y = max(0, int(ymin))
|
||||
w = int(xmax) - x
|
||||
h = int(ymax) - y
|
||||
bbox = [x, y, w, h]
|
||||
segmentation = [x, y, x + w, y, x + w, y + h, x, y + h]
|
||||
|
||||
anno = dict(
|
||||
iscrowd=0,
|
||||
category_id=1,
|
||||
bbox=bbox,
|
||||
area=w * h,
|
||||
segmentation=[segmentation])
|
||||
anno_info.append(anno)
|
||||
img_info.update(anno_info=anno_info)
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate training and test set of IC13')
|
||||
parser.add_argument('root_path', help='Root dir path of IC13')
|
||||
parser.add_argument(
|
||||
'--nproc', default=1, type=int, help='Number of process')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
|
||||
for split in ['training', 'test']:
|
||||
print(f'Processing {split} set...')
|
||||
with mmengine.Timer(
|
||||
print_tmpl='It takes {}s to convert IC13 annotation'):
|
||||
files = collect_files(
|
||||
osp.join(root_path, 'imgs', split),
|
||||
osp.join(root_path, 'annotations', split), split)
|
||||
image_infos = collect_annotations(files, nproc=args.nproc)
|
||||
dump_ocr_data(image_infos,
|
||||
osp.join(root_path, 'instances_' + split + '.json'),
|
||||
'textdet')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,185 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import glob
|
||||
import os.path as osp
|
||||
from functools import partial
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
from mmocr.utils import dump_ocr_data, list_from_file
|
||||
|
||||
|
||||
def collect_files(img_dir, gt_dir):
|
||||
"""Collect all images and their corresponding groundtruth files.
|
||||
|
||||
Args:
|
||||
img_dir(str): The image directory
|
||||
gt_dir(str): The groundtruth directory
|
||||
|
||||
Returns:
|
||||
files(list): The list of tuples (img_file, groundtruth_file)
|
||||
"""
|
||||
assert isinstance(img_dir, str)
|
||||
assert img_dir
|
||||
assert isinstance(gt_dir, str)
|
||||
assert gt_dir
|
||||
|
||||
# note that we handle png and jpg only. Pls convert others such as gif to
|
||||
# jpg or png offline
|
||||
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
|
||||
imgs_list = []
|
||||
for suffix in suffixes:
|
||||
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
|
||||
|
||||
files = []
|
||||
for img_file in imgs_list:
|
||||
gt_file = gt_dir + '/gt_' + osp.splitext(
|
||||
osp.basename(img_file))[0] + '.txt'
|
||||
files.append((img_file, gt_file))
|
||||
assert len(files), f'No images found in {img_dir}'
|
||||
print(f'Loaded {len(files)} images from {img_dir}')
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def collect_annotations(files, dataset, nproc=1):
|
||||
"""Collect the annotation information.
|
||||
|
||||
Args:
|
||||
files(list): The list of tuples (image_file, groundtruth_file)
|
||||
dataset(str): The dataset name, icdar2015 or icdar2017
|
||||
nproc(int): The number of process to collect annotations
|
||||
|
||||
Returns:
|
||||
images(list): The list of image information dicts
|
||||
"""
|
||||
assert isinstance(files, list)
|
||||
assert isinstance(dataset, str)
|
||||
assert dataset
|
||||
assert isinstance(nproc, int)
|
||||
|
||||
load_img_info_with_dataset = partial(load_img_info, dataset=dataset)
|
||||
if nproc > 1:
|
||||
images = mmengine.track_parallel_progress(
|
||||
load_img_info_with_dataset, files, nproc=nproc)
|
||||
else:
|
||||
images = mmengine.track_progress(load_img_info_with_dataset, files)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def load_img_info(files, dataset):
|
||||
"""Load the information of one image.
|
||||
|
||||
Args:
|
||||
files(tuple): The tuple of (img_file, groundtruth_file)
|
||||
dataset(str): Dataset name, icdar2015 or icdar2017
|
||||
|
||||
Returns:
|
||||
img_info(dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(files, tuple)
|
||||
assert isinstance(dataset, str)
|
||||
assert dataset
|
||||
|
||||
img_file, gt_file = files
|
||||
# read imgs with ignoring orientations
|
||||
img = mmcv.imread(img_file, 'unchanged')
|
||||
|
||||
if dataset == 'icdar2017':
|
||||
gt_list = list_from_file(gt_file)
|
||||
elif dataset == 'icdar2015':
|
||||
gt_list = list_from_file(gt_file, encoding='utf-8-sig')
|
||||
else:
|
||||
raise NotImplementedError(f'Not support {dataset}')
|
||||
|
||||
anno_info = []
|
||||
for line in gt_list:
|
||||
# each line has one polygon (4 vertices) followed by other fields,
# e.g., 695,885,866,888,867,1146,696,1143,Latin,9
|
||||
line = line.strip()
|
||||
strs = line.split(',')
|
||||
category_id = 1
|
||||
xy = [int(x) for x in strs[0:8]]
|
||||
coordinates = np.array(xy).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
iscrowd = 0
|
||||
# set iscrowd to 1 to ignore this instance.
|
||||
if (dataset == 'icdar2015'
|
||||
and strs[8] == '###') or (dataset == 'icdar2017'
|
||||
and strs[9] == '###'):
|
||||
iscrowd = 1
|
||||
print('ignore text')
|
||||
|
||||
area = polygon.area
|
||||
# convert to COCO style XYWH format
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
|
||||
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=category_id,
|
||||
bbox=bbox,
|
||||
area=area,
|
||||
segmentation=[xy])
|
||||
anno_info.append(anno)
|
||||
split_name = osp.basename(osp.dirname(img_file))
|
||||
img_info = dict(
|
||||
# remove img_prefix for filename
|
||||
file_name=osp.join(split_name, osp.basename(img_file)),
|
||||
height=img.shape[0],
|
||||
width=img.shape[1],
|
||||
anno_info=anno_info,
|
||||
segm_file=osp.join(split_name, osp.basename(gt_file)))
|
||||
return img_info
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Convert Icdar2015 or Icdar2017 annotations to COCO format'
|
||||
)
|
||||
parser.add_argument('icdar_path', help='icdar root path')
|
||||
parser.add_argument('-o', '--out-dir', help='output path')
|
||||
parser.add_argument(
|
||||
'-d', '--dataset', required=True, help='icdar2017 or icdar2015')
|
||||
parser.add_argument(
|
||||
'--split-list',
|
||||
nargs='+',
|
||||
help='a list of splits. e.g., "--split-list training test"')
|
||||
|
||||
parser.add_argument(
|
||||
'--nproc', default=1, type=int, help='number of process')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
icdar_path = args.icdar_path
|
||||
out_dir = args.out_dir if args.out_dir else icdar_path
|
||||
mmengine.mkdir_or_exist(out_dir)
|
||||
|
||||
img_dir = osp.join(icdar_path, 'imgs')
|
||||
gt_dir = osp.join(icdar_path, 'annotations')
|
||||
|
||||
set_name = {}
|
||||
for split in args.split_list:
|
||||
set_name.update({split: 'instances_' + split + '.json'})
|
||||
assert osp.exists(osp.join(img_dir, split))
|
||||
|
||||
for split, json_name in set_name.items():
|
||||
print(f'Converting {split} into {json_name}')
|
||||
with mmengine.Timer(
|
||||
print_tmpl='It takes {}s to convert icdar annotation'):
|
||||
files = collect_files(
|
||||
osp.join(img_dir, split), osp.join(gt_dir, split))
|
||||
image_infos = collect_annotations(
|
||||
files, args.dataset, nproc=args.nproc)
|
||||
dump_ocr_data(image_infos, osp.join(out_dir, json_name), 'textdet')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,181 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import json
|
||||
import os.path as osp
|
||||
import time
|
||||
|
||||
import lmdb
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
from scipy.io import loadmat
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
from mmocr.utils import check_argument
|
||||
|
||||
|
||||
def trace_boundary(char_boxes):
|
||||
"""Trace the boundary point of text.
|
||||
|
||||
Args:
|
||||
char_boxes (list[ndarray]): The char boxes for one text. Each element
|
||||
is 4x2 ndarray.
|
||||
|
||||
Returns:
|
||||
boundary (ndarray): The boundary point sets with size nx2.
|
||||
"""
|
||||
assert check_argument.is_type_list(char_boxes, np.ndarray)
|
||||
|
||||
# from top left to top right
|
||||
p_top = [box[0:2] for box in char_boxes]
|
||||
# from bottom right to bottom left
|
||||
p_bottom = [
|
||||
char_boxes[idx][[2, 3], :]
|
||||
for idx in range(len(char_boxes) - 1, -1, -1)
|
||||
]
|
||||
|
||||
p = p_top + p_bottom
|
||||
|
||||
boundary = np.concatenate(p).astype(int)
|
||||
|
||||
return boundary
|
||||
|
||||
|
||||
def match_bbox_char_str(bboxes, char_bboxes, strs):
|
||||
"""match the bboxes, char bboxes, and strs.
|
||||
|
||||
Args:
|
||||
bboxes (ndarray): The text boxes of size (2, 4, num_box).
|
||||
char_bboxes (ndarray): The char boxes of size (2, 4, num_char_box).
|
||||
strs (ndarray): The string of size (num_strs,)
|
||||
"""
|
||||
assert isinstance(bboxes, np.ndarray)
|
||||
assert isinstance(char_bboxes, np.ndarray)
|
||||
assert isinstance(strs, np.ndarray)
|
||||
bboxes = bboxes.astype(np.int32)
|
||||
char_bboxes = char_bboxes.astype(np.int32)
|
||||
|
||||
if len(char_bboxes.shape) == 2:
|
||||
char_bboxes = np.expand_dims(char_bboxes, axis=2)
|
||||
char_bboxes = np.transpose(char_bboxes, (2, 1, 0))
|
||||
if len(bboxes.shape) == 2:
|
||||
bboxes = np.expand_dims(bboxes, axis=2)
|
||||
bboxes = np.transpose(bboxes, (2, 1, 0))
|
||||
chars = ''.join(strs).replace('\n', '').replace(' ', '')
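# chars now holds every character of the image's text in reading order;
# word boundaries are recovered below from cumulative word lengths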
|
||||
num_boxes = bboxes.shape[0]
|
||||
|
||||
poly_list = [Polygon(bboxes[iter]) for iter in range(num_boxes)]
|
||||
poly_box_list = [bboxes[iter] for iter in range(num_boxes)]
|
||||
|
||||
poly_char_list = [[] for iter in range(num_boxes)]
|
||||
poly_char_idx_list = [[] for iter in range(num_boxes)]
|
||||
poly_charbox_list = [[] for iter in range(num_boxes)]
|
||||
|
||||
words = []
|
||||
for s in strs:
|
||||
words += s.split()
|
||||
words_len = [len(w) for w in words]
|
||||
words_end_inx = np.cumsum(words_len)
|
||||
start_inx = 0
|
||||
for word_inx, end_inx in enumerate(words_end_inx):
|
||||
for char_inx in range(start_inx, end_inx):
|
||||
poly_char_idx_list[word_inx].append(char_inx)
|
||||
poly_char_list[word_inx].append(chars[char_inx])
|
||||
poly_charbox_list[word_inx].append(char_bboxes[char_inx])
|
||||
start_inx = end_inx
|
||||
|
||||
for box_inx in range(num_boxes):
|
||||
assert len(poly_charbox_list[box_inx]) > 0
|
||||
|
||||
poly_boundary_list = []
|
||||
for item in poly_charbox_list:
|
||||
boundary = np.ndarray((0, 2))
|
||||
if len(item) > 0:
|
||||
boundary = trace_boundary(item)
|
||||
poly_boundary_list.append(boundary)
|
||||
|
||||
return (poly_list, poly_box_list, poly_boundary_list, poly_charbox_list,
|
||||
poly_char_idx_list, poly_char_list)
|
||||
|
||||
|
||||
def convert_annotations(root_path, gt_name, lmdb_name):
|
||||
"""Convert the annotation into lmdb dataset.
|
||||
|
||||
Args:
|
||||
root_path (str): The root path of dataset.
|
||||
gt_name (str): The ground truth filename.
|
||||
lmdb_name (str): The output lmdb filename.
|
||||
"""
|
||||
assert isinstance(root_path, str)
|
||||
assert isinstance(gt_name, str)
|
||||
assert isinstance(lmdb_name, str)
|
||||
start_time = time.time()
|
||||
gt = loadmat(gt_name)
|
||||
img_num = len(gt['imnames'][0])
|
||||
env = lmdb.open(lmdb_name, map_size=int(1e9 * 40))
|
||||
with env.begin(write=True) as txn:
|
||||
for img_id in range(img_num):
|
||||
if img_id % 1000 == 0 and img_id > 0:
|
||||
total_time_sec = time.time() - start_time
|
||||
avg_time_sec = total_time_sec / img_id
|
||||
eta_mins = (avg_time_sec * (img_num - img_id)) / 60
|
||||
print(f'\ncurrent_img/total_imgs {img_id}/{img_num} | '
|
||||
f'eta: {eta_mins:.3f} mins')
|
||||
# for each img
|
||||
img_file = osp.join(root_path, 'imgs', gt['imnames'][0][img_id][0])
|
||||
img = mmcv.imread(img_file, 'unchanged')
|
||||
height, width = img.shape[0:2]
|
||||
img_json = {}
|
||||
img_json['file_name'] = gt['imnames'][0][img_id][0]
|
||||
img_json['height'] = height
|
||||
img_json['width'] = width
|
||||
img_json['annotations'] = []
|
||||
wordBB = gt['wordBB'][0][img_id]
|
||||
charBB = gt['charBB'][0][img_id]
|
||||
txt = gt['txt'][0][img_id]
|
||||
poly_list, _, poly_boundary_list, _, _, _ = match_bbox_char_str(
|
||||
wordBB, charBB, txt)
|
||||
for poly_inx in range(len(poly_list)):
|
||||
|
||||
polygon = poly_list[poly_inx]
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
|
||||
anno_info = dict()
|
||||
anno_info['iscrowd'] = 0
|
||||
anno_info['category_id'] = 1
|
||||
anno_info['bbox'] = bbox
|
||||
anno_info['segmentation'] = [
|
||||
poly_boundary_list[poly_inx].flatten().tolist()
|
||||
]
|
||||
|
||||
img_json['annotations'].append(anno_info)
|
||||
string = json.dumps(img_json)
|
||||
txn.put(str(img_id).encode('utf8'), string.encode('utf8'))
|
||||
key = b'total_number'
|
||||
value = str(img_num).encode('utf8')
|
||||
txn.put(key, value)
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Convert synthtext to lmdb dataset')
|
||||
parser.add_argument('synthtext_path', help='synthetic root path')
|
||||
parser.add_argument('-o', '--out-dir', help='output path')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
# TODO: Refactor synthtext
|
||||
def main():
|
||||
args = parse_args()
|
||||
synthtext_path = args.synthtext_path
|
||||
out_dir = args.out_dir if args.out_dir else synthtext_path
|
||||
mmengine.mkdir_or_exist(out_dir)
|
||||
|
||||
gt_name = osp.join(synthtext_path, 'gt.mat')
|
||||
lmdb_name = 'synthtext.lmdb'
|
||||
convert_annotations(synthtext_path, gt_name, osp.join(out_dir, lmdb_name))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@ -1,76 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import math
|
||||
import os.path as osp
|
||||
|
||||
import mmengine
|
||||
|
||||
from mmocr.utils import dump_ocr_data
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate training and validation set of TextOCR ')
|
||||
parser.add_argument('root_path', help='Root dir path of TextOCR')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def collect_textocr_info(root_path, annotation_filename, print_every=1000):
|
||||
|
||||
annotation_path = osp.join(root_path, annotation_filename)
|
||||
if not osp.exists(annotation_path):
|
||||
raise Exception(
|
||||
f'{annotation_path} not exists, please check and try again.')
|
||||
|
||||
annotation = mmengine.load(annotation_path)
|
||||
|
||||
# img_idx = img_start_idx
|
||||
img_infos = []
|
||||
for i, img_info in enumerate(annotation['imgs'].values()):
|
||||
if i > 0 and i % print_every == 0:
|
||||
print(f'{i}/{len(annotation["imgs"].values())}')
|
||||
|
||||
img_info['segm_file'] = annotation_path
|
||||
ann_ids = annotation['imgToAnns'][img_info['id']]
|
||||
anno_info = []
|
||||
for ann_id in ann_ids:
|
||||
ann = annotation['anns'][ann_id]
|
||||
|
||||
# Mark illegible or non-English words (labelled '.') as crowd regions
|
||||
text_label = ann['utf8_string']
|
||||
iscrowd = 1 if text_label == '.' else 0
|
||||
|
||||
x, y, w, h = ann['bbox']
|
||||
x, y = max(0, math.floor(x)), max(0, math.floor(y))
|
||||
w, h = math.ceil(w), math.ceil(h)
|
||||
bbox = [x, y, w, h]
|
||||
segmentation = [max(0, int(x)) for x in ann['points']]
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=1,
|
||||
bbox=bbox,
|
||||
area=ann['area'],
|
||||
segmentation=[segmentation])
|
||||
anno_info.append(anno)
|
||||
img_info.update(anno_info=anno_info)
|
||||
img_infos.append(img_info)
|
||||
return img_infos
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
print('Processing training set...')
|
||||
training_infos = collect_textocr_info(root_path, 'TextOCR_0.1_train.json')
|
||||
dump_ocr_data(training_infos,
|
||||
osp.join(root_path, 'instances_training.json'), 'textdet')
|
||||
print('Processing validation set...')
|
||||
val_infos = collect_textocr_info(root_path, 'TextOCR_0.1_val.json')
|
||||
dump_ocr_data(val_infos, osp.join(root_path, 'instances_val.json'),
|
||||
'textdet')
|
||||
print('Finish')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
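The bbox handling in `collect_textocr_info` floors and clamps the top-left corner while ceiling the width and height, so rounding never shrinks a box. A small illustration with made-up numbers:

```python
import math

# Raw TextOCR bbox (made-up values): floor/clamp the corner, ceil the size.
x, y, w, h = 12.7, -0.3, 31.2, 18.6
x, y = max(0, math.floor(x)), max(0, math.floor(y))  # -> 12, 0
w, h = math.ceil(w), math.ceil(h)                    # -> 32, 19
print([x, y, w, h])                                  # [12, 0, 32, 19]
```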
|
|
@@ -1,410 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import os.path as osp
|
||||
import re
|
||||
|
||||
import cv2
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
import scipy.io as scio
|
||||
import yaml
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
from mmocr.utils import dump_ocr_data
|
||||
|
||||
|
||||
def collect_files(img_dir, gt_dir):
|
||||
"""Collect all images and their corresponding groundtruth files.
|
||||
|
||||
Args:
|
||||
img_dir (str): The image directory
|
||||
gt_dir (str): The groundtruth directory
|
||||
|
||||
Returns:
|
||||
files (list): The list of tuples (img_file, groundtruth_file)
|
||||
"""
|
||||
assert isinstance(img_dir, str)
|
||||
assert img_dir
|
||||
assert isinstance(gt_dir, str)
|
||||
assert gt_dir
|
||||
|
||||
# Note that we handle png and jpg only. Please convert other formats such as gif to
|
||||
# jpg or png offline
|
||||
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
|
||||
# suffixes = ['.png']
|
||||
|
||||
imgs_list = []
|
||||
for suffix in suffixes:
|
||||
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
|
||||
|
||||
imgs_list = sorted(imgs_list)
|
||||
ann_list = sorted(
|
||||
osp.join(gt_dir, gt_file) for gt_file in os.listdir(gt_dir))
|
||||
|
||||
files = list(zip(imgs_list, ann_list))
|
||||
assert len(files), f'No images found in {img_dir}'
|
||||
print(f'Loaded {len(files)} images from {img_dir}')
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def collect_annotations(files, nproc=1):
|
||||
"""Collect the annotation information.
|
||||
|
||||
Args:
|
||||
files (list): The list of tuples (image_file, groundtruth_file)
|
||||
nproc (int): The number of processes used to collect annotations
|
||||
|
||||
Returns:
|
||||
images (list): The list of image information dicts
|
||||
"""
|
||||
assert isinstance(files, list)
|
||||
assert isinstance(nproc, int)
|
||||
|
||||
if nproc > 1:
|
||||
images = mmengine.track_parallel_progress(
|
||||
load_img_info, files, nproc=nproc)
|
||||
else:
|
||||
images = mmengine.track_progress(load_img_info, files)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def get_contours_mat(gt_path):
|
||||
"""Get the contours and words for each ground_truth mat file.
|
||||
|
||||
Args:
|
||||
gt_path (str): The relative path of the ground_truth mat file
|
||||
|
||||
Returns:
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
"""
|
||||
assert isinstance(gt_path, str)
|
||||
|
||||
contours = []
|
||||
words = []
|
||||
data = scio.loadmat(gt_path)
|
||||
# 'gt' for the latest version; 'polygt' for the legacy version
|
||||
keys = data.keys()
|
||||
if 'gt' in keys:
|
||||
data_polygt = data.get('gt')
|
||||
elif 'polygt' in keys:
|
||||
data_polygt = data.get('polygt')
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
for i, lines in enumerate(data_polygt):
|
||||
X = np.array(lines[1])
|
||||
Y = np.array(lines[3])
|
||||
|
||||
point_num = len(X[0])
|
||||
word = lines[4]
|
||||
if len(word) == 0 or word == '#':
|
||||
word = '###'
|
||||
else:
|
||||
word = word[0]
|
||||
|
||||
words.append(word)
|
||||
|
||||
arr = np.concatenate([X, Y]).T
|
||||
contour = []
|
||||
for i in range(point_num):
|
||||
contour.append(arr[i][0])
|
||||
contour.append(arr[i][1])
|
||||
contours.append(np.asarray(contour))
|
||||
|
||||
return contours, words
|
||||
|
||||
|
||||
def load_mat_info(img_info, gt_file):
|
||||
"""Load the information of one ground truth in .mat format.
|
||||
|
||||
Args:
|
||||
img_info (dict): The dict of only the image information
|
||||
gt_file (str): The relative path of the ground_truth mat
|
||||
file for one image
|
||||
|
||||
Returns:
|
||||
img_info(dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(img_info, dict)
|
||||
assert isinstance(gt_file, str)
|
||||
|
||||
contours, texts = get_contours_mat(gt_file)
|
||||
anno_info = []
|
||||
for contour, text in zip(contours, texts):
|
||||
if contour.shape[0] == 2:
|
||||
continue
|
||||
category_id = 1
|
||||
coordinates = np.array(contour).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
iscrowd = 1 if text == '###' else 0
|
||||
|
||||
area = polygon.area
|
||||
# convert to COCO style XYWH format
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
|
||||
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=category_id,
|
||||
bbox=bbox,
|
||||
area=area,
|
||||
text=text,
|
||||
segmentation=[contour])
|
||||
anno_info.append(anno)
|
||||
|
||||
img_info.update(anno_info=anno_info)
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def process_line(line, contours, words):
|
||||
"""Get the contours and words by processing each line in the gt file.
|
||||
|
||||
Args:
|
||||
line(str): The line in gt file containing annotation info
|
||||
contours(list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words(list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
|
||||
Returns:
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
"""
|
||||
|
||||
line = '{' + line.replace('[[', '[').replace(']]', ']') + '}'
|
||||
ann_dict = re.sub('([0-9]) +([0-9])', r'\1,\2', line)
|
||||
ann_dict = re.sub('([0-9]) +([ 0-9])', r'\1,\2', ann_dict)
|
||||
ann_dict = re.sub('([0-9]) -([0-9])', r'\1,-\2', ann_dict)
|
||||
ann_dict = ann_dict.replace("[u',']", "[u'#']")
|
||||
ann_dict = yaml.safe_load(ann_dict)
|
||||
|
||||
X = np.array([ann_dict['x']])
|
||||
Y = np.array([ann_dict['y']])
|
||||
|
||||
if len(ann_dict['transcriptions']) == 0:
|
||||
word = '###'
|
||||
else:
|
||||
word = ann_dict['transcriptions'][0]
|
||||
if len(ann_dict['transcriptions']) > 1:
|
||||
for ann_word in ann_dict['transcriptions'][1:]:
|
||||
word += ',' + ann_word
|
||||
word = str(eval(word))
|
||||
words.append(word)
|
||||
|
||||
point_num = len(X[0])
|
||||
|
||||
arr = np.concatenate([X, Y]).T
|
||||
contour = []
|
||||
for i in range(point_num):
|
||||
contour.append(arr[i][0])
|
||||
contour.append(arr[i][1])
|
||||
contours.append(np.asarray(contour))
|
||||
|
||||
return contours, words
|
||||
|
||||
|
||||
def get_contours_txt(gt_path):
|
||||
"""Get the contours and words for each ground_truth txt file.
|
||||
|
||||
Args:
|
||||
gt_path (str): The relative path of the ground_truth mat file
|
||||
|
||||
Returns:
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
"""
|
||||
assert isinstance(gt_path, str)
|
||||
|
||||
contours = []
|
||||
words = []
|
||||
|
||||
with open(gt_path) as f:
|
||||
tmp_line = ''
|
||||
for idx, line in enumerate(f):
|
||||
line = line.strip()
|
||||
if idx == 0:
|
||||
tmp_line = line
|
||||
continue
|
||||
if not line.startswith('x:'):
|
||||
tmp_line += ' ' + line
|
||||
continue
|
||||
else:
|
||||
complete_line = tmp_line
|
||||
tmp_line = line
|
||||
contours, words = process_line(complete_line, contours, words)
|
||||
|
||||
if tmp_line != '':
|
||||
contours, words = process_line(tmp_line, contours, words)
|
||||
|
||||
words = ['###' if word == '#' else word for word in words]
|
||||
|
||||
return contours, words
|
||||
|
||||
|
||||
def load_txt_info(gt_file, img_info):
|
||||
"""Load the information of one ground truth in .txt format.
|
||||
|
||||
Args:
|
||||
img_info (dict): The dict of only the image information
|
||||
gt_file (str): The relative path of the ground_truth mat
|
||||
file for one image
|
||||
|
||||
Returns:
|
||||
img_info(dict): The dict of the img and annotation information
|
||||
"""
|
||||
|
||||
contours, texts = get_contours_txt(gt_file)
|
||||
anno_info = []
|
||||
for contour, text in zip(contours, texts):
|
||||
if contour.shape[0] == 2:
|
||||
continue
|
||||
category_id = 1
|
||||
coordinates = np.array(contour).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
iscrowd = 1 if text == '###' else 0
|
||||
|
||||
area = polygon.area
|
||||
# convert to COCO style XYWH format
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
|
||||
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=category_id,
|
||||
bbox=bbox,
|
||||
area=area,
|
||||
text=text,
|
||||
segmentation=[contour])
|
||||
anno_info.append(anno)
|
||||
|
||||
img_info.update(anno_info=anno_info)
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def load_png_info(gt_file, img_info):
|
||||
"""Load the information of one ground truth in .png format.
|
||||
|
||||
Args:
|
||||
gt_file (str): The relative path of the ground_truth file for one image
|
||||
img_info (dict): The dict of only the image information
|
||||
|
||||
Returns:
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(gt_file, str)
|
||||
assert isinstance(img_info, dict)
|
||||
gt_img = cv2.imread(gt_file, 0)
|
||||
contours, _ = cv2.findContours(gt_img, cv2.RETR_EXTERNAL,
|
||||
cv2.CHAIN_APPROX_SIMPLE)
|
||||
|
||||
anno_info = []
|
||||
for contour in contours:
|
||||
if contour.shape[0] == 2:
|
||||
continue
|
||||
category_id = 1
|
||||
xy = np.array(contour).flatten().tolist()
|
||||
|
||||
coordinates = np.array(contour).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
iscrowd = 0
|
||||
|
||||
area = polygon.area
|
||||
# convert to COCO style XYWH format
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x - min_x, max_y - min_y]
|
||||
|
||||
anno = dict(
|
||||
iscrowd=iscrowd,
|
||||
category_id=category_id,
|
||||
bbox=bbox,
|
||||
area=area,
|
||||
segmentation=[xy])
|
||||
anno_info.append(anno)
|
||||
|
||||
img_info.update(anno_info=anno_info)
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def load_img_info(files):
|
||||
"""Load the information of one image.
|
||||
|
||||
Args:
|
||||
files (tuple): The tuple of (img_file, groundtruth_file)
|
||||
|
||||
Returns:
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(files, tuple)
|
||||
|
||||
img_file, gt_file = files
|
||||
# read imgs while ignoring orientations
|
||||
img = mmcv.imread(img_file, 'unchanged')
|
||||
|
||||
split_name = osp.basename(osp.dirname(img_file))
|
||||
img_info = dict(
|
||||
# remove img_prefix for filename
|
||||
file_name=osp.join(split_name, osp.basename(img_file)),
|
||||
height=img.shape[0],
|
||||
width=img.shape[1],
|
||||
# anno_info=anno_info,
|
||||
segm_file=osp.join(split_name, osp.basename(gt_file)))
|
||||
|
||||
if osp.splitext(gt_file)[1] == '.mat':
|
||||
img_info = load_mat_info(img_info, gt_file)
|
||||
elif osp.splitext(gt_file)[1] == '.txt':
|
||||
img_info = load_txt_info(gt_file, img_info)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Convert totaltext annotations to COCO format')
|
||||
parser.add_argument('root_path', help='Totaltext root path')
|
||||
parser.add_argument(
|
||||
'--nproc', default=1, type=int, help='Number of processes')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
img_dir = osp.join(root_path, 'imgs')
|
||||
gt_dir = osp.join(root_path, 'annotations')
|
||||
|
||||
set_name = {}
|
||||
for split in ['training', 'test']:
|
||||
set_name.update({split: 'instances_' + split + '.json'})
|
||||
assert osp.exists(osp.join(img_dir, split))
|
||||
|
||||
for split, json_name in set_name.items():
|
||||
print(f'Converting {split} into {json_name}')
|
||||
with mmengine.Timer(
|
||||
print_tmpl='It takes {}s to convert totaltext annotation'):
|
||||
files = collect_files(
|
||||
osp.join(img_dir, split), osp.join(gt_dir, split))
|
||||
image_infos = collect_annotations(files, nproc=args.nproc)
|
||||
dump_ocr_data(image_infos, osp.join(root_path, json_name),
|
||||
'textdet')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
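`process_line` above turns a raw legacy Total-Text annotation line into YAML before parsing it. A minimal sketch of that normalisation follows; the sample line is an assumed example of the legacy txt format, not copied from a real annotation file.

```python
import re

import yaml

# Assumed legacy Total-Text line: x/y arrays, orientation, transcription.
raw = ("x: [[115 503 494 115]], y: [[322 346 426 404]], "
       "ornt: [u'c'], transcriptions: [u'the']")

# Same normalisation steps as process_line: wrap in braces, flatten the
# double brackets, and insert commas between the space-separated numbers.
line = '{' + raw.replace('[[', '[').replace(']]', ']') + '}'
line = re.sub('([0-9]) +([0-9])', r'\1,\2', line)
line = re.sub('([0-9]) +([ 0-9])', r'\1,\2', line)
line = re.sub('([0-9]) -([0-9])', r'\1,-\2', line)
ann = yaml.safe_load(line)
print(ann['x'])  # [115, 503, 494, 115]
print(ann['y'])  # [322, 346, 426, 404]
```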
|
|
@@ -1,67 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import os.path as osp
|
||||
|
||||
from mmocr.utils import dump_ocr_data
|
||||
|
||||
|
||||
def convert_annotations(root_path, split):
|
||||
"""Convert original annotations to mmocr format.
|
||||
|
||||
The annotation format is as the following:
|
||||
word_1.png, "flying"
|
||||
word_2.png, "today"
|
||||
word_3.png, "means"
|
||||
See the format of converted annotation in mmocr.utils.dump_ocr_data.
|
||||
|
||||
Args:
|
||||
root_path (str): The root path of the dataset
|
||||
split (str): The split of dataset. Namely: training or test
|
||||
"""
|
||||
assert isinstance(root_path, str)
|
||||
assert isinstance(split, str)
|
||||
|
||||
img_info = []
|
||||
with open(
|
||||
osp.join(root_path, 'annotations',
|
||||
f'Challenge2_{split}_Task3_GT.txt'),
|
||||
encoding='"utf-8-sig') as f:
|
||||
annos = f.readlines()
|
||||
for anno in annos:
|
||||
seg = ' ' if split == 'Test1015' else ', "'
|
||||
# text may contain comma ','
|
||||
dst_img_name, word = anno.split(seg)
|
||||
word = word.replace('"\n', '')
|
||||
|
||||
img_info.append({
|
||||
'file_name': osp.basename(dst_img_name),
|
||||
'anno_info': [{
|
||||
'text': word
|
||||
}]
|
||||
})
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate training and test set of IC13')
|
||||
parser.add_argument('root_path', help='Root dir path of IC13')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
|
||||
for split in ['Train', 'Test', 'Test1015']:
|
||||
img_info = convert_annotations(root_path, split)
|
||||
dump_ocr_data(img_info,
|
||||
osp.join(root_path, f'{split.lower()}_label.json'),
|
||||
'textrecog')
|
||||
print(f'{split} split converted.')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
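The converter splits each IC13 annotation line on `', "'` rather than on a bare comma, because the transcription itself may contain commas. A small illustration with a made-up line:

```python
# Made-up annotation line in the Train/Test format: <image>, "<transcription>"
anno = 'word_1.png, "Tiredness, fatigue"\n'
seg = ', "'

dst_img_name, word = anno.split(seg)   # the comma inside the quotes survives
word = word.replace('"\n', '')
print(dst_img_name, word)              # word_1.png Tiredness, fatigue
```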
|
|
@@ -1,88 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import os
|
||||
import os.path as osp
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
import cv2
|
||||
|
||||
from mmocr.utils import dump_ocr_data
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate the test set of SVT by cropping box images.')
|
||||
parser.add_argument(
|
||||
'root_path',
|
||||
help='Root dir path of SVT, where test.xml is located, '
|
||||
'for example, "data/mixture/svt/svt1/"')
|
||||
parser.add_argument(
|
||||
'--resize',
|
||||
action='store_true',
|
||||
help='Whether resize cropped image to certain size.')
|
||||
parser.add_argument('--height', default=32, help='Resize height.')
|
||||
parser.add_argument('--width', default=100, help='Resize width.')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
|
||||
# inputs
|
||||
src_label_file = osp.join(root_path, 'test.xml')
|
||||
if not osp.exists(src_label_file):
|
||||
raise Exception(
|
||||
f'{src_label_file} does not exist, please check and try again.')
|
||||
src_image_root = root_path
|
||||
|
||||
# outputs
|
||||
dst_label_file = osp.join(root_path, 'test_label.json')
|
||||
dst_image_root = osp.join(root_path, 'image')
|
||||
os.makedirs(dst_image_root, exist_ok=True)
|
||||
|
||||
tree = ET.parse(src_label_file)
|
||||
root = tree.getroot()
|
||||
|
||||
index = 1
|
||||
img_info = []
|
||||
total_img_num = len(root)
|
||||
i = 1
|
||||
for image_node in root.findall('image'):
|
||||
image_name = image_node.find('imageName').text
|
||||
print(f'[{i}/{total_img_num}] Processing image: {image_name}')
|
||||
i += 1
|
||||
# lexicon = image_node.find('lex').text.lower()
|
||||
# lexicon_list = lexicon.split(',')
|
||||
# lex_size = len(lexicon_list)
|
||||
src_img = cv2.imread(osp.join(src_image_root, image_name))
|
||||
for rectangle in image_node.find('taggedRectangles'):
|
||||
x = int(rectangle.get('x'))
|
||||
y = int(rectangle.get('y'))
|
||||
w = int(rectangle.get('width'))
|
||||
h = int(rectangle.get('height'))
|
||||
rb, re = max(0, y), max(0, y + h)
|
||||
cb, ce = max(0, x), max(0, x + w)
|
||||
dst_img = src_img[rb:re, cb:ce]
|
||||
text_label = rectangle.find('tag').text.lower()
|
||||
if args.resize:
|
||||
dst_img = cv2.resize(dst_img, (args.width, args.height))
|
||||
dst_img_name = f'img_{index:04}' + '.jpg'
|
||||
index += 1
|
||||
dst_img_path = osp.join(dst_image_root, dst_img_name)
|
||||
cv2.imwrite(dst_img_path, dst_img)
|
||||
img_info.append({
|
||||
'file_name': dst_img_name,
|
||||
'anno_info': [{
|
||||
'text': text_label
|
||||
}]
|
||||
})
|
||||
|
||||
dump_ocr_data(img_info, dst_label_file, 'textrecog')
|
||||
print(f'Finished generating the SVT test set, '
|
||||
f'with label file {dst_label_file}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
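The SVT cropping above clamps each tagged rectangle to the image border and relies on numpy slicing to clip the far edge. A minimal sketch with illustrative numbers:

```python
import numpy as np

src_img = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in image
x, y, w, h = 600, -10, 80, 50                       # made-up rectangle from test.xml

rb, re_ = max(0, y), max(0, y + h)                  # rows 0..40 after clamping
cb, ce = max(0, x), max(0, x + w)                   # cols 600..680, clipped by numpy
dst_img = src_img[rb:re_, cb:ce]
print(dst_img.shape)                                # (40, 40, 3)
```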
|
|
@@ -1,146 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import os
|
||||
from functools import partial
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
from scipy.io import loadmat
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Crop images in Synthtext-style dataset in '
|
||||
'preparation for MMOCR\'s use')
|
||||
parser.add_argument(
|
||||
'anno_path', help='Path to gold annotation data (gt.mat)')
|
||||
parser.add_argument('img_path', help='Path to images')
|
||||
parser.add_argument('out_dir', help='Path of output images and labels')
|
||||
parser.add_argument(
|
||||
'--n_proc',
|
||||
default=1,
|
||||
type=int,
|
||||
help='Number of processes to run with')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def load_gt_datum(datum):
|
||||
img_path, txt, wordBB, charBB = datum
|
||||
words = []
|
||||
word_bboxes = []
|
||||
char_bboxes = []
|
||||
|
||||
# when there's only one word in txt
|
||||
# scipy will load it as a string
|
||||
if type(txt) is str:
|
||||
words = txt.split()
|
||||
else:
|
||||
for line in txt:
|
||||
words += line.split()
|
||||
|
||||
# From (2, 4, num_boxes) to (num_boxes, 4, 2)
|
||||
if len(wordBB.shape) == 2:
|
||||
wordBB = wordBB[:, :, np.newaxis]
|
||||
cur_wordBB = wordBB.transpose(2, 1, 0)
|
||||
for box in cur_wordBB:
|
||||
word_bboxes.append(
|
||||
[max(round(coord), 0) for pt in box for coord in pt])
|
||||
|
||||
# Validate word bboxes.
|
||||
if len(words) != len(word_bboxes):
|
||||
return
|
||||
|
||||
# From (2, 4, num_boxes) to (num_boxes, 4, 2)
|
||||
cur_charBB = charBB.transpose(2, 1, 0)
|
||||
for box in cur_charBB:
|
||||
char_bboxes.append(
|
||||
[max(round(coord), 0) for pt in box for coord in pt])
|
||||
|
||||
char_bbox_idx = 0
|
||||
char_bbox_grps = []
|
||||
|
||||
for word in words:
|
||||
temp_bbox = char_bboxes[char_bbox_idx:char_bbox_idx + len(word)]
|
||||
char_bbox_idx += len(word)
|
||||
char_bbox_grps.append(temp_bbox)
|
||||
|
||||
# Validate char bboxes.
|
||||
# If the length of the last char bbox is correct, then
|
||||
# all the previous bboxes are also valid
|
||||
if len(char_bbox_grps[len(words) - 1]) != len(words[-1]):
|
||||
return
|
||||
|
||||
return img_path, words, word_bboxes, char_bbox_grps
|
||||
|
||||
|
||||
def load_gt_data(filename, n_proc):
|
||||
mat_data = loadmat(filename, simplify_cells=True)
|
||||
imnames = mat_data['imnames']
|
||||
txt = mat_data['txt']
|
||||
wordBB = mat_data['wordBB']
|
||||
charBB = mat_data['charBB']
|
||||
return mmengine.track_parallel_progress(
|
||||
load_gt_datum, list(zip(imnames, txt, wordBB, charBB)), nproc=n_proc)
|
||||
|
||||
|
||||
def process(data, img_path_prefix, out_dir):
|
||||
if data is None:
|
||||
return
|
||||
# Dirty hack for multi-processing
|
||||
img_path, words, word_bboxes, char_bbox_grps = data
|
||||
img_dir, img_name = os.path.split(img_path)
|
||||
img_name = os.path.splitext(img_name)[0]
|
||||
input_img = mmcv.imread(os.path.join(img_path_prefix, img_path))
|
||||
|
||||
output_sub_dir = os.path.join(out_dir, img_dir)
|
||||
if not os.path.exists(output_sub_dir):
|
||||
try:
|
||||
os.makedirs(output_sub_dir)
|
||||
except FileExistsError:
|
||||
pass  # occurs when multi-processing
|
||||
|
||||
for i, word in enumerate(words):
|
||||
output_image_patch_name = f'{img_name}_{i}.png'
|
||||
output_label_name = f'{img_name}_{i}.txt'
|
||||
output_image_patch_path = os.path.join(output_sub_dir,
|
||||
output_image_patch_name)
|
||||
output_label_path = os.path.join(output_sub_dir, output_label_name)
|
||||
if os.path.exists(output_image_patch_path) and os.path.exists(
|
||||
output_label_path):
|
||||
continue
|
||||
|
||||
word_bbox = word_bboxes[i]
|
||||
min_x, max_x = int(min(word_bbox[::2])), int(max(word_bbox[::2]))
|
||||
min_y, max_y = int(min(word_bbox[1::2])), int(max(word_bbox[1::2]))
|
||||
cropped_img = input_img[min_y:max_y, min_x:max_x]
|
||||
if cropped_img.shape[0] <= 0 or cropped_img.shape[1] <= 0:
|
||||
continue
|
||||
|
||||
char_bbox_grp = np.array(char_bbox_grps[i])
|
||||
char_bbox_grp[:, ::2] -= min_x
|
||||
char_bbox_grp[:, 1::2] -= min_y
|
||||
|
||||
mmcv.imwrite(cropped_img, output_image_patch_path)
|
||||
with open(output_label_path, 'w') as output_label_file:
|
||||
output_label_file.write(word + '\n')
|
||||
for cbox in char_bbox_grp:
|
||||
output_label_file.write('%d %d %d %d %d %d %d %d\n' %
|
||||
tuple(cbox.tolist()))
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
print('Loading annotation data...')
|
||||
data = load_gt_data(args.anno_path, args.n_proc)
|
||||
process_with_outdir = partial(
|
||||
process, img_path_prefix=args.img_path, out_dir=args.out_dir)
|
||||
print('Creating cropped images and gold labels...')
|
||||
mmengine.track_parallel_progress(
|
||||
process_with_outdir, data, nproc=args.n_proc)
|
||||
print('Done')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
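In `process`, a word's flattened 8-value box stores x coordinates at even indices and y coordinates at odd indices; character boxes are then shifted into the cropped patch's coordinate frame. A small sketch with made-up coordinates:

```python
import numpy as np

word_bbox = [30, 10, 90, 12, 88, 40, 28, 38]  # x0, y0, ..., x3, y3 (made up)
min_x, max_x = int(min(word_bbox[::2])), int(max(word_bbox[::2]))    # 28, 90
min_y, max_y = int(min(word_bbox[1::2])), int(max(word_bbox[1::2]))  # 10, 40

char_bbox_grp = np.array([[32, 12, 45, 12, 44, 36, 31, 35]])
char_bbox_grp[:, ::2] -= min_x    # x coordinates relative to the crop
char_bbox_grp[:, 1::2] -= min_y   # y coordinates relative to the crop
print(char_bbox_grp[0].tolist())  # [4, 2, 17, 2, 16, 26, 3, 25]
```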
|
|
@@ -1,113 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import math
|
||||
import os
|
||||
import os.path as osp
|
||||
from functools import partial
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
|
||||
from mmocr.utils import dump_ocr_data
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate training and validation set of TextOCR '
|
||||
'by cropping box image.')
|
||||
parser.add_argument('root_path', help='Root dir path of TextOCR')
|
||||
parser.add_argument(
|
||||
'n_proc', default=1, type=int, help='Number of processes to run')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def process_img(args, src_image_root, dst_image_root):
|
||||
# Dirty hack for multi-processing
|
||||
img_idx, img_info, anns = args
|
||||
src_img = mmcv.imread(osp.join(src_image_root, img_info['file_name']))
|
||||
labels = []
|
||||
for ann_idx, ann in enumerate(anns):
|
||||
text_label = ann['utf8_string']
|
||||
|
||||
# Ignore illegible or non-English words
|
||||
if text_label == '.':
|
||||
continue
|
||||
|
||||
x, y, w, h = ann['bbox']
|
||||
x, y = max(0, math.floor(x)), max(0, math.floor(y))
|
||||
w, h = math.ceil(w), math.ceil(h)
|
||||
dst_img = src_img[y:y + h, x:x + w]
|
||||
dst_img_name = f'img_{img_idx}_{ann_idx}.jpg'
|
||||
dst_img_path = osp.join(dst_image_root, dst_img_name)
|
||||
mmcv.imwrite(dst_img, dst_img_path)
|
||||
labels.append({
|
||||
'file_name': dst_img_name,
|
||||
'anno_info': [{
|
||||
'text': text_label
|
||||
}]
|
||||
})
|
||||
return labels
|
||||
|
||||
|
||||
def convert_textocr(root_path,
|
||||
dst_image_path,
|
||||
dst_label_filename,
|
||||
annotation_filename,
|
||||
img_start_idx=0,
|
||||
nproc=1):
|
||||
|
||||
annotation_path = osp.join(root_path, annotation_filename)
|
||||
if not osp.exists(annotation_path):
|
||||
raise Exception(
|
||||
f'{annotation_path} does not exist, please check and try again.')
|
||||
src_image_root = root_path
|
||||
|
||||
# outputs
|
||||
dst_label_file = osp.join(root_path, dst_label_filename)
|
||||
dst_image_root = osp.join(root_path, dst_image_path)
|
||||
os.makedirs(dst_image_root, exist_ok=True)
|
||||
|
||||
annotation = mmengine.load(annotation_path)
|
||||
|
||||
process_img_with_path = partial(
|
||||
process_img,
|
||||
src_image_root=src_image_root,
|
||||
dst_image_root=dst_image_root)
|
||||
tasks = []
|
||||
for img_idx, img_info in enumerate(annotation['imgs'].values()):
|
||||
ann_ids = annotation['imgToAnns'][img_info['id']]
|
||||
anns = [annotation['anns'][ann_id] for ann_id in ann_ids]
|
||||
tasks.append((img_idx + img_start_idx, img_info, anns))
|
||||
labels_list = mmengine.track_parallel_progress(
|
||||
process_img_with_path, tasks, keep_order=True, nproc=nproc)
|
||||
final_labels = []
|
||||
for label_list in labels_list:
|
||||
final_labels += label_list
|
||||
dump_ocr_data(final_labels, dst_label_file, 'textrecog')
|
||||
return len(annotation['imgs'])
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
print('Processing training set...')
|
||||
num_train_imgs = convert_textocr(
|
||||
root_path=root_path,
|
||||
dst_image_path='image',
|
||||
dst_label_filename='train_label.json',
|
||||
annotation_filename='TextOCR_0.1_train.json',
|
||||
nproc=args.n_proc)
|
||||
print('Processing validation set...')
|
||||
convert_textocr(
|
||||
root_path=root_path,
|
||||
dst_image_path='image',
|
||||
dst_label_filename='val_label.json',
|
||||
annotation_filename='TextOCR_0.1_val.json',
|
||||
img_start_idx=num_train_imgs,
|
||||
nproc=args.n_proc)
|
||||
print('Finish')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
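`convert_textocr` offsets validation image indices by `img_start_idx` so that cropped patches from the two splits, which share one output directory, never collide in name. A tiny illustration (the training-set count below is made up):

```python
num_train_imgs = 21778           # illustrative count, not the real number
val_img_idx = 5                  # 5th image of the validation split

# Same naming scheme as process_img: img_{img_idx}_{ann_idx}.jpg
dst_img_name = f'img_{val_img_idx + num_train_imgs}_0.jpg'
print(dst_img_name)              # img_21783_0.jpg
```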
|
|
@@ -1,388 +0,0 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import argparse
|
||||
import glob
|
||||
import os
|
||||
import os.path as osp
|
||||
import re
|
||||
|
||||
import mmcv
|
||||
import mmengine
|
||||
import numpy as np
|
||||
import scipy.io as scio
|
||||
import yaml
|
||||
from shapely.geometry import Polygon
|
||||
|
||||
from mmocr.utils import crop_img, dump_ocr_data
|
||||
|
||||
|
||||
def collect_files(img_dir, gt_dir):
|
||||
"""Collect all images and their corresponding groundtruth files.
|
||||
|
||||
Args:
|
||||
img_dir (str): The image directory
|
||||
gt_dir (str): The groundtruth directory
|
||||
|
||||
Returns:
|
||||
files(list): The list of tuples (img_file, groundtruth_file)
|
||||
"""
|
||||
assert isinstance(img_dir, str)
|
||||
assert img_dir
|
||||
assert isinstance(gt_dir, str)
|
||||
assert gt_dir
|
||||
|
||||
# Note that we handle png and jpg only. Please convert other formats such as gif to
|
||||
# jpg or png offline
|
||||
suffixes = ['.png', '.PNG', '.jpg', '.JPG', '.jpeg', '.JPEG']
|
||||
# suffixes = ['.png']
|
||||
|
||||
imgs_list = []
|
||||
for suffix in suffixes:
|
||||
imgs_list.extend(glob.glob(osp.join(img_dir, '*' + suffix)))
|
||||
|
||||
imgs_list = sorted(imgs_list)
|
||||
ann_list = sorted(
|
||||
osp.join(gt_dir, gt_file) for gt_file in os.listdir(gt_dir))
|
||||
|
||||
files = [(img_file, gt_file)
|
||||
for (img_file, gt_file) in zip(imgs_list, ann_list)]
|
||||
assert len(files), f'No images found in {img_dir}'
|
||||
print(f'Loaded {len(files)} images from {img_dir}')
|
||||
|
||||
return files
|
||||
|
||||
|
||||
def collect_annotations(files, nproc=1):
|
||||
"""Collect the annotation information.
|
||||
|
||||
Args:
|
||||
files (list): The list of tuples (image_file, groundtruth_file)
|
||||
nproc (int): The number of processes used to collect annotations
|
||||
|
||||
Returns:
|
||||
images (list): The list of image information dicts
|
||||
"""
|
||||
assert isinstance(files, list)
|
||||
assert isinstance(nproc, int)
|
||||
|
||||
if nproc > 1:
|
||||
images = mmengine.track_parallel_progress(
|
||||
load_img_info, files, nproc=nproc)
|
||||
else:
|
||||
images = mmengine.track_progress(load_img_info, files)
|
||||
|
||||
return images
|
||||
|
||||
|
||||
def get_contours_mat(gt_path):
|
||||
"""Get the contours and words for each ground_truth mat file.
|
||||
|
||||
Args:
|
||||
gt_path (str): The relative path of the ground_truth mat file
|
||||
|
||||
Returns:
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
"""
|
||||
assert isinstance(gt_path, str)
|
||||
|
||||
contours = []
|
||||
words = []
|
||||
data = scio.loadmat(gt_path)
|
||||
# 'gt' for the latest version; 'polygt' for the legacy version
|
||||
keys = data.keys()
|
||||
if 'gt' in keys:
|
||||
data_polygt = data.get('gt')
|
||||
elif 'polygt' in keys:
|
||||
data_polygt = data.get('polygt')
else:
raise NotImplementedError
|
||||
|
||||
for i, lines in enumerate(data_polygt):
|
||||
X = np.array(lines[1])
|
||||
Y = np.array(lines[3])
|
||||
|
||||
point_num = len(X[0])
|
||||
word = lines[4]
|
||||
if len(word) == 0 or word == '#':
|
||||
word = '###'
|
||||
else:
|
||||
word = word[0]
|
||||
|
||||
words.append(word)
|
||||
|
||||
arr = np.concatenate([X, Y]).T
|
||||
contour = []
|
||||
for i in range(point_num):
|
||||
contour.append(arr[i][0])
|
||||
contour.append(arr[i][1])
|
||||
contours.append(np.asarray(contour))
|
||||
|
||||
return contours, words
|
||||
|
||||
|
||||
def load_mat_info(img_info, gt_file):
|
||||
"""Load the information of one ground truth in .mat format.
|
||||
|
||||
Args:
|
||||
img_info (dict): The dict of only the image information
|
||||
gt_file (str): The relative path of the ground_truth mat
|
||||
file for one image
|
||||
|
||||
Returns:
|
||||
img_info(dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(img_info, dict)
|
||||
assert isinstance(gt_file, str)
|
||||
|
||||
contours, words = get_contours_mat(gt_file)
|
||||
anno_info = []
|
||||
for contour, word in zip(contours, words):
|
||||
if contour.shape[0] == 2 or word == '###':
|
||||
continue
|
||||
coordinates = np.array(contour).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
|
||||
# Convert the polygon bounds to a clockwise 4-point bounding box
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
|
||||
anno = dict(word=word, bbox=bbox)
|
||||
anno_info.append(anno)
|
||||
|
||||
img_info.update(anno_info=anno_info)
|
||||
return img_info
|
||||
|
||||
|
||||
def process_line(line, contours, words):
|
||||
"""Get the contours and words by processing each line in the gt file.
|
||||
|
||||
Args:
|
||||
line (str): The line in gt file containing annotation info
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
|
||||
Returns:
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
"""
|
||||
|
||||
line = '{' + line.replace('[[', '[').replace(']]', ']') + '}'
|
||||
ann_dict = re.sub('([0-9]) +([0-9])', r'\1,\2', line)
|
||||
ann_dict = re.sub('([0-9]) +([ 0-9])', r'\1,\2', ann_dict)
|
||||
ann_dict = re.sub('([0-9]) -([0-9])', r'\1,-\2', ann_dict)
|
||||
ann_dict = ann_dict.replace("[u',']", "[u'#']")
|
||||
ann_dict = yaml.safe_load(ann_dict)
|
||||
|
||||
X = np.array([ann_dict['x']])
|
||||
Y = np.array([ann_dict['y']])
|
||||
|
||||
if len(ann_dict['transcriptions']) == 0:
|
||||
word = '###'
|
||||
else:
|
||||
word = ann_dict['transcriptions'][0]
|
||||
if len(ann_dict['transcriptions']) > 1:
|
||||
for ann_word in ann_dict['transcriptions'][1:]:
|
||||
word += ',' + ann_word
|
||||
word = str(eval(word))
|
||||
words.append(word)
|
||||
|
||||
point_num = len(X[0])
|
||||
|
||||
arr = np.concatenate([X, Y]).T
|
||||
contour = []
|
||||
for i in range(point_num):
|
||||
contour.append(arr[i][0])
|
||||
contour.append(arr[i][1])
|
||||
contours.append(np.asarray(contour))
|
||||
|
||||
return contours, words
|
||||
|
||||
|
||||
def get_contours_txt(gt_path):
|
||||
"""Get the contours and words for each ground_truth txt file.
|
||||
|
||||
Args:
|
||||
gt_path (str): The relative path of the ground_truth mat file
|
||||
|
||||
Returns:
|
||||
contours (list[lists]): A list of lists of contours
|
||||
for the text instances
|
||||
words (list[list]): A list of lists of words (string)
|
||||
for the text instances
|
||||
"""
|
||||
assert isinstance(gt_path, str)
|
||||
|
||||
contours = []
|
||||
words = []
|
||||
|
||||
with open(gt_path) as f:
|
||||
tmp_line = ''
|
||||
for idx, line in enumerate(f):
|
||||
line = line.strip()
|
||||
if idx == 0:
|
||||
tmp_line = line
|
||||
continue
|
||||
if not line.startswith('x:'):
|
||||
tmp_line += ' ' + line
|
||||
continue
|
||||
else:
|
||||
complete_line = tmp_line
|
||||
tmp_line = line
|
||||
contours, words = process_line(complete_line, contours, words)
|
||||
|
||||
if tmp_line != '':
|
||||
contours, words = process_line(tmp_line, contours, words)
|
||||
|
||||
words = ['###' if word == '#' else word for word in words]
|
||||
|
||||
return contours, words
|
||||
|
||||
|
||||
def load_txt_info(gt_file, img_info):
|
||||
"""Load the information of one ground truth in .txt format.
|
||||
|
||||
Args:
|
||||
img_info (dict): The dict of only the image information
|
||||
gt_file (str): The relative path of the ground_truth mat
|
||||
file for one image
|
||||
|
||||
Returns:
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
"""
|
||||
|
||||
contours, words = get_contours_txt(gt_file)
|
||||
anno_info = []
|
||||
for contour, word in zip(contours, words):
|
||||
if contour.shape[0] == 2 or word == '###':
|
||||
continue
|
||||
coordinates = np.array(contour).reshape(-1, 2)
|
||||
polygon = Polygon(coordinates)
|
||||
|
||||
# Convert the polygon bounds to a clockwise 4-point bounding box
|
||||
min_x, min_y, max_x, max_y = polygon.bounds
|
||||
bbox = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
|
||||
anno = dict(word=word, bbox=bbox)
|
||||
anno_info.append(anno)
|
||||
|
||||
img_info.update(anno_info=anno_info)
|
||||
return img_info
|
||||
|
||||
|
||||
def generate_ann(root_path, split, image_infos):
|
||||
"""Generate cropped annotations and label txt file.
|
||||
|
||||
Args:
|
||||
root_path (str): The relative path of the totaltext file
|
||||
split (str): The split of dataset. Namely: training or test
|
||||
image_infos (list[dict]): A list of dicts of the img and
|
||||
annotation information
|
||||
"""
|
||||
|
||||
dst_image_root = osp.join(root_path, 'dst_imgs', split)
|
||||
if split == 'training':
|
||||
dst_label_file = osp.join(root_path, 'train_label.json')
|
||||
elif split == 'test':
|
||||
dst_label_file = osp.join(root_path, 'test_label.json')
|
||||
os.makedirs(dst_image_root, exist_ok=True)
|
||||
|
||||
img_info = []
|
||||
for image_info in image_infos:
|
||||
index = 1
|
||||
src_img_path = osp.join(root_path, 'imgs', image_info['file_name'])
|
||||
image = mmcv.imread(src_img_path)
|
||||
src_img_root = osp.splitext(image_info['file_name'])[0].split('/')[1]
|
||||
|
||||
for anno in image_info['anno_info']:
|
||||
word = anno['word']
|
||||
dst_img = crop_img(image, anno['bbox'])
|
||||
|
||||
# Skip invalid annotations
|
||||
if min(dst_img.shape) == 0 or word == '###':
|
||||
continue
|
||||
|
||||
dst_img_name = f'{src_img_root}_{index}.png'
|
||||
index += 1
|
||||
dst_img_path = osp.join(dst_image_root, dst_img_name)
|
||||
mmcv.imwrite(dst_img, dst_img_path)
|
||||
img_info.append({
|
||||
'file_name': dst_img_name,
|
||||
'anno_info': [{
|
||||
'text': word
|
||||
}]
|
||||
})
|
||||
|
||||
dump_ocr_data(img_info, dst_label_file, 'textrecog')
|
||||
|
||||
|
||||
def load_img_info(files):
|
||||
"""Load the information of one image.
|
||||
|
||||
Args:
|
||||
files (tuple): The tuple of (img_file, groundtruth_file)
|
||||
|
||||
Returns:
|
||||
img_info (dict): The dict of the img and annotation information
|
||||
"""
|
||||
assert isinstance(files, tuple)
|
||||
|
||||
img_file, gt_file = files
|
||||
# read imgs while ignoring orientations
|
||||
img = mmcv.imread(img_file, 'unchanged')
|
||||
|
||||
split_name = osp.basename(osp.dirname(img_file))
|
||||
img_info = dict(
|
||||
# remove img_prefix for filename
|
||||
file_name=osp.join(split_name, osp.basename(img_file)),
|
||||
height=img.shape[0],
|
||||
width=img.shape[1],
|
||||
# anno_info=anno_info,
|
||||
segm_file=osp.join(split_name, osp.basename(gt_file)))
|
||||
|
||||
if osp.splitext(gt_file)[1] == '.mat':
|
||||
img_info = load_mat_info(img_info, gt_file)
|
||||
elif osp.splitext(gt_file)[1] == '.txt':
|
||||
img_info = load_txt_info(gt_file, img_info)
|
||||
else:
|
||||
raise NotImplementedError
|
||||
|
||||
return img_info
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Generate the training and test sets of Totaltext by cropping box images')
|
||||
parser.add_argument('root_path', help='Totaltext root path')
|
||||
parser.add_argument(
|
||||
'--nproc', default=1, type=int, help='Number of processes')
|
||||
args = parser.parse_args()
|
||||
return args
|
||||
|
||||
|
||||
def main():
|
||||
args = parse_args()
|
||||
root_path = args.root_path
|
||||
img_dir = osp.join(root_path, 'imgs')
|
||||
gt_dir = osp.join(root_path, 'annotations')
|
||||
|
||||
set_name = {}
|
||||
for split in ['training', 'test']:
|
||||
set_name.update({split: split + '_label' + '.txt'})
|
||||
assert osp.exists(osp.join(img_dir, split))
|
||||
|
||||
for split, ann_name in set_name.items():
|
||||
print(f'Converting {split} into {ann_name}')
|
||||
with mmengine.Timer(
|
||||
print_tmpl='It takes {}s to convert totaltext annotation'):
|
||||
files = collect_files(
|
||||
osp.join(img_dir, split), osp.join(gt_dir, split))
|
||||
image_infos = collect_annotations(files, nproc=args.nproc)
|
||||
generate_ann(root_path, split, image_infos)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
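The 8-value `bbox` built in `load_mat_info`/`load_txt_info` above is simply the axis-aligned extent of the polygon written out as four clockwise corners, which is what `crop_img` later receives. A minimal sketch with illustrative coordinates:

```python
from shapely.geometry import Polygon

polygon = Polygon([(30, 10), (90, 12), (88, 40), (28, 38)])  # made-up contour
min_x, min_y, max_x, max_y = polygon.bounds                   # (28.0, 10.0, 90.0, 40.0)

# Clockwise 4-point box, later passed to mmocr.utils.crop_img
bbox = [min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y]
print(bbox)  # [28.0, 10.0, 90.0, 10.0, 90.0, 40.0, 28.0, 40.0]
```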
|