mirror of https://github.com/open-mmlab/mmocr.git
[Docs] Inferencer docs (#1744)
* [Enhancement] Support batch visualization & dumping in Inferencer
* fix empty det output
* Update mmocr/apis/inferencers/base_mmocr_inferencer.py

  Co-authored-by: liukuikun <24622904+Harold-lkk@users.noreply.github.com>

* [Docs] Inferencer docs
* fix
* Support weight_list
* add req
* improve md
* inferencers.md
* update
* add tab
* refine
* polish
* add cn docs
* js
* js
* js
* fix ch docs
* translate
* translate
* finish
* fix
* fix
* fix
* update
* standard inferencer
* update docs
* update docs
* update docs
* update docs
* update docs
* update docs
* en
* update
* update
* update
* update
* fix
* apply sugg

---------

Co-authored-by: liukuikun <24622904+Harold-lkk@users.noreply.github.com>
parent cc78866ed7
commit 33cbc9b92f
@@ -67,6 +67,7 @@ instance/
 # Sphinx documentation
 docs/en/_build/
 docs/zh_cn/_build/
+docs/*/api/generated/
 
 # PyBuilder
 target/
@@ -48,5 +48,5 @@ Models:
       Metrics:
         macro_f1: 0.931
         micro_f1: 0.940
-        edgee_micro_f1: 0.792
+        edge_micro_f1: 0.792
       Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt-openset/sdmgr_novisual_60e_wildreceipt-openset_20220831_200807-dedf15ec.pth
@@ -26,15 +26,3 @@ Models:
       Metrics:
         hmean-iou: 0.8467
       Weights: https://download.openmmlab.com/mmocr/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500/drrg_resnet50_fpn-unet_1200e_ctw1500_20220827_105233-d5c702dd.pth
-
-  - Name: drrg_resnet50-oclip_fpn-unet_1200e_ctw1500
-    In Collection: DRRG
-    Config: configs/textdet/drrg/drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py
-    Metadata:
-      Training Data: CTW1500
-    Results:
-      - Task: Text Detection
-        Dataset: CTW1500
-        Metrics:
-          hmean-iou:
-        Weights:
@@ -26,7 +26,7 @@ Models:
       - Task: Text Detection
         Dataset: CTW1500
         Metrics:
-          hmean: 0.7458
+          hmean-iou: 0.7458
       Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/mask-rcnn_resnet50_fpn_160e_ctw1500_20220826_154755-ce68ee8e.pth
 
   - Name: mask-rcnn_resnet50-oclip_fpn_160e_ctw1500
@@ -38,7 +38,7 @@ Models:
       - Task: Text Detection
         Dataset: CTW1500
         Metrics:
-          hmean: 0.7562
+          hmean-iou: 0.7562
       Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500_20221101_154448-6e9e991c.pth
 
   - Name: mask-rcnn_resnet50_fpn_160e_icdar2015
@@ -51,7 +51,7 @@ Models:
      - Task: Text Detection
        Dataset: ICDAR2015
        Metrics:
-          hmean: 0.8182
+          hmean-iou: 0.8182
      Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/mask-rcnn_resnet50_fpn_160e_icdar2015_20220826_154808-ff5c30bf.pth
 
   - Name: mask-rcnn_resnet50-oclip_fpn_160e_icdar2015
@@ -64,5 +64,5 @@ Models:
      - Task: Text Detection
        Dataset: ICDAR2015
        Metrics:
-          hmean: 0.8513
+          hmean-iou: 0.8513
      Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015_20221101_131357-a19f7802.pth
@@ -45,7 +45,7 @@ Attention-based scene text recognizers have gained huge success, which leverages
 
 ```bibtex
 @article{Lu2021MASTER,
-  title={{MASTER}: Multi-Aspect Non-local Network for Scene Text Recognition},
+  title={MASTER: Multi-Aspect Non-local Network for Scene Text Recognition},
   author={Ning Lu and Wenwen Yu and Xianbiao Qi and Yihao Chen and Ping Gong and Rong Xiao and Xiang Bai},
   journal={Pattern Recognition},
   year={2021}
@@ -0,0 +1,31 @@
+$(document).ready(function () {
+  table = $('.model-summary').DataTable({
+    "stateSave": false,
+    "lengthChange": false,
+    "pageLength": 10,
+    "order": [],
+    "scrollX": true,
+    "columnDefs": [
+      { "type": "summary", targets: '_all' },
+    ]
+  });
+  // Override the default sorting for the summary columns, which
+  // never takes the "-" character into account.
+  jQuery.extend(jQuery.fn.dataTableExt.oSort, {
+    "summary-asc": function (str1, str2) {
+      if (str1 == "<p>-</p>")
+        return 1;
+      if (str2 == "<p>-</p>")
+        return -1;
+      return ((str1 < str2) ? -1 : ((str1 > str2) ? 1 : 0));
+    },
+
+    "summary-desc": function (str1, str2) {
+      if (str1 == "<p>-</p>")
+        return 1;
+      if (str2 == "<p>-</p>")
+        return -1;
+      return ((str1 < str2) ? 1 : ((str1 > str2) ? -1 : 0));
+    }
+  });
+})
@@ -40,7 +40,7 @@ The conventions for the fields in `InstanceData` in MMOCR are shown in the table
 | | | |
 | ----------- | ---------------------------------- | ----------- |
 | Field | Type | Description |
-| bboxes | `torch.FloatTensor` | Bounding boxes `[x1, x2, y1, y2]` with the shape `(N, 4)`. |
+| bboxes | `torch.FloatTensor` | Bounding boxes `[x1, y1, x2, y2]` with the shape `(N, 4)`. |
 | labels | `torch.LongTensor` | Instance label with the shape `(N, )`. By default, MMOCR uses `0` to represent the "text" class. |
 | polygons | `list[np.array(dtype=np.float32)]` | Polygonal bounding boxes with the shape `(N, )`. |
 | scores | `torch.Tensor` | Confidence scores of the predictions of bounding boxes. `(N, )`. |
@@ -99,7 +99,7 @@ The fields of [`InstanceData`](#instancedata) that will be used are:
 | | | |
 | -------- | ---------------------------------- | ----------- |
 | Field | Type | Description |
-| bboxes | `torch.FloatTensor` | Bounding boxes `[x1, x2, y1, y2]` with the shape `(N, 4)`. |
+| bboxes | `torch.FloatTensor` | Bounding boxes `[x1, y1, x2, y2]` with the shape `(N, 4)`. |
 | labels | `torch.LongTensor` | Instance label with the shape `(N, )`. By default, MMOCR uses `0` to represent the "text" class. |
 | polygons | `list[np.array(dtype=np.float32)]` | Polygonal bounding boxes with the shape `(N, )`. |
 | scores | `torch.Tensor` | Confidence scores of the predictions of bounding boxes. `(N, )`. |
@@ -182,7 +182,7 @@ The [`InstanceData`](#text-detection-instancedata) fields that will be used by t
 | | | |
 | ----------- | ------------------- | ----------- |
 | Field | Type | Description |
-| bboxes | `torch.FloatTensor` | Bounding boxes `[x1, x2, y1, y2]` with the shape `(N, 4)`. |
+| bboxes | `torch.FloatTensor` | Bounding boxes `[x1, y1, x2, y2]` with the shape `(N, 4)`. |
 | labels | `torch.LongTensor` | Instance label with the shape `(N, )`. |
 | texts | `list[str]` | The text content of each instance with the shape `(N, )`, used for e2e text spotting or KIE task. |
 | edge_labels | `torch.IntTensor` | The node adjacency matrix with the shape `(N, N)`. In the KIE task, the optional values for the state between nodes are `-1` (ignored, not involved in loss calculation), `0` (disconnected) and `1` (connected). |
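The corrected box convention above can be sanity-checked with a short snippet. This is an illustrative sketch, independent of the diff, that only assumes `torch` is installed:

```python
import torch

# Two boxes in the documented [x1, y1, x2, y2] order, shape (N, 4).
bboxes = torch.tensor([[10., 20., 110., 60.],
                       [15., 80., 90., 130.]])
widths = bboxes[:, 2] - bboxes[:, 0]   # x2 - x1
heights = bboxes[:, 3] - bboxes[:, 1]  # y2 - y1
# Under this ordering both must be non-negative for valid boxes.
assert (widths >= 0).all() and (heights >= 0).all()
```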
@@ -48,6 +48,7 @@ extensions = [
     'sphinx.ext.autodoc.typehints',
     'sphinx.ext.autosummary',
     'sphinx.ext.autosectionlabel',
+    'sphinx_tabs.tabs',
 ]
 autodoc_typehints = 'description'
 autodoc_mock_imports = ['mmcv._ext']
@@ -57,6 +58,8 @@ autosummary_generate = True  # Turn on sphinx.ext.autosummary
 copybutton_prompt_text = r'>>> |\.\.\. '
 copybutton_prompt_is_regexp = True
 
+myst_enable_extensions = ['colon_fence']
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
@@ -149,8 +152,17 @@ master_doc = 'index'
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
-html_css_files = ['css/readthedocs.css']
-html_js_files = ['js/collapsed.js']
+
+html_css_files = [
+    'https://cdn.datatables.net/1.13.2/css/dataTables.bootstrap5.min.css',
+    'css/readthedocs.css'
+]
+html_js_files = [
+    'https://cdn.datatables.net/1.13.2/js/jquery.dataTables.min.js',
+    'https://cdn.datatables.net/1.13.2/js/dataTables.bootstrap5.min.js',
+    'js/collapsed.js',
+    'js/table.js',
+]
 
 myst_heading_anchors = 4
 
@@ -27,43 +27,45 @@ conda activate openmmlab
 
 **Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/), e.g.
 
-On GPU platforms:
+````{tabs}
 
-```shell
+```{code-tab} shell GPU Platform
 conda install pytorch torchvision -c pytorch
 ```
 
-On CPU platforms:
-
-```shell
+```{code-tab} shell CPU Platform
 conda install pytorch torchvision cpuonly -c pytorch
 ```
 
+````
+
 ## Installation Steps
 
 We recommend that users follow our best practices to install MMOCR. However, the whole process is highly customizable. See [Customize Installation](#customize-installation) section for more information.
 
 ### Best Practices
 
-**Step 0.** Install [MMEngine](https://github.com/open-mmlab/mmengine) and [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim).
+**Step 0.** Install [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) and [MMDetection](https://github.com/open-mmlab/mmdetection) using [MIM](https://github.com/open-mmlab/mim).
 
 ```shell
 pip install -U openmim
 mim install mmengine
 mim install 'mmcv>=2.0.0rc1'
+mim install 'mmdet>=3.0.0rc0'
 ```
 
-**Step 1.** Install [MMDetection](https://github.com/open-mmlab/mmdetection) as a dependency.
+**Step 1.** Install MMOCR.
 
-```shell
-pip install 'mmdet>=3.0.0rc0'
-```
-
-**Step 2.** Install MMOCR.
-
-Case A: If you wish to run and develop MMOCR directly, install it from source:
+If you wish to run and develop MMOCR directly, install it from **source** (recommended).
+
+If you use MMOCR as a dependency or third-party package, install it with **MIM**.
+
+`````{tabs}
+
+````{group-tab} Install from Source
 
 ```shell
 git clone https://github.com/open-mmlab/mmocr.git
 cd mmocr
 git checkout 1.x
@@ -72,58 +74,99 @@ pip install -v -e .
 # "-v" increases pip's verbosity.
 # "-e" means installing the project in editable mode,
 # That is, any local modifications on the code will take effect immediately.
 
 ```
 
-Case B: If you use MMOCR as a dependency or third-party package, install it with pip:
+````
 
+````{group-tab} Install via MIM
+
 ```shell
-pip install 'mmocr>=1.0.0rc0'
+mim install 'mmocr>=1.0.0rc0'
 ```
 
-**Step 3. (Optional)** If you wish to use any transform involving `albumentations` (For example, `Albu` in ABINet's pipeline), install the dependency using the following command:
+````
+
+`````
+
+**Step 2. (Optional)** If you wish to use any transform involving `albumentations` (For example, `Albu` in ABINet's pipeline), install the dependency using the following command:
+
+`````{tabs}
+
+````{group-tab} Install from Source
 
 ```shell
-# If MMOCR is installed from source
 pip install -r requirements/albu.txt
-# If MMOCR is installed via pip
 ```
 
+````
+
+````{group-tab} Install via MIM
+
+```shell
 pip install albumentations>=1.1.0 --no-binary qudida,albumentations
 ```
 
+````
+
+`````
+
 ```{note}
 
 We recommend checking the environment after installing `albumentations` to
 ensure that `opencv-python` and `opencv-python-headless` are not installed together, otherwise it might cause unexpected issues. If that's unfortunately the case, please uninstall `opencv-python-headless` to make sure MMOCR's visualization utilities can work.
 
 Refer
-to ['albumentations`'s official documentation](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) for more details.
+to [albumentations's official documentation](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) for more details.
 
 ```
 
 ### Verify the installation
 
-We provide a method to verify the installation via inference demo, depending on your installation method. You should be able to see a pop-up image and the inference result upon successful verification.
+You may verify the installation via this inference demo.
 
+`````{tabs}
+
+````{tab} Python
+
+Run the following code in a Python interpreter:
+
+```python
+>>> from mmocr.apis import MMOCRInferencer
+>>> ocr = MMOCRInferencer(det='DBNet', rec='CRNN')
+>>> ocr('demo/demo_text_ocr.jpg', show=True, print_result=True)
+```
+````
+
+````{tab} Shell
+
+If you installed MMOCR from source, you can run the following in MMOCR's root directory:
+
+```shell
+python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec CRNN --show --print-result
+```
+````
+
+`````
+
+You should be able to see a pop-up image and the inference result printed out in the console upon successful verification.
+
 <div align="center">
 <img src="https://user-images.githubusercontent.com/24622904/187825445-d30cbfa6-5549-4358-97fe-245f08f4ed94.jpg" height="250"/>
 </div>
 <br />
 
 ```bash
 # Inference result
-{'rec_texts': ['cbanke', 'docece', 'sroumats', 'chounsonse', 'doceca', 'c', '', 'sond', 'abrandso', 'sretane', '1', 'tosl', 'roundi', 'slen', 'yet', 'ally', 's', 'sue', 'salle', 'v'], 'rec_scores': [...], 'det_polygons': [...], 'det_scores': tensor([...])}
+{'predictions': [{'rec_texts': ['cbanks', 'docecea', 'grouf', 'pwate', 'chobnsonsg', 'soxee', 'oeioh', 'c', 'sones', 'lbrandec', 'sretalg', '11', 'to8', 'round', 'sale', 'year',
+'ally', 'sie', 'sall'], 'rec_scores': [...], 'det_polygons': [...], 'det_scores':
+[...]}]}
 ```
 
-Run the following in MMOCR's directory:
-
-```bash
-python mmocr/ocr.py --det DB_r18 --recog CRNN demo/demo_text_ocr.jpg --show
-```
-
-Also can run the following codes in your Python interpreter:
-
-```python
-from mmocr.ocr import MMOCR
-ocr = MMOCR(recog='CRNN', det='DB_r18')
-ocr.readtext('demo_text_ocr.jpg', show=True)
+```{note}
+If you are running MMOCR on a server without GUI or via SSH tunnel with X11 forwarding disabled, you may not see the pop-up window.
 ```
 
 ## Customize Installation
@@ -1,15 +1,37 @@
 # Quick Run
 
+This chapter will take you through the basic functions of MMOCR. And we assume you [installed MMOCR from source](install.md#best-practices).
+
 ## Inference
 
-Please refer to [here](install.md#verify-the-installation) for a quick inference run. A detailed description of MMOCR's inference interface can be found [here](../user_guides/inference.md)
+Run the following in MMOCR's root directory:
 
+```shell
+python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec CRNN --show --print-result
+```
+
+You should be able to see a pop-up image and the inference result printed out in the console.
+
+<div align="center">
+<img src="https://user-images.githubusercontent.com/24622904/187825445-d30cbfa6-5549-4358-97fe-245f08f4ed94.jpg" height="250"/>
+</div>
+<br />
+
+```bash
+# Inference result
+{'predictions': [{'rec_texts': ['cbanks', 'docecea', 'grouf', 'pwate', 'chobnsonsg', 'soxee', 'oeioh', 'c', 'sones', 'lbrandec', 'sretalg', '11', 'to8', 'round', 'sale', 'year',
+'ally', 'sie', 'sall'], 'rec_scores': [...], 'det_polygons': [...], 'det_scores':
+[...]}]}
+```
+
 ```{note}
-In addition to using our well-provided pre-trained models, you can also train models on your own datasets. In the next section, we will take you through the basic functions of MMOCR by training DBNet on the mini [ICDAR 2015](https://rrc.cvc.uab.es/?ch=4&com=downloads) dataset as an example.
-
-The following sections assume that you [installed MMOCR from source](install.md#best-practices).
+If you are running MMOCR on a server without GUI or via SSH tunnel with X11 forwarding disabled, you may not see the pop-up window.
 ```
 
+A detailed description of MMOCR's inference interface can be found [here](../user_guides/inference.md)
+
+In addition to using our well-provided pre-trained models, you can also train models on your own datasets. In the next section, we will take you through the basic functions of MMOCR by training DBNet on the mini [ICDAR 2015](https://rrc.cvc.uab.es/?ch=4&com=downloads) dataset as an example.
+
 ## Prepare a Dataset
 
 Since the variety of OCR dataset formats are not conducive to either switching or joint training of multiple datasets, MMOCR proposes a uniform [data format](../user_guides/dataset_prepare.md), and provides [dataset preparer](../user_guides/data_prepare/dataset_preparer.md) for commonly used OCR datasets. Usually, to use those datasets in MMOCR, you just need to follow the steps to get them ready for use.
@@ -18,12 +40,12 @@ Since the variety of OCR dataset formats are not conducive to either switching o
 But here, efficiency means everything.
 ```
 
-Here, we have prepared a lite version of ICDAR 2015 dataset for demonstration purposes. Download our pre-prepared [zip](https://download.openmmlab.com/mmocr/data/icdar2015/mini_icdar2015.tar.gz) and extract it to the `data/det/` directory under mmocr to get our prepared image and annotation file.
+Here, we have prepared a lite version of ICDAR 2015 dataset for demonstration purposes. Download our pre-prepared [zip](https://download.openmmlab.com/mmocr/data/icdar2015/mini_icdar2015.tar.gz) and extract it to the `data/` directory under mmocr to get our prepared image and annotation file.
 
 ```Bash
 wget https://download.openmmlab.com/mmocr/data/icdar2015/mini_icdar2015.tar.gz
-mkdir -p data/det/
-tar xzvf mini_icdar2015.tar.gz -C data/det/
+mkdir -p data/
+tar xzvf mini_icdar2015.tar.gz -C data/
 ```
 
 ## Modify the Config
@@ -4,6 +4,4 @@
 sed -e '$a\\n' -s ../../configs/kie/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Key Information Extraction Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >kie_models.md
 sed -e '$a\\n' -s ../../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Detection Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textdet_models.md
 sed -e '$a\\n' -s ../../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# Text Recognition Models' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
-sed -e '$a\\n' -s ../../configs/backbone/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# BackBone' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >backbones.md
-# replace special symbols in inference.md
-sed -i 's/:heavy_check_mark:/Yes/g' user_guides/inference.md && sed -i 's/:x:/No/g' user_guides/inference.md
+sed -e '$a\\n' -s ../../configs/backbone/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# BackBones' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >backbones.md
@@ -1,12 +1,12 @@
 #!/usr/bin/env python
 # Copyright (c) OpenMMLab. All rights reserved.
 import functools as func
-import glob
 import re
 from os.path import basename, splitext
 
 import numpy as np
 import titlecase
+from weight_list import gen_weight_list
 
 
 def title2anchor(name):
@@ -16,7 +16,9 @@ def title2anchor(name):
 
 # Count algorithms
 
-files = sorted(glob.glob('*_models.md'))
+files = [
+    'backbones.md', 'textdet_models.md', 'textrecog_models.md', 'kie_models.md'
+]
 
 stats = []
 
@@ -51,7 +53,7 @@ for f in files:
             re.search(
                 rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                 revcontent, re.DOTALL | re.IGNORECASE).group(1))
-        paperlinks[p] = f'[{p}]({splitext(basename(f))[0]}.html#{paper_link})'
+        paperlinks[p] = f'[{p}]({splitext(basename(f))[0]}.md#{paper_link})'
     paperlist = '\n'.join(
         sorted(f' - [{t}] {paperlinks[x]}' for t, x in papers))
     # count configs
@@ -68,7 +70,7 @@ for f in files:
     }
 
     statsmsg = f"""
-## [{title}]({f})
+### [{title}]({f})
 
 * Number of checkpoints: {len(ckpts)}
 * Number of configs: {len(configs)}
@@ -89,8 +91,33 @@ papertypes, papercounts = np.unique([t for t, _ in allpapers],
 countstr = '\n'.join(
     [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])
 
+# get model list
+weight_list = gen_weight_list()
+
 modelzoo = f"""
-# Statistics
+# Overview
+
+## Weights
+
+Here is the list of weights available for
+[Inference](user_guides/inference.md).
+
+For the ease of reference, some weights may have shorter aliases, which will be
+separated by `/` in the table.
+For example, "`DB_r18 / dbnet_resnet18_fpnc_1200e_icdar2015`" means that you can
+use either `DB_r18` or `dbnet_resnet18_fpnc_1200e_icdar2015`
+to initialize the Inferencer:
+
+```python
+>>> from mmocr.apis import TextDetInferencer
+>>> inferencer = TextDetInferencer(model='DB_r18')
+>>> # equivalent to
+>>> inferencer = TextDetInferencer(model='dbnet_resnet18_fpnc_1200e_icdar2015')
+```
+
+{weight_list}
+
+## Statistics
+
 * Number of checkpoints: {len(allckpts)}
 * Number of configs: {len(allconfigs)}
@@ -98,7 +125,7 @@ modelzoo = f"""
 {countstr}
 
 {msglist}
-"""
+"""  # noqa
 
 with open('modelzoo.md', 'w') as f:
     f.write(modelzoo)
@@ -1,194 +1,535 @@
 # Inference
 
-We provide an easy-to-use API for the demo and application purpose in [ocr.py](/mmocr/ocr.py) script.
+In OpenMMLab, all the inference operations are unified into a new interface - `Inferencer`. `Inferencer` is designed to expose a neat and simple API to users, and shares very similar interface across different OpenMMLab libraries.
 
-The API can be called through command line (CL) or by calling it from another python script.
-It exposes all the models in MMOCR to API as individual modules that can be called and chained together.
+In MMOCR, Inferencers are constructed in different levels of task abstraction.
 
-```{warning}
-This interface is being refactored is much likely to be changed in the upcoming release.
-```
+- **Standard Inferencer**: Following OpenMMLab's convention, each fundamental task in MMOCR has a standard Inferencer, namely `TextDetInferencer` (text detection), `TextRecInferencer` (text recognition), `TextSpottingInferencer` (end-to-end OCR), and `KIEInferencer` (key information extraction). They are designed to perform inference on a single task, and can be chained together to perform inference on a series of tasks. They also share very similar interface, have standard input/output protocol, and overall follow the OpenMMLab design.
+- **MMOCRInferencer**: We also provide `MMOCRInferencer`, a convenient inference interface only designed for MMOCR. It encapsulates and chains all the Inferencers in MMOCR, so users can use this Inferencer to perform a series of tasks on an image and directly get the final result in an end-to-end manner. *However, it has a relatively different interface from other standard Inferencers, and some of standard Inferencer functionalities might be sacrificed for the sake of simplicity.*
 
-## Example 1: Text Detection
+For new users, we recommend using **MMOCRInferencer** to test out different combinations of models.
+
+If you are a developer and wish to integrate the models into your own project, we recommend using **standard Inferencers**, as they are more flexible and standardized, equipped with full functionalities.
+
+## Basic Usage
+`````{tabs}
+
+````{group-tab} MMOCRInferencer
+
+As of now, `MMOCRInferencer` can perform inference on the following tasks:
+
+- Text detection
+- Text recognition
+- OCR (text detection + text recognition)
+- Key information extraction (text detection + text recognition + key information extraction)
+- *OCR (text spotting)* (coming soon)
+
+For convenience, `MMOCRInferencer` provides both Python and command line interfaces. For example, if you want to perform OCR inference on `demo/demo_text_ocr.jpg` with `DBNet` as the text detection model and `SAR` as the text recognition model, you can simply run the following command:
+
+::::{tabs}
+
+:::{code-tab} python
+>>> from mmocr.apis import MMOCRInferencer
+>>> # Load models into memory
+>>> ocr = MMOCRInferencer(det='DBNet', rec='SAR')
+>>> # Perform inference
+>>> ocr('demo/demo_text_ocr.jpg', show=True)
+:::
+
+:::{code-tab} bash
+python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec SAR --show
+:::
+::::
+
+The resulting OCR output will be displayed in a new window:
+
 <div align="center">
-<img src="https://user-images.githubusercontent.com/24622904/187825864-8ead5acb-c3c5-443b-bd90-3f4b188fa315.jpg" height="250"/>
+<img src="https://user-images.githubusercontent.com/22607038/220563262-e9c1ab52-9b96-4d9c-bcb6-f55ff0b9e1be.png" height="250"/>
 </div>
 
-**Instruction:** Perform detection inference on an image with the TextSnake recognition model, export the result in a json file (default) and save the visualization file.
-
-- CL interface:
-
-```shell
-python mmocr/ocr.py demo/demo_text_det.jpg --det TextSnake --img-out-dir demo/
-```
-
-- Python interface:
-
-```python
-from mmocr.ocr import MMOCR
-
-# Load models into memory
-ocr = MMOCR(det='TextSnake')
-
-# Inference
-results = ocr.readtext('demo/demo_text_det.jpg', img_out_dir='demo/')
-```
-
-## Example 2: Text Detection + Recognition
-
-<div align="center">
-<img src="https://user-images.githubusercontent.com/24622904/187825445-d30cbfa6-5549-4358-97fe-245f08f4ed94.jpg" height="250"/>
-</div>
-
-**Instruction:** Perform ocr (det + recog) inference on the demo/demo_text_det.jpg image with the DB_r18 detection model and CRNN recognition model, print the result in the terminal and show the visualization.
-
-- CL interface:
-
-```shell
-python mmocr/ocr.py --det DB_r18 --recog CRNN demo/demo_text_ocr.jpg --print-result --show
-```
-
 ```{note}
-
-When calling the script from the command line, the script assumes configs are saved in the `configs/` folder. User can customize the directory by specifying the value of `config_dir`.
-
+If you are running MMOCR on a server without GUI or via SSH tunnel with X11 forwarding disabled, the `show` option will not work. However, you can still save visualizations to files by setting `out_dir` and `save_vis=True` arguments. Read [Dumping Results](#dumping-results) for details.
 ```
-- Python interface:
+Depending on the initialization arguments, `MMOCRInferencer` can run in different modes. For example, it can run in KIE mode if it is initialized with `det`, `rec` and `kie` specified.
 
-```python
-from mmocr.ocr import MMOCR
+::::{tabs}
 
-# Load models into memory
-ocr = MMOCR(det='DB_r18', recog='CRNN')
+:::{code-tab} python
+>>> kie = MMOCRInferencer(det='DBNet', rec='SAR', kie='SDMGR')
+>>> kie('demo/demo_kie.jpeg', show=True)
+:::
 
-# Inference
-results = ocr.readtext('demo/demo_text_ocr.jpg', print_result=True, show=True)
-```
+:::{code-tab} bash
+python tools/infer.py demo/demo_kie.jpeg --det DBNet --rec SAR --kie SDMGR --show
+:::
 
-## Example 3: Text Detection + Recognition + Key Information Extraction
+::::
+
+The output image should look like this:
 
 <div align="center">
-<img src="https://user-images.githubusercontent.com/24622904/187825451-6b043df9-10f7-4656-a528-45fe043df92b.jpg" height="250"/>
+<img src="https://user-images.githubusercontent.com/22607038/220569700-fd4894bc-f65a-405e-95e7-ebd2d614aedd.png" height="250"/>
 </div>
 <br />
 
-**Instruction:** Perform end-to-end ocr (det + recog) inference first with DB_r18 detection model and CRNN recognition model, then run KIE inference with SDMGR model on the ocr result and show the visualization.
+You may have found that the Python interface and the command line interface of `MMOCRInferencer` are very similar. The following sections will use the Python interface as an example to introduce the usage of `MMOCRInferencer`. For more information about the command line interface, please refer to [Command Line Interface](#command-line-interface).
 
-- CL interface:
+````
 
-```shell
-python mmocr/ocr.py demo/demo_kie.jpeg --det DB_r18 --recog CRNN --kie SDMGR --print-result --show
-```
+````{group-tab} Standard Inferencer
 
-```{note}
-
-Note: When calling the script from the command line, the script assumes configs are saved in the `configs/` folder. User can customize the directory by specifying the value of `config_dir`.
-
-```
-
-- Python interface:
+In general, all the standard Inferencers across OpenMMLab share a very similar interface. The following example shows how to use `TextDetInferencer` to perform inference on a single image.
 
 ```python
-from mmocr.ocr import MMOCR
-
-# Load models into memory
-ocr = MMOCR(det='DB_r18', recog='CRNN', kie='SDMGR')
-
-# Inference
-results = ocr.readtext('demo/demo_kie.jpeg', print_result=True, show=True)
+>>> from mmocr.apis import TextDetInferencer
+>>> # Load models into memory
+>>> inferencer = TextDetInferencer(model='DBNet')
+>>> # Inference
+>>> inferencer('demo/demo_text_ocr.jpg', show=True)
 ```
 
-## API Arguments
+The visualization result should look like:
 
-The API has an extensive list of arguments that you can use. The following tables are for the python interface.
+<div align="center">
+<img src="https://user-images.githubusercontent.com/22607038/221418215-2431d0e9-e16e-4deb-9c52-f8b86801706a.png" height="250"/>
+</div>
 
-**MMOCR():**
+````
-| Arguments | Type | Default | Description |
-| -------------- | --------------------- | -------- | ----------- |
-| `det` | see [models](#models) | None | Text detection algorithm |
-| `recog` | see [models](#models) | None | Text recognition algorithm |
-| `kie` \[1\] | see [models](#models) | None | Key information extraction algorithm |
-| `config_dir` | str | configs/ | Path to the config directory where all the config files are located |
-| `det_config` | str | None | Path to the custom config file of the selected det model |
-| `det_ckpt` | str | None | Path to the custom checkpoint file of the selected det model |
-| `recog_config` | str | None | Path to the custom config file of the selected recog model |
-| `recog_ckpt` | str | None | Path to the custom checkpoint file of the selected recog model |
-| `kie_config` | str | None | Path to the custom config file of the selected kie model |
-| `kie_ckpt` | str | None | Path to the custom checkpoint file of the selected kie model |
-| `device` | str | None | Device used for inference, accepting all allowed strings by `torch.device`. E.g., 'cuda:0' or 'cpu'. |
+`````
 
+## Initialization
+
+Each Inferencer must be initialized with a model. You can also choose the inference device during initialization.
+
+### Model Initialization
+
+`````{tabs}
+
+````{group-tab} MMOCRInferencer
+
+For each task, `MMOCRInferencer` takes two arguments in the form of `xxx` and `xxx_weights` (e.g. `det` and `det_weights`) for initialization, and there are many ways to initialize a model for inference. We will take `det` and `det_weights` as an example to illustrate some typical ways to initialize a model.
+
+- To infer with MMOCR's pre-trained model, passing its name to the argument `det` can work. The weights will be automatically downloaded and loaded from OpenMMLab's model zoo. Check [Weights](../modelzoo.md#weights) for available model names.
+
+  ```python
+  >>> MMOCRInferencer(det='DBNet')
+  ```
+
+- To load custom config and weight, you can pass the path to the config file to `det` and the path to the weight to `det_weights`.
+
+  ```python
+  >>> MMOCRInferencer(det='path/to/dbnet_config.py', det_weights='path/to/dbnet.pth')
+  ```
+
+You may click on the "Standard Inferencer" tab to find more initialization methods.
+
+````
+
+````{group-tab} Standard Inferencer
+
+Every standard `Inferencer` accepts two parameters, `model` and `weights`. (In `MMOCRInferencer`, they are referred to as `xxx` and `xxx_weights`)
+
+- `model` takes either the name of a model, or the path to a config file as input. The name of a model is obtained from the model's metafile ([Example](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet/metafile.yml)) indexed from [model-index.yml](https://github.com/open-mmlab/mmocr/blob/1.x/model-index.yml). You can find the list of available weights [here](../modelzoo.md#weights).
+
+- `weights` accepts the path to a weight file.
+
+<br />
+
+There are various ways to initialize a model.
+
+- To infer with MMOCR's pre-trained model, you can pass its name to `model`. The weights will be automatically downloaded and loaded from OpenMMLab's model zoo.
+
+  ```python
+  >>> from mmocr.apis import TextDetInferencer
+  >>> inferencer = TextDetInferencer(model='DBNet')
+  ```
+
+  ```{note}
+  The model type must match the Inferencer type.
+  ```
+
+  You can load another weight by passing its path/url to `weights`.
+
+  ```python
+  >>> inferencer = TextDetInferencer(model='DBNet', weights='path/to/dbnet.pth')
+  ```
+
+- To load custom config and weight, you can pass the path to the config file to `model` and the path to the weight to `weights`.
+
+  ```python
+  >>> inferencer = TextDetInferencer(model='path/to/dbnet_config.py', weights='path/to/dbnet.pth')
+  ```
+
+- By default, [MMEngine](https://github.com/open-mmlab/mmengine/) dumps config to the weight. If you have a weight trained on MMEngine, you can also pass the path to the weight file to `weights` without specifying `model`:
+
+  ```python
+  >>> # It will raise an error if the config file cannot be found in the weight
+  >>> inferencer = TextDetInferencer(weights='path/to/dbnet.pth')
+  ```
+
+- Passing config file to `model` without specifying `weight` will result in a randomly initialized model.
+
+````
+`````
+### Device
+
+Each Inferencer instance is bound to a device.
+By default, the best device is automatically decided by [MMEngine](https://github.com/open-mmlab/mmengine/). You can also alter the device by specifying the `device` argument. For example, you can use the following code to create an Inferencer on GPU 1.
+
+`````{tabs}
+
+````{group-tab} MMOCRInferencer
+
+```python
+>>> inferencer = MMOCRInferencer(det='DBNet', device='cuda:1')
+```
+
+````
+
+````{group-tab} Standard Inferencer
+
+```python
+>>> inferencer = TextDetInferencer(model='DBNet', device='cuda:1')
+```
+
+````
+
+`````
+
+To create an Inferencer on CPU:
+
+`````{tabs}
+
+````{group-tab} MMOCRInferencer
+
+```python
+>>> inferencer = MMOCRInferencer(det='DBNet', device='cpu')
+```
+
+````
+
+````{group-tab} Standard Inferencer
+
+```python
+>>> inferencer = TextDetInferencer(model='DBNet', device='cpu')
+```
+
+````
+
+`````
+
+Refer to [torch.device](torch.device) for all the supported forms.
+
+## Inference
+
+Once the Inferencer is initialized, you can directly pass in the raw data to be inferred and get the inference results from return values.
+
+### Input
+
+`````{tabs}
+
+````{tab} MMOCRInferencer / TextDetInferencer / TextRecInferencer / TextSpottingInferencer
+
+Input can be either of these types:
+
+- str: Path/URL to the image.
+
+  ```python
+  >>> inferencer('demo/demo_text_ocr.jpg')
+  ```
+
+- array: Image in numpy array. It should be in BGR order.
+
+  ```python
+  >>> import mmcv
+  >>> array = mmcv.imread('demo/demo_text_ocr.jpg')
+  >>> inferencer(array)
+  ```
+
+- list: A list of basic types above. Each element in the list will be processed separately.
+
+  ```python
+  >>> inferencer(['img_1.jpg', 'img_2.jpg'])
+  >>> # You can even mix the types
+  >>> inferencer(['img_1.jpg', array])
+  ```
+
+- str: Path to the directory. All images in the directory will be processed.
+
+  ```python
+  >>> inferencer('tests/data/det_toy_dataset/imgs/test/')
+  ```
+
+````
+
+````{tab} KIEInferencer
+
+Input can be a dict or list[dict], where each dictionary contains the
+following keys:
+
+- `img` (str or ndarray): Path to the image or the image itself. If KIE Inferencer is used in no-visual mode, this key is not required.
+  If it's a numpy array, it should be in BGR order.
+- `img_shape` (tuple(int, int)): Image shape in (H, W). Only required when KIE Inferencer is used in no-visual mode and no `img` is provided.
+- `instances` (list[dict]): A list of instances.
+
+Each `instance` looks like the following:
+
+```python
+{
+    # A nested list of 4 numbers representing the bounding box of
+    # the instance, in (x1, y1, x2, y2) order.
+    "bbox": np.array([[x1, y1, x2, y2], [x1, y1, x2, y2], ...],
+                     dtype=np.int32),
+
+    # List of texts.
+    "texts": ['text1', 'text2', ...],
+}
+```
+
+````
+`````
+### Output
+
+By default, each `Inferencer` returns the prediction results in a dictionary format.
+
+- `visualization` contains the visualized predictions. But it's an empty list by default unless `return_vis=True`.
+
+- `predictions` contains the predictions results in a json-serializable format. As presented below, the contents are slightly different depending on the task type.
+
+`````{tabs}
+
+:::{group-tab} MMOCRInferencer
+
+```python
+{
+    'predictions' : [
+      # Each instance corresponds to an input image
+      {
+        'det_polygons': [...],  # 2d list of length (N,), format: [x1, y1, x2, y2, ...]
+        'det_scores': [...],  # float list of length (N,)
+        'det_bboxes': [...],  # 2d list of shape (N, 4), format: [min_x, min_y, max_x, max_y]
+        'rec_texts': [...],  # str list of length (N,)
+        'rec_scores': [...],  # float list of length (N,)
+        'kie_labels': [...],  # node labels, length (N, )
+        'kie_scores': [...],  # node scores, length (N, )
+        'kie_edge_scores': [...],  # edge scores, shape (N, N)
+        'kie_edge_labels': [...]  # edge labels, shape (N, N)
+      },
+      ...
+    ],
+    'visualization' : [
+      array(..., dtype=uint8),
+    ]
+}
+```
+
+:::
+
+:::{group-tab} Standard Inferencer
+
+````{tabs}
+```{code-tab} python TextDetInferencer
+{
+    'predictions' : [
+      # Each instance corresponds to an input image
+      {
+        'polygons': [...],  # 2d list of len (N,) in the format of [x1, y1, x2, y2, ...]
+        'bboxes': [...],  # 2d list of shape (N, 4), in the format of [min_x, min_y, max_x, max_y]
+        'scores': [...]  # list of float, len (N, )
+      },
+    ]
+    'visualization' : [
+      array(..., dtype=uint8),
+    ]
+}
+```
+
+```{code-tab} python TextRecInferencer
+{
+    'predictions' : [
+      # Each instance corresponds to an input image
+      {
+        'text': '...',  # a string
+        'scores': 0.1,  # a float
+      },
+      ...
+    ]
+    'visualization' : [
+      array(..., dtype=uint8),
+    ]
+}
+```
+
+```{code-tab} python TextSpottingInferencer
+{
+    'predictions' : [
+      # Each instance corresponds to an input image
+      {
+        'polygons': [...],  # 2d list of len (N,) in the format of [x1, y1, x2, y2, ...]
+        'bboxes': [...],  # 2d list of shape (N, 4), in the format of [min_x, min_y, max_x, max_y]
+        'scores': [...],  # list of float, len (N, )
+        'texts': ['...',]  # list of texts, len (N, )
+      },
+    ]
+    'visualization' : [
+      array(..., dtype=uint8),
+    ]
+}
+```
+
+```{code-tab} python KIEInferencer
+{
+    'predictions' : [
+      # Each instance corresponds to an input image
+      {
+        'labels': [...],  # node label, len (N,)
+        'scores': [...],  # node scores, len (N, )
+        'edge_scores': [...],  # edge scores, shape (N, N)
+        'edge_labels': [...],  # edge labels, shape (N, N)
+      },
+    ]
+    'visualization' : [
+      array(..., dtype=uint8),
+    ]
+}
+```
+````
+
+:::
+
+`````
+
+If you wish to get the raw outputs from the model, you can set `return_datasamples` to `True` to get the original [DataSample](structures.md), which will be stored in `predictions`.
+
+### Dumping Results
+
+Apart from obtaining predictions from the return value, you can also export the predictions/visualizations to files by setting `out_dir` and `save_pred`/`save_vis` arguments.
+
+```python
+>>> inferencer('img_1.jpg', out_dir='outputs/', save_pred=True, save_vis=True)
+```
+
+Results in the directory structure like:
+
+```text
+outputs
+├── preds
+│   └── img_1.json
+└── vis
+    └── img_1.jpg
+```
+
+The filename of each file is the same as the corresponding input image filename. If the input image is an array, the filename will be a number starting from 0.
+
+### Batch Inference
+
+You can customize the batch size by setting `batch_size`. The default batch size is 1.
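A minimal sketch of batched inference built from the options documented above, assuming the `DBNet` weights can be fetched from the model zoo and that the image paths exist:

```python
from mmocr.apis import TextDetInferencer

inferencer = TextDetInferencer(model='DBNet')

# Four inputs processed in two batches of two; predictions are also
# dumped to outputs/preds/ as one JSON file per input image.
results = inferencer(
    ['img_1.jpg', 'img_2.jpg', 'img_3.jpg', 'img_4.jpg'],
    batch_size=2,
    save_pred=True,
    out_dir='outputs/')
print(len(results['predictions']))  # one entry per input image
```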
+## API
+
+Here are extensive lists of parameters that you can use.
+
+````{tabs}
+
+```{group-tab} MMOCRInferencer
+
+**MMOCRInferencer.\_\_init\_\_():**
+
+| Arguments | Type | Default | Description |
+| ------------- | ---------------------------------------------------- | ------- | ----------- |
+| `det` | str or [Weights](../modelzoo.html#weights), optional | None | Pretrained text detection algorithm. It's the path to the config file or the model name defined in metafile. |
+| `det_weights` | str, optional | None | Path to the custom checkpoint file of the selected det model. If it is not specified and "det" is a model name of metafile, the weights will be loaded from metafile. |
+| `rec` | str or [Weights](../modelzoo.html#weights), optional | None | Pretrained text recognition algorithm. It's the path to the config file or the model name defined in metafile. |
+| `rec_weights` | str, optional | None | Path to the custom checkpoint file of the selected rec model. If it is not specified and "rec" is a model name of metafile, the weights will be loaded from metafile. |
+| `kie` \[1\] | str or [Weights](../modelzoo.html#weights), optional | None | Pretrained key information extraction algorithm. It's the path to the config file or the model name defined in metafile. |
+| `kie_weights` | str, optional | None | Path to the custom checkpoint file of the selected kie model. If it is not specified and "kie" is a model name of metafile, the weights will be loaded from metafile. |
+| `device` | str, optional | None | Device used for inference, accepting all allowed strings by `torch.device`. E.g., 'cuda:0' or 'cpu'. If None, the available device will be automatically used. Defaults to None. |
+
+\[1\]: `kie` is only effective when both text detection and recognition models are specified.
+
-```{note}
+**MMOCRInferencer.\_\_call\_\_()**
+
-User can use default pretrained models by specifying `det` and/or `recog`, which is equivalent to specifying their corresponding `*_config` and `*_ckpt`. However, manually specifying `*_config` and `*_ckpt` will always override values set by `det` and/or `recog`. Similar rules also apply to `kie`, `kie_config` and `kie_ckpt`.
+| Arguments | Type | Default | Description |
+| -------------------- | ----------------------- | ------------ | ----------- |
+| `inputs` | str/list/tuple/np.array | **required** | It can be a path to an image/a folder, an np array or a list/tuple (with img paths or np arrays) |
+| `return_datasamples` | bool | False | Whether to return results as DataSamples. If False, the results will be packed into a dict. |
+| `batch_size` | int | 1 | Inference batch size. |
+| `return_vis` | bool | False | Whether to return the visualization result. |
+| `print_result` | bool | False | Whether to print the inference result to the console. |
+| `show` | bool | False | Whether to display the visualization results in a popup window. |
+| `wait_time` | float | 0 | The interval of show(s). |
+| `out_dir` | str | `results/` | Output directory of results. |
+| `save_vis` | bool | False | Whether to save the visualization results to `out_dir`. |
+| `save_pred` | bool | False | Whether to save the inference results to `out_dir`. |
+
-```
+```
 
-### readtext()
+```{group-tab} Standard Inferencer
+
-| Arguments | Type | Default | Description |
-| -------------- | ----------------------- | ------------ | ----------- |
-| `img` | str/list/tuple/np.array | **required** | img, folder path, np array or list/tuple (with img paths or np arrays) |
-| `img_out_dir` | str | None | Output directory of images. |
-| `show` | bool | False | Whether to show the result visualization on screen |
-| `print_result` | bool | False | Whether to show the result for each image |
+**Inferencer.\_\_init\_\_():**
+
-All arguments are the same for the cli, all you need to do is add 2 hyphens at the beginning of the argument and replace underscores by hyphens.
-(*Example:* `img_out_dir` becomes `--img-out-dir`)
+| Arguments | Type | Default | Description |
+| --------- | ---------------------------------------------------- | ------- | ----------- |
+| `model` | str or [Weights](../modelzoo.html#weights), optional | None | Path to the config file or the model name defined in metafile. |
+| `weights` | str, optional | None | Path to the custom checkpoint file of the selected model. If it is not specified and "model" is a model name of metafile, the weights will be loaded from metafile. |
+| `device` | str, optional | None | Device used for inference, accepting all allowed strings by `torch.device`. E.g., 'cuda:0' or 'cpu'. If None, the available device will be automatically used. Defaults to None. |
+
-For bool type arguments, putting the argument in the command stores it as true.
-(*Example:* `python mmocr/demo/ocr.py --det DB_r18 demo/demo_text_det.jpg --print_result`
-means that `print_result` is set to `True`)
+**Inferencer.\_\_call\_\_()**
+
-## Models
+| Arguments | Type | Default | Description |
+| -------------------- | ----------------------- | ------------ | ----------- |
+| `inputs` | str/list/tuple/np.array | **required** | It can be a path to an image/a folder, an np array or a list/tuple (with img paths or np arrays) |
+| `return_datasamples` | bool | False | Whether to return results as DataSamples. If False, the results will be packed into a dict. |
+| `batch_size` | int | 1 | Inference batch size. |
+| `progress_bar` | bool | True | Whether to show a progress bar. |
+| `return_vis` | bool | False | Whether to return the visualization result. |
+| `print_result` | bool | False | Whether to print the inference result to the console. |
+| `show` | bool | False | Whether to display the visualization results in a popup window. |
+| `wait_time` | float | 0 | The interval of show(s). |
+| `draw_pred` | bool | True | Whether to draw predicted bounding boxes. *Only applicable on `TextDetInferencer` and `TextSpottingInferencer`.* |
+| `out_dir` | str | `results/` | Output directory of results. |
+| `save_vis` | bool | False | Whether to save the visualization results to `out_dir`. |
+| `save_pred` | bool | False | Whether to save the inference results to `out_dir`. |
+
-**Text detection:**
+```
+````
| Name | Reference |
|
||||
| ------------- | :----------------------------------------------------------------------------: |
|
||||
| DB_r18 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnet) |
|
||||
| DB_r50 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnet) |
|
||||
| DBPP_r50 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnetpp) |
|
||||
| DRRG | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#drrg) |
| FCE_IC15 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#fcenet) |
| FCE_CTW_DCNv2 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#fcenet) |
| MaskRCNN_CTW | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#mask-r-cnn) |
| MaskRCNN_IC15 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#mask-r-cnn) |
| PANet_CTW | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#panet) |
| PANet_IC15 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#panet) |
| PS_CTW | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#psenet) |
| PS_IC15 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#psenet) |
| TextSnake | [link](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#textsnake) |

**Text recognition:**

| Name | Reference |
| ------------- | :---------------------------------------------------------------------------------: |
| ABINet | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#abinet) |
| ABINet_Vision | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#abinet) |
| ASTER | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#aster) |
| CRNN | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#crnn) |
| MASTER | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#master) |
| NRTR_1/16-1/8 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#nrtr) |
| NRTR_1/8-1/4 | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#nrtr) |
| RobustScanner | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#robustscanner) |
| SAR | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#sar) |
| SATRN | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#satrn) |
| SATRN_sm | [link](https://mmocr.readthedocs.io/en/dev-1.x/textrecog_models.html#satrn) |

**Key information extraction:**

| Name | Reference |
| ----- | :----------------------------------------------------------------------------------------------------------------------------------: |
| SDMGR | [link](https://mmocr.readthedocs.io/en/dev-1.x/kie_models.html#spatial-dual-modality-graph-reasoning-for-key-information-extraction) |

## Additional info

- To perform det + recog inference (end-to-end OCR), both the `det` and `rec` arguments must be defined.
- To perform only detection, set the `rec` argument to `None`.
- To perform only recognition, set the `det` argument to `None`.

If you have any suggestions for new features, feel free to open an issue or even a PR :)

## Command Line Interface

```{note}
This section is only applicable to `MMOCRInferencer`.
```

You can use `tools/infer.py` to perform inference through `MMOCRInferencer`. Its general usage is as follows:

```bash
python tools/infer.py INPUT_PATH [--det DET] [--det-weights ...] ...
```

where `INPUT_PATH` is a required field, which should be a path to an image or a folder. Command-line parameters map to the Python interface parameters as follows:

- To convert a Python interface parameter to its command-line form, add two `--` in front of it and replace the underscore `_` with the hyphen `-`. For example, `out_dir` becomes `--out-dir`.
- For boolean parameters, putting the parameter in the command is equivalent to specifying it as True. For example, `--show` will set the `show` parameter to True.

In addition, the command line will not display the inference result by default. You can use the `--print-result` parameter to view the inference result.

Here is an example:

```bash
python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec SAR --show --print-result
```

Running this command will give the following result:

```bash
{'predictions': [{'rec_texts': ['CBank', 'Docbcba', 'GROUP', 'MAUN', 'CROBINSONS', 'AOCOC', '916M3', 'BOO9', 'Oven', 'BRANDS', 'ARETAIL', '14', '70<UKN>S', 'ROUND', 'SALE', 'YEAR', 'ALLY', 'SALE', 'SALE'],
'rec_scores': [0.9753464579582214, ...], 'det_polygons': [[551.9930285844646, 411.9138765335083, 553.6153911653112,
383.53195309638977, 620.2410061195247, 387.33785033226013, 618.6186435386782, 415.71977376937866], ...], 'det_scores': [0.8230461478233337, ...]}]}
```
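To make the parameter mapping concrete, here is a sketch pairing a Python call with its CLI equivalent; the image path comes from the demo above, and `out_dir`/`save_pred` are documented `MMOCRInferencer.__call__()` parameters:

```python
from mmocr.apis import MMOCRInferencer

# Python interface: keyword arguments use underscores.
ocr = MMOCRInferencer(det='DBNet', rec='SAR')
ocr('demo/demo_text_ocr.jpg', out_dir='outputs/', save_pred=True, show=True)

# Equivalent CLI call: prefix each keyword with '--', swap '_' for '-',
# and pass booleans as bare flags:
#   python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec SAR \
#       --out-dir outputs/ --save-pred --show
```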
@@ -0,0 +1,115 @@
import os.path as osp

from mmengine.fileio import load
from tabulate import tabulate


class BaseWeightList:
    """Class for generating a model list in markdown format.

    Attributes:
        dataset_list (list[str]): List of dataset names.
        table_header (list[str]): List of table headers.
        msg (str): Message to be displayed.
        task_abbr (str): Abbreviation of the task name.
        metric_name (str): Metric name.
    """

    base_url: str = 'https://github.com/open-mmlab/mmocr/blob/1.x/'
    table_cfg: dict = dict(
        tablefmt='pipe', floatfmt='.2f', numalign='right', stralign='center')
    dataset_list: list
    table_header: list
    msg: str
    task_abbr: str
    metric_name: str

    def __init__(self):
        data = (d + f' ({self.metric_name})' for d in self.dataset_list)
        self.table_header = ['Model', 'README', *data]

    def _get_model_info(self, task_name: str):
        meta_indexes = load('../../model-index.yml')
        for meta_path in meta_indexes['Import']:
            meta_path = osp.join('../../', meta_path)
            metainfo = load(meta_path)
            collection2md = {}
            for item in metainfo['Collections']:
                url = self.base_url + item['README']
                collection2md[item['Name']] = f'[link]({url})'
            for item in metainfo['Models']:
                if task_name not in item['Config']:
                    continue
                name = f'`{item["Name"]}`'
                # Aliases (if any) are listed alongside the full model name,
                # separated by ' / '.
                if item.get('Alias', None):
                    if isinstance(item['Alias'], str):
                        item['Alias'] = [item['Alias']]
                    aliases = [f'`{alias}`' for alias in item['Alias']]
                    aliases.append(name)
                    name = ' / '.join(aliases)
                readme = collection2md[item['In Collection']]
                eval_res = self._get_eval_res(item)
                yield (name, readme, *eval_res)

    def _get_eval_res(self, item):
        eval_res = {k: '-' for k in self.dataset_list}
        for res in item['Results']:
            if res['Dataset'] in self.dataset_list:
                eval_res[res['Dataset']] = res['Metrics'][self.metric_name]
        return (eval_res[k] for k in self.dataset_list)

    def gen_model_list(self):
        content = f'\n{self.msg}\n'
        content += '```{table}\n:class: model-summary nowrap field-list '
        content += 'table table-hover\n'
        content += tabulate(
            self._get_model_info(self.task_abbr), self.table_header,
            **self.table_cfg)
        content += '\n```\n'
        return content


class TextDetWeightList(BaseWeightList):

    dataset_list = ['ICDAR2015', 'CTW1500', 'Totaltext']
    msg = '### Text Detection'
    task_abbr = 'textdet'
    metric_name = 'hmean-iou'


class TextRecWeightList(BaseWeightList):

    dataset_list = [
        'Avg', 'IIIT5K', 'SVT', 'ICDAR2013', 'ICDAR2015', 'SVTP', 'CT80'
    ]
    msg = ('### Text Recognition\n'
           '```{note}\n'
           'Avg is the average on IIIT5K, SVT, ICDAR2013, ICDAR2015, SVTP,'
           ' CT80.\n```\n')
    task_abbr = 'textrecog'
    metric_name = 'word_acc'

    def _get_eval_res(self, item):
        eval_res = {k: '-' for k in self.dataset_list}
        avg = []
        for res in item['Results']:
            if res['Dataset'] in self.dataset_list:
                eval_res[res['Dataset']] = res['Metrics'][self.metric_name]
                avg.append(res['Metrics'][self.metric_name])
        # The 'Avg' column is the mean over the datasets that report
        # this metric.
        eval_res['Avg'] = sum(avg) / len(avg)
        return (eval_res[k] for k in self.dataset_list)


class KIEWeightList(BaseWeightList):

    dataset_list = ['wildreceipt']
    msg = '### Key Information Extraction'
    task_abbr = 'kie'
    metric_name = 'macro_f1'


def gen_weight_list():
    content = TextDetWeightList().gen_model_list()
    content += TextRecWeightList().gen_model_list()
    content += KIEWeightList().gen_model_list()
    return content
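A quick sketch of how this module is consumed; the call below mirrors the `gen_weight_list()` usage in the `stats.py` change further down this diff, and assumes it runs from the docs directory next to `weight_list.py` so that the relative `../../model-index.yml` path resolves:

```python
# Assumed to run from the docs language directory, next to weight_list.py.
from weight_list import gen_weight_list

markdown = gen_weight_list()  # detection + recognition + KIE tables
print(markdown.splitlines()[1])  # '### Text Detection'
```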
@@ -0,0 +1,31 @@
$(document).ready(function () {
    // Turn every ".model-summary" table into a paginated,
    // horizontally scrollable DataTable.
    var table = $('.model-summary').DataTable({
        "stateSave": false,
        "lengthChange": false,
        "pageLength": 10,
        "order": [],
        "scrollX": true,
        "columnDefs": [
            { "type": "summary", targets: '_all' },
        ]
    });
    // Override the default sorting for the "summary" columns so that the
    // "-" placeholder always sorts last.
    jQuery.extend(jQuery.fn.dataTableExt.oSort, {
        "summary-asc": function (str1, str2) {
            if (str1 == "<p>-</p>")
                return 1;
            if (str2 == "<p>-</p>")
                return -1;
            return ((str1 < str2) ? -1 : ((str1 > str2) ? 1 : 0));
        },

        "summary-desc": function (str1, str2) {
            if (str1 == "<p>-</p>")
                return 1;
            if (str2 == "<p>-</p>")
                return -1;
            return ((str1 < str2) ? 1 : ((str1 > str2) ? -1 : 0));
        }
    });
})
@@ -40,7 +40,7 @@ The conventions for the `InstanceData` fields in MMOCR are shown in the table below. Notably,

| Field | Type | Description |
| -------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------ |
| bboxes | `torch.FloatTensor` | Bounding boxes of text instances in `[x1, y1, x2, y2]` order, with shape `(N, 4)`. |
| labels | `torch.LongTensor` | Instance categories, of length `(N, )`. By default, MMOCR uses `0` to denote the positive class, i.e. "text". |
| polygons | `list[np.array(dtype=np.float32)]` | Polygons representing the text instances; the list has length `(N, )`. |
| scores | `torch.Tensor` | Confidence scores of the detected text boxes, of length `(N, )`. |

@@ -99,7 +99,7 @@ The conventions for the `LabelData` fields in MMOCR are shown in the table below:

| Field | Type | Description |
| -------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------ |
| bboxes | `torch.FloatTensor` | Bounding boxes of text instances in `[x1, y1, x2, y2]` order, with shape `(N, 4)`. |
| labels | `torch.LongTensor` | Instance categories, of length `(N, )`. MMOCR usually uses `0` to denote the positive class, i.e. "text". |
| polygons | `list[np.array(dtype=np.float32)]` | Polygons representing the text instances; the list has length `(N, )`. |
| scores | `torch.Tensor` | Confidence scores of the detection boxes predicted by the text instance task, of length `(N, )`. |

@@ -182,7 +182,7 @@ data_sample.pred_text = pred_text

| Field | Type | Description |
| ----------- | ------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| bboxes | `torch.Tensor` | Bounding boxes of text instances in `[x1, y1, x2, y2]` order, with shape `(N, 4)`. |
| labels | `torch.LongTensor` | Instance categories, of length `(N, )`. In MMOCR this is usually 0, i.e. the "text" class. |
| texts | `list[str]` | Texts of each instance, of length `(N, )`, used in end-to-end OCR and KIE tasks. |
| edge_labels | `torch.IntTensor` | Adjacency matrix between nodes, with shape `(N, N)`. In KIE, the possible states between two nodes are `-1` (ignored; excluded from the loss), `0` (disconnected) and `1` (connected). |
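To make the convention concrete, here is a minimal sketch (values made up) that packs two detection results into an `InstanceData` following the fields above:

```python
import numpy as np
import torch
from mmengine.structures import InstanceData

# Two text instances; boxes follow the [x1, y1, x2, y2] convention.
pred_instances = InstanceData()
pred_instances.bboxes = torch.FloatTensor([[0, 0, 10, 10],
                                           [10, 10, 30, 20]])
pred_instances.labels = torch.LongTensor([0, 0])  # 0 = the "text" class
pred_instances.scores = torch.Tensor([0.95, 0.80])
pred_instances.polygons = [
    np.array([0, 0, 10, 0, 10, 10, 0, 10], dtype=np.float32),
    np.array([10, 10, 30, 10, 30, 20, 10, 20], dtype=np.float32),
]
```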
@@ -48,6 +48,7 @@ extensions = [
    'sphinx.ext.autodoc.typehints',
    'sphinx.ext.autosummary',
    'sphinx.ext.autosectionlabel',
    'sphinx_tabs.tabs',
]
autodoc_typehints = 'description'

@@ -57,6 +58,8 @@ autosummary_generate = True  # Turn on sphinx.ext.autosummary
copybutton_prompt_text = r'>>> |\.\.\. '
copybutton_prompt_is_regexp = True

myst_enable_extensions = ['colon_fence']

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

@@ -145,8 +148,16 @@ master_doc = 'index'
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_css_files = [
    'https://cdn.datatables.net/1.13.2/css/dataTables.bootstrap5.min.css',
    'css/readthedocs.css'
]
html_js_files = [
    'https://cdn.datatables.net/1.13.2/js/jquery.dataTables.min.js',
    'https://cdn.datatables.net/1.13.2/js/dataTables.bootstrap5.min.js',
    'js/collapsed.js',
    'js/table.js',
]

myst_heading_anchors = 4
@@ -27,41 +27,42 @@ conda activate openmmlab

**Step 3.** Install PyTorch following the [official instructions](https://pytorch.org/get-started/locally/):

````{tabs}

```{code-tab} shell GPU platform
conda install pytorch torchvision -c pytorch
```

```{code-tab} shell CPU platform
conda install pytorch torchvision cpuonly -c pytorch
```

````

## Installation Steps

We recommend that most users follow our best practices to install MMOCR. If you need a more flexible installation process, refer to the [Customize Installation](#customize-installation) section.

### Best Practices

**Step 1.** Install [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) and [MMDetection](https://github.com/open-mmlab/mmdetection) using [MIM](https://github.com/open-mmlab/mim).

```shell
pip install -U openmim
mim install mmengine
mim install 'mmcv>=2.0.0rc1'
mim install 'mmdet>=3.0.0rc0'
```
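After these commands finish, a small sanity check (a sketch; all three packages expose `__version__`) confirms the core dependencies resolved to the expected versions:

```python
import mmcv
import mmdet
import mmengine

# Expect mmengine, mmcv>=2.0.0rc1 and mmdet>=3.0.0rc0 as installed above.
print('mmengine:', mmengine.__version__)
print('mmcv:', mmcv.__version__)
print('mmdet:', mmdet.__version__)
```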
**Step 2.** Install MMOCR.

If you need to run and develop MMOCR directly, install it from source (recommended).

If you use MMOCR as a third-party dependency, install it via MIM.

`````{tabs}

````{group-tab} Install from source

```shell
git clone https://github.com/open-mmlab/mmocr.git
@@ -73,21 +74,42 @@ pip install -v -e .
# "-e" installs the repo in editable mode: any local change to the code
# takes effect immediately, without reinstallation
```

````

````{group-tab} Install via MIM

```shell
mim install 'mmocr>=1.0.0rc0'
```

````

`````

**Step 3 (Optional).** If you need to use transforms that depend on `albumentations`, such as `Albu` in ABINet's data pipeline, install the dependency with:

`````{tabs}

````{group-tab} Install from source

```shell
pip install -r requirements/albu.txt
```

````

````{group-tab} Install via MIM

```shell
pip install albumentations>=1.1.0 --no-binary qudida,albumentations
```

````

`````

```{note}

After installing `albumentations`, we recommend checking your environment to make sure that `opencv-python` and `opencv-python-headless` are not installed together, otherwise unpredictable errors may occur. If they happen to coexist, uninstall `opencv-python-headless` so that MMOCR's visualization utilities run correctly.
```
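One way to run that check, sketched with the standard library's `importlib.metadata` (Python 3.8+):

```python
from importlib.metadata import PackageNotFoundError, version

found = []
for pkg in ('opencv-python', 'opencv-python-headless'):
    try:
        version(pkg)
        found.append(pkg)
    except PackageNotFoundError:
        pass
print(found)  # if both appear, run: pip uninstall opencv-python-headless
```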
@@ -98,29 +120,48 @@ pip install albumentations>=1.1.0 --no-binary qudida,albumentations

### Verify the Installation

You can verify whether MMOCR is installed successfully by running a simple inference task.

`````{tabs}

````{tab} Python

Run the following code in Python:

```python
>>> from mmocr.apis import MMOCRInferencer
>>> ocr = MMOCRInferencer(det='DBNet', rec='CRNN')
>>> ocr('demo/demo_text_ocr.jpg', show=True, print_result=True)
```
````

````{tab} Shell

If you installed MMOCR from source, you can run the following command in MMOCR's root directory:

```shell
python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec CRNN --show --print-result
```
````

`````

If MMOCR was installed correctly, you should see the recognition result shown both as an image and as text:

<div align="center">
<img src="https://user-images.githubusercontent.com/24622904/187825445-d30cbfa6-5549-4358-97fe-245f08f4ed94.jpg" height="250"/>
</div>
<br/>

```bash
# Recognition result
{'predictions': [{'rec_texts': ['cbanks', 'docecea', 'grouf', 'pwate', 'chobnsonsg', 'soxee', 'oeioh', 'c', 'sones', 'lbrandec', 'sretalg', '11', 'to8', 'round', 'sale', 'year',
'ally', 'sie', 'sall'], 'rec_scores': [...], 'det_polygons': [...], 'det_scores':
[...]}]}
```

```{note}
If you are running MMOCR on a server without a GUI, or through an SSH tunnel with X11 forwarding disabled, you may not see the pop-up window.
```

## Customize Installation
@@ -1,17 +1,37 @@
# Quick Run

This chapter walks you through the basic functions of MMOCR. We assume that you have [installed MMOCR from source](install.md#best-practices).

## Inference

Run the following command in MMOCR's root directory:

```shell
python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec CRNN --show --print-result
```

You should see a pop-up window with the prediction, and the inference result printed to the console.

<div align="center">
<img src="https://user-images.githubusercontent.com/24622904/187825445-d30cbfa6-5549-4358-97fe-245f08f4ed94.jpg" height="250"/>
</div>
<br/>

```bash
# Inference result
{'predictions': [{'rec_texts': ['cbanks', 'docecea', 'grouf', 'pwate', 'chobnsonsg', 'soxee', 'oeioh', 'c', 'sones', 'lbrandec', 'sretalg', '11', 'to8', 'round', 'sale', 'year',
'ally', 'sie', 'sall'], 'rec_scores': [...], 'det_polygons': [...], 'det_scores':
[...]}]}
```

```{note}
If you are running MMOCR on a server without a GUI, or through an SSH tunnel with X11 forwarding disabled, you may not see the pop-up window.
```

A more detailed description of MMOCR's inference interface can be found [here](../user_guides/inference.md).

In addition to using the pretrained models we provide, you can also train popular models on your own dataset. In the next sections, we take you through MMOCR's basic functions by training DBNet on a miniature [ICDAR 2015](https://rrc.cvc.uab.es/?ch=4&com=downloads) dataset.

## Prepare a Dataset

Since OCR datasets come in many kinds and formats, which hinders switching between and jointly training on multiple datasets, MMOCR defines a [unified data format](../user_guides/dataset_prepare.md) and provides a [one-stop data preparation script](../user_guides/data_prepare/dataset_preparer.md) for commonly used OCR datasets. Usually, to use a dataset in MMOCR, you just need to follow the corresponding steps.
@@ -20,12 +40,12 @@
But we also know that efficiency is everything, especially for you who want to get started with MMOCR quickly.
```

Here, we have prepared a lite version of the ICDAR 2015 dataset for demonstration. Download our pre-prepared [archive](https://download.openmmlab.com/mmocr/data/icdar2015/mini_icdar2015.tar.gz) and extract it to the `data/` directory under mmocr to get the prepared images and annotation files.

```Bash
wget https://download.openmmlab.com/mmocr/data/icdar2015/mini_icdar2015.tar.gz
mkdir -p data/
tar xzvf mini_icdar2015.tar.gz -C data/
```
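Before moving on, you may want to peek at what the archive actually extracted (a sketch using only the standard library; `data/` is the extraction target used above):

```python
import os

# Shallow walk of data/ to list the images and annotation files
# unpacked from mini_icdar2015.tar.gz.
for root, dirs, files in os.walk('data'):
    if root.count(os.sep) <= 2:  # keep the listing shallow
        print(root, '->', (sorted(dirs) + sorted(files))[:5])
```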
## Modify the Config
@@ -5,5 +5,3 @@ sed -e '$a\\n' -s ../../configs/kie/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/
sed -e '$a\\n' -s ../../configs/textdet/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 文本检测模型' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textdet_models.md
sed -e '$a\\n' -s ../../configs/textrecog/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 文本识别模型' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >textrecog_models.md
sed -e '$a\\n' -s ../../configs/backbone/*/*.md | sed "s/md###t/html#t/g" | sed "s/#/#&/" | sed '1i\# 骨干网络' | sed 's/](\/docs\//](/g' | sed 's=](/=](https://github.com/open-mmlab/mmocr/tree/master/=g' >backbones.md
# replace special symbols in inference.md
sed -i 's/:heavy_check_mark:/Yes/g' user_guides/inference.md && sed -i 's/:x:/No/g' user_guides/inference.md
@@ -1,12 +1,12 @@
#!/usr/bin/env python
# Copyright (c) OpenMMLab. All rights reserved.
import functools as func
import glob
import re
from os.path import basename, splitext

import numpy as np
import titlecase
from weight_list import gen_weight_list


def title2anchor(name):

@@ -16,7 +16,9 @@ def title2anchor(name):

# Count algorithms

files = [
    'backbones.md', 'textdet_models.md', 'textrecog_models.md', 'kie_models.md'
]

stats = []

@@ -51,7 +53,7 @@ for f in files:
            re.search(
                rf'\btitle\s*=\s*{{\s*{q}\s*}}.*?\n## (.*?)\s*[,;]?\s*\n',
                revcontent, re.DOTALL | re.IGNORECASE).group(1))
        paperlinks[p] = f'[{p}]({splitext(basename(f))[0]}.md#{paper_link})'
    paperlist = '\n'.join(
        sorted(f' - [{t}] {paperlinks[x]}' for t, x in papers))
    # count configs

@@ -89,8 +91,30 @@ papertypes, papercounts = np.unique([t for t, _ in allpapers],
countstr = '\n'.join(
    [f' - {t}: {c}' for t, c in zip(papertypes, papercounts)])

# get model list
weight_list = gen_weight_list()

modelzoo = f"""
# 总览

## 权重

以下是可用于[推理](user_guides/inference.md)的权重列表。

为了便于使用,有的权重可能会存在多个较短的别名,这在表格中将用“/”分隔。

例如,表格中展示的 `DB_r18 / dbnet_resnet18_fpnc_1200e_icdar2015` 表示您可以使用
`DB_r18` 或 `dbnet_resnet18_fpnc_1200e_icdar2015` 来初始化推理器:

```python
>>> from mmocr.apis import TextDetInferencer
>>> inferencer = TextDetInferencer(model='DB_r18')
>>> # 等价于
>>> inferencer = TextDetInferencer(model='dbnet_resnet18_fpnc_1200e_icdar2015')
```

{weight_list}

## 统计数据

* 模型权重文件数量: {len(allckpts)}
* 配置文件数量: {len(allconfigs)}

@@ -98,7 +122,7 @@ modelzoo = f"""
{countstr}

{msglist}
"""  # noqa

with open('modelzoo.md', 'w') as f:
    f.write(modelzoo)
@@ -1,192 +1,531 @@
# Inference

In OpenMMLab, all inference operations are unified into the `Inferencer`. An inferencer is designed to be a simple, easy-to-use API that has a very similar interface across different OpenMMLab libraries.

There are two kinds of inferencers in MMOCR:

- **Standard inferencers**: each fundamental task in MMOCR has a standard inferencer, namely `TextDetInferencer` (text detection), `TextRecInferencer` (text recognition), `TextSpottingInferencer` (end-to-end OCR) and `KIEInferencer` (key information extraction). They have very similar interfaces, follow a standard input/output protocol, and overall follow the OpenMMLab design. These inferencers can also be chained together to run inference on a series of tasks.
- **MMOCRInferencer**: we also provide `MMOCRInferencer`, a convenient inference interface designed specifically for MMOCR. It wraps and chains all the inferencers in MMOCR, so users can run a series of tasks on an image with this inferencer and directly get the final result. *However, its interface differs somewhat from the standard inferencers, and some standard inferencer features may have been sacrificed for the sake of simplicity.*

For new users, we recommend **MMOCRInferencer** to test out combinations of different models.

If you are a developer and want to integrate the models into your own project, we recommend the **standard inferencers**, as they are more flexible and standardized, and come with full functionality.

## Basic Usage

`````{tabs}

````{group-tab} MMOCRInferencer

Currently, `MMOCRInferencer` can run inference on the following tasks:

- Text detection
- Text recognition
- OCR (text detection + text recognition)
- Key information extraction (text detection + text recognition + key information extraction)
- *OCR (text spotting)* (coming soon)

For convenience, `MMOCRInferencer` provides both a Python interface and a command line interface. For example, to run OCR inference on demo/demo_text_ocr.jpg with `DBNet` as the text detection model and `SAR` as the text recognition model, just run:

::::{tabs}

:::{code-tab} python
>>> from mmocr.apis import MMOCRInferencer
>>> # Load the models
>>> ocr = MMOCRInferencer(det='DBNet', rec='SAR')
>>> # Run inference and visualize the results
>>> ocr('demo/demo_text_ocr.jpg', show=True)
:::

:::{code-tab} bash Command line
python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec SAR --show
:::
::::

The visualization results will be displayed in a new window:

<div align="center">
<img src="https://user-images.githubusercontent.com/22607038/220563262-e9c1ab52-9b96-4d9c-bcb6-f55ff0b9e1be.png" height="250"/>
</div>

```{note}
If you are running MMOCR on a server without a GUI, or through an SSH tunnel with X11 forwarding disabled, the `show` option will not work. However, you can still save the visualizations to files by setting `out_dir` and `save_vis=True`. See [Dumping Results](#dumping-results) for details.
```

Depending on the initialization arguments, `MMOCRInferencer` can run in different modes. For example, it can run in KIE mode if initialized with `det`, `rec` and `kie`.

::::{tabs}

:::{code-tab} python
>>> kie = MMOCRInferencer(det='DBNet', rec='SAR', kie='SDMGR')
>>> kie('demo/demo_kie.jpeg', show=True)
:::

:::{code-tab} bash Command line
python tools/infer.py demo/demo_kie.jpeg --det DBNet --rec SAR --kie SDMGR --show
:::

::::

The visualization results are as follows:

<div align="center">
<img src="https://user-images.githubusercontent.com/22607038/220569700-fd4894bc-f65a-405e-95e7-ebd2d614aedd.png" height="250"/>
</div>
<br />

As you can see, the Python interface and the command line interface of MMOCRInferencer are used in very similar ways. The rest of this page takes the Python interface as the example to introduce MMOCRInferencer's usage. For more about the command line interface, see [Command Line Interface](#command-line-interface).

````

````{group-tab} Standard inferencers

In general, all standard inferencers in OpenMMLab share a very similar interface. The following example uses `TextDetInferencer` to run inference on a single image.

```python
>>> from mmocr.apis import TextDetInferencer
>>> # Load the model
>>> inferencer = TextDetInferencer(model='DBNet')
>>> # Run inference
>>> inferencer('demo/demo_text_ocr.jpg', show=True)
```

The visualization results are shown below:

<div align="center">
<img src="https://user-images.githubusercontent.com/22607038/221418215-2431d0e9-e16e-4deb-9c52-f8b86801706a.png" height="250"/>
</div>

````

`````
## Initialization

Each inferencer must be initialized with a model. You can also manually select the inference device during initialization.

### Model Initialization

`````{tabs}

````{group-tab} MMOCRInferencer

For each task, `MMOCRInferencer` takes two arguments, `xxx` and `xxx_weights` (e.g. `det` and `det_weights`), to initialize a model. Here we take `det` and `det_weights` as an example to illustrate some typical ways to initialize a model.

- To run inference with one of MMOCR's pretrained models, just pass its name to the `det` argument; the weights will be automatically downloaded and loaded from OpenMMLab's model zoo. [Here](../modelzoo.md#weights) lists all the models that can be initialized this way in MMOCR.

  ```python
  >>> MMOCRInferencer(det='DBNet')
  ```

- To load custom configs and weights, you can pass the path of the config file to `det` and the path of the weights to `det_weights`.

  ```python
  >>> MMOCRInferencer(det='path/to/dbnet_config.py', det_weights='path/to/dbnet.pth')
  ```

If you need more initialization methods, switch to the "Standard inferencers" tab.

````

````{group-tab} Standard inferencers

Every standard `Inferencer` accepts two arguments, `model` and `weights`. In MMOCRInferencer, they correspond to `xxx` and `xxx_weights` (e.g. `det` and `det_weights`), respectively.

- `model` takes either a model name or the path of a config file as input. Model names are obtained from the models' metafiles ([example](https://github.com/open-mmlab/mmocr/blob/1.x/configs/textdet/dbnet/metafile.yml)) indexed in [model-index.yml](https://github.com/open-mmlab/mmocr/blob/1.x/model-index.yml). You can find the list of available weights [here](../modelzoo.md#weights).

- `weights` takes the path of a weights file.

<br />

Here are some common ways to initialize a model.

- You can run inference with one of MMOCR's pretrained models by passing its name to `model`. The weights will be automatically downloaded and loaded from OpenMMLab's model zoo.

  ```python
  >>> from mmocr.apis import TextDetInferencer
  >>> inferencer = TextDetInferencer(model='DBNet')
  ```

  ```{note}
  The model type must match the inferencer's task.
  ```

  You can let the inferencer load custom weights by passing their path or URL to `weights`.

  ```python
  >>> inferencer = TextDetInferencer(model='DBNet', weights='path/to/dbnet.pth')
  ```

- To load custom configs and weights, pass the path of the config file to `model` and the path of the weights to `weights`.

  ```python
  >>> inferencer = TextDetInferencer(model='path/to/dbnet_config.py', weights='path/to/dbnet.pth')
  ```

- By default, [MMEngine](https://github.com/open-mmlab/mmengine/) automatically dumps the config into the checkpoint when training a model. If you have a checkpoint trained with MMEngine, you can also pass the path of the weights to `weights` without specifying `model`:

  ```python
  >>> # An error will be raised if the config cannot be found in the checkpoint
  >>> inferencer = TextDetInferencer(weights='path/to/dbnet.pth')
  ```

- Passing a config file to `model` without specifying `weights` results in a randomly initialized model.

````
`````
### Inference Device

Each inferencer instance is bound to a device. By default, the best device is automatically determined by [MMEngine](https://github.com/open-mmlab/mmengine/). You can also change the device by specifying the `device` argument. For example, you can use the following code to create an inferencer on GPU 1.

`````{tabs}

````{group-tab} MMOCRInferencer

```python
>>> inferencer = MMOCRInferencer(det='DBNet', device='cuda:1')
```

````

````{group-tab} Standard inferencers

```python
>>> inferencer = TextDetInferencer(model='DBNet', device='cuda:1')
```

````
`````

To create an inferencer on CPU:

`````{tabs}

````{group-tab} MMOCRInferencer

```python
>>> inferencer = MMOCRInferencer(det='DBNet', device='cpu')
```

````

````{group-tab} Standard inferencers

```python
>>> inferencer = TextDetInferencer(model='DBNet', device='cpu')
```

````

`````

Refer to [torch.device](torch.device) for all the forms that the `device` argument accepts.
## Inference

Once the inferencer is initialized, you can directly pass in the raw data to be inferred and get the inference results from the return value.

### Inputs

`````{tabs}

````{tab} MMOCRInferencer / TextDetInferencer / TextRecInferencer / TextSpottingInferencer

The input can be in any of the following formats:

- str: path/URL to the image.

  ```python
  >>> inferencer('demo/demo_text_ocr.jpg')
  ```

- array: numpy array of the image. It should be in BGR order.

  ```python
  >>> import mmcv
  >>> array = mmcv.imread('demo/demo_text_ocr.jpg')
  >>> inferencer(array)
  ```

- list: a list of the basic types above. Each element in the list will be processed separately.

  ```python
  >>> inferencer(['img_1.jpg', 'img_2.jpg'])
  >>> # Mixing types within the list is also allowed
  >>> inferencer(['img_1.jpg', array])
  ```

- str: path to a directory. All images in the directory will be processed.

  ```python
  >>> inferencer('tests/data/det_toy_dataset/imgs/test/')
  ```

````

````{tab} KIEInferencer

The input can be a dict or a list of dicts, where each dict contains the following keys:

- `img` (str or ndarray): path to the image, or the image itself. This key is not required if the KIE inferencer is used in no-visual mode. If it is a numpy array, it should be an image encoded in BGR order.
- `img_shape` (tuple(int, int)): image shape (H, W). Only required when the KIE inferencer is used in no-visual mode and `img` is not provided.
- `instances` (list[dict]): list of instances.

Each `instance` should contain the following keys:

```python
{
    # A nested list of 4 numbers representing the bounding boxes of the instances, in (x1, y1, x2, y2) order
    "bbox": np.array([[x1, y1, x2, y2], [x1, y1, x2, y2], ...],
                     dtype=np.int32),

    # List of texts
    "texts": ['text1', 'text2', ...],
}
```

````
`````
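For example, a minimal no-visual input built to this spec (coordinates and texts made up) could look like:

```python
import numpy as np

# One image's worth of KIEInferencer input in no-visual mode:
# only the image shape and the detected instances are required.
kie_input = dict(
    img_shape=(1080, 1920),  # (H, W)
    instances=[
        dict(
            bbox=np.array([[100, 50, 400, 90],
                           [120, 120, 380, 160]], dtype=np.int32),
            texts=['Subtotal', '$1.99'],
        )
    ],
)
# kie_inferencer(kie_input)  # with a hypothetical KIEInferencer instance
```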
### Output

By default, each inferencer returns the prediction results as a dict.

- `visualization` contains the visualized predictions. It is an empty list by default unless `return_vis=True`.

- `predictions` contains the prediction results returned in a json-serializable format. As shown below, the contents differ depending on the task type.

`````{tabs}

:::{group-tab} MMOCRInferencer

```python
{
    'predictions' : [
      # Each instance corresponds to one input image
      {
        'det_polygons': [...],  # 2d list of length (N,), in [x1, y1, x2, y2, ...] order
        'det_scores': [...],  # list of floats, of length (N, )
        'det_bboxes': [...],  # 2d list of shape (N, 4), in [min_x, min_y, max_x, max_y] order
        'rec_texts': [...],  # list of strings, of length (N, )
        'rec_scores': [...],  # list of floats, of length (N, )
        'kie_labels': [...],  # node labels, of length (N, )
        'kie_scores': [...],  # node confidences, of length (N, )
        'kie_edge_scores': [...],  # edge prediction confidences, of shape (N, N)
        'kie_edge_labels': [...]  # edge labels, of shape (N, N)
      },
      ...
    ],
    'visualization' : [
      array(..., dtype=uint8),
    ]
}
```

:::

````{group-tab} Standard inferencers

::::{tabs}
:::{code-tab} python TextDetInferencer

{
    'predictions' : [
      # Each instance corresponds to one input image
      {
        'polygons': [...],  # 2d list of length (N,), in [x1, y1, x2, y2, ...] order
        'bboxes': [...],  # 2d list of shape (N, 4), in [min_x, min_y, max_x, max_y] order
        'scores': [...]  # list of floats, of length (N, )
      },
      ...
    ]
    'visualization' : [
      array(..., dtype=uint8),
    ]
}
:::

:::{code-tab} python TextRecInferencer
{
    'predictions' : [
      # Each instance corresponds to one input image
      {
        'text': '...',  # string
        'scores': 0.1,  # float
      },
      ...
    ]
    'visualization' : [
      array(..., dtype=uint8),
    ]
}
:::

:::{code-tab} python TextSpottingInferencer
{
    'predictions' : [
      # Each instance corresponds to one input image
      {
        'polygons': [...],  # 2d list of length (N,), in [x1, y1, x2, y2, ...] order
        'bboxes': [...],  # 2d list of shape (N, 4), in [min_x, min_y, max_x, max_y] order
        'scores': [...]  # list of floats, of length (N, )
        'texts': ['...',]  # list of strings, of length (N, )
      },
    ]
    'visualization' : [
      array(..., dtype=uint8),
    ]
}
:::

:::{code-tab} python KIEInferencer
{
    'predictions' : [
      # Each instance corresponds to one input image
      {
        'labels': [...],  # node labels, of length (N, )
        'scores': [...],  # node confidences, of length (N, )
        'edge_scores': [...],  # edge prediction confidences, of shape (N, N)
        'edge_labels': [...],  # edge labels, of shape (N, N)
      },
    ]
    'visualization' : [
      array(..., dtype=uint8),
    ]
}
:::
::::
````

`````

If you want to get the raw outputs from the model, you can set `return_datasamples` to `True` to get the original [DataSample](structures.md), which will be stored in `predictions`.
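As a usage sketch, here is how you might pull fields out of an `MMOCRInferencer` result dict, with the keys documented above (`ocr` is an OCR-mode inferencer as in the earlier examples):

```python
result = ocr('demo/demo_text_ocr.jpg')
pred = result['predictions'][0]  # one entry per input image
for text, score in zip(pred['rec_texts'], pred['rec_scores']):
    print(f'{text}: {score:.2f}')
```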
### Dumping Results

Apart from getting the predictions from the return value, you can also export the predictions and visualizations to files by setting `out_dir` and `save_pred`/`save_vis`.

```python
>>> inferencer('img_1.jpg', out_dir='outputs/', save_pred=True, save_vis=True)
```

The directory structure of the results is as follows:

```text
outputs
├── preds
│   └── img_1.json
└── vis
    └── img_1.jpg
```

The filename is the same as that of the corresponding input image. If the input image is an array, the filename will be a number starting from 0.

### Batch Inference

You can customize the batch size for batch inference by setting `batch_size`. The default batch size is 1.
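For example, to process three (hypothetical) images per forward pass:

```python
>>> inferencer(['img_1.jpg', 'img_2.jpg', 'img_3.jpg'], batch_size=3)
```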
## API

Here are extensive lists of the inferencers' arguments.

````{tabs}

```{group-tab} MMOCRInferencer

**MMOCRInferencer.\_\_init\_\_():**

| Argument | Type | Default | Description |
| ------------- | ------------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `det` | str or [Weights](../modelzoo.html#id2), optional | None | Pretrained text detection algorithm: either the path to a config file or a model name defined in the metafile. |
| `det_weights` | str, optional | None | Path to the custom weights of the det model. |
| `rec` | str or [Weights](../modelzoo.html#id2), optional | None | Pretrained text recognition algorithm: either the path to a config file or a model name defined in the metafile. |
| `rec_weights` | str, optional | None | Path to the custom weights of the rec model. |
| `kie` \[1\] | str or [Weights](../modelzoo.html#id2), optional | None | Pretrained key information extraction algorithm: either the path to a config file or a model name defined in the metafile. |
| `kie_weights` | str, optional | None | Path to the custom weights of the kie model. |
| `device` | str, optional | None | Device used for inference, accepting all strings allowed by `torch.device`, e.g. 'cuda:0' or 'cpu'. If None, the available device will be used automatically. Defaults to None. |

\[1\]: `kie` takes effect only when both text detection and recognition models are specified.

**MMOCRInferencer.\_\_call\_\_()**

| Argument | Type | Default | Description |
| -------------------- | ----------------------- | ------------ | ----------------------------------------------------------------------------------------------------------- |
| `inputs` | str/list/tuple/np.array | **required** | It can be a path to an image/folder, a numpy array, or a list/tuple containing image paths or numpy arrays |
| `return_datasamples` | bool | False | Whether to return results as DataSamples. If False, the results will be packed into a dict. |
| `batch_size` | int | 1 | Inference batch size. |
| `return_vis` | bool | False | Whether to return the visualization results. |
| `print_result` | bool | False | Whether to print the inference results to the console. |
| `show` | bool | False | Whether to display the visualization results in a pop-up window. |
| `wait_time` | float | 0 | The interval for which the pop-up window displays the visualization. |
| `out_dir` | str | `results/` | Output directory of the results. |
| `save_vis` | bool | False | Whether to save the visualization results to `out_dir`. |
| `save_pred` | bool | False | Whether to save the inference results to `out_dir`. |

```

```{group-tab} Standard inferencers

**Inferencer.\_\_init\_\_():**

| Argument | Type | Default | Description |
| --------- | ------------------------------------------------ | ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `model` | str or [Weights](../modelzoo.html#id2), optional | None | Path to a config file, or a model name defined in the metafile. |
| `weights` | str, optional | None | Path to the weights file. |
| `device` | str, optional | None | Device used for inference, accepting all strings allowed by `torch.device`, e.g. 'cuda:0' or 'cpu'. If None, the available device will be used automatically. Defaults to None. |

**Inferencer.\_\_call\_\_()**

| Argument | Type | Default | Description |
| -------------------- | ----------------------- | ------------ | -------------------------------------------------------------------------------------------------------------------- |
| `inputs` | str/list/tuple/np.array | **required** | A path to an image/folder, a numpy array, or a list/tuple of image paths or numpy arrays |
| `return_datasamples` | bool | False | Whether to return results as DataSamples. If False, the results will be packed into a dict. |
| `batch_size` | int | 1 | Inference batch size. |
| `progress_bar` | bool | True | Whether to show a progress bar. |
| `return_vis` | bool | False | Whether to return the visualization results. |
| `print_result` | bool | False | Whether to print the inference results to the console. |
| `show` | bool | False | Whether to display the visualization results in a pop-up window. |
| `wait_time` | float | 0 | The interval for which the pop-up window displays the visualization. |
| `draw_pred` | bool | True | Whether to draw the predicted bounding boxes. *Only applicable to `TextDetInferencer` and `TextSpottingInferencer`.* |
| `out_dir` | str | `results/` | Output directory of the results. |
| `save_vis` | bool | False | Whether to save the visualization results to `out_dir`. |
| `save_pred` | bool | False | Whether to save the inference results to `out_dir`. |

```
````
## Command Line Interface

```{note}
This section is only applicable to `MMOCRInferencer`.
```

The command line form of `MMOCRInferencer` can be invoked through `tools/infer.py`, roughly as follows:

```bash
python tools/infer.py INPUT_PATH [--det DET] [--det-weights ...] ...
```

where `INPUT_PATH` is a required field whose content should be a path to an image or a directory. The other parameters map to the Python interface as follows:

- To call an argument on the command line, add two `-` in front of the Python argument and replace its underscores `_` with hyphens `-`. For example, `out_dir` becomes `--out-dir`.
- For boolean arguments, putting the argument in the command is equivalent to setting it to True. For example, `--show` sets the `show` argument to True.

In addition, the command line does not display inference results by default; you can use the `--print-result` argument to view them.

Here is an example:

```bash
python tools/infer.py demo/demo_text_ocr.jpg --det DBNet --rec SAR --show --print-result
```

Running this command gives the following result:

```bash
{'predictions': [{'rec_texts': ['CBank', 'Docbcba', 'GROUP', 'MAUN', 'CROBINSONS', 'AOCOC', '916M3', 'BOO9', 'Oven', 'BRANDS', 'ARETAIL', '14', '70<UKN>S', 'ROUND', 'SALE', 'YEAR', 'ALLY', 'SALE', 'SALE'],
'rec_scores': [0.9753464579582214, ...], 'det_polygons': [[551.9930285844646, 411.9138765335083, 553.6153911653112,
383.53195309638977, 620.2410061195247, 387.33785033226013, 618.6186435386782, 415.71977376937866], ...], 'det_scores': [0.8230461478233337, ...]}]}
```

## Models

**Text detection:**

| Name | Reference |
| ------------- | :--------------------------------------------------------------------------------: |
| DB_r18 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#dbnet) |
| DB_r50 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#dbnet) |
| DBPP_r50 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#dbnetpp) |
| DRRG | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#drrg) |
| FCE_IC15 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#fcenet) |
| FCE_CTW_DCNv2 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#fcenet) |
| MaskRCNN_CTW | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#mask-r-cnn) |
| MaskRCNN_IC15 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#mask-r-cnn) |
| PANet_CTW | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#panet) |
| PANet_IC15 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#panet) |
| PS_CTW | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#psenet) |
| PS_IC15 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#psenet) |
| TextSnake | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textdet_models.html#textsnake) |

**Text recognition:**

| Name | Reference |
| ------------- | :-------------------------------------------------------------------------------------: |
| ABINet | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#abinet) |
| ABINet_Vision | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#abinet) |
| ASTER | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#aster) |
| CRNN | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#crnn) |
| MASTER | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#master) |
| NRTR_1/16-1/8 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#nrtr) |
| NRTR_1/8-1/4 | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#nrtr) |
| RobustScanner | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#robustscanner) |
| SAR | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#sar) |
| SATRN | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#satrn) |
| SATRN_sm | [link](https://mmocr.readthedocs.io/zh_CN/dev-1.x/textrecog_models.html#satrn) |

**Key information extraction:**

| Name |
| ------------------------------------------------------------------- |
| [SDMGR](https://mmocr.readthedocs.io/zh_CN/dev-1.x/kie_models.html) |

## Other Notes

- To perform det + rec inference (end-to-end OCR), both the `det` and `rec` arguments must be defined.
- To perform only detection, set the `rec` argument to `None`.
- To perform only recognition, set the `det` argument to `None`.

If you have any suggestions for new features, feel free to open an issue or even a PR :)
@@ -0,0 +1,115 @@
import os.path as osp

from mmengine.fileio import load
from tabulate import tabulate


class BaseWeightList:
    """Class for generating a model list in markdown format.

    Attributes:
        dataset_list (list[str]): List of dataset names.
        table_header (list[str]): List of table headers.
        msg (str): Message to be displayed.
        task_abbr (str): Abbreviation of the task name.
        metric_name (str): Metric name.
    """

    base_url: str = 'https://github.com/open-mmlab/mmocr/blob/1.x/'
    table_cfg: dict = dict(
        tablefmt='pipe', floatfmt='.2f', numalign='right', stralign='center')
    dataset_list: list
    table_header: list
    msg: str
    task_abbr: str
    metric_name: str

    def __init__(self):
        data = (d + f' ({self.metric_name})' for d in self.dataset_list)
        self.table_header = ['模型', 'README', *data]

    def _get_model_info(self, task_name: str):
        meta_indexes = load('../../model-index.yml')
        for meta_path in meta_indexes['Import']:
            meta_path = osp.join('../../', meta_path)
            metainfo = load(meta_path)
            collection2md = {}
            for item in metainfo['Collections']:
                url = self.base_url + item['README']
                collection2md[item['Name']] = f'[链接]({url})'
            for item in metainfo['Models']:
                if task_name not in item['Config']:
                    continue
                name = f'`{item["Name"]}`'
                if item.get('Alias', None):
                    if isinstance(item['Alias'], str):
                        item['Alias'] = [item['Alias']]
                    aliases = [f'`{alias}`' for alias in item['Alias']]
                    aliases.append(name)
                    name = ' / '.join(aliases)
                readme = collection2md[item['In Collection']]
                eval_res = self._get_eval_res(item)
                yield (name, readme, *eval_res)

    def _get_eval_res(self, item):
        eval_res = {k: '-' for k in self.dataset_list}
        for res in item['Results']:
            if res['Dataset'] in self.dataset_list:
                eval_res[res['Dataset']] = res['Metrics'][self.metric_name]
        return (eval_res[k] for k in self.dataset_list)

    def gen_model_list(self):
        content = f'\n{self.msg}\n'
        content += '```{table}\n:class: model-summary nowrap field-list '
        content += 'table table-hover\n'
        content += tabulate(
            self._get_model_info(self.task_abbr), self.table_header,
            **self.table_cfg)
        content += '\n```\n'
        return content


class TextDetWeightList(BaseWeightList):

    dataset_list = ['ICDAR2015', 'CTW1500', 'Totaltext']
    msg = '### 文字检测'
    task_abbr = 'textdet'
    metric_name = 'hmean-iou'


class TextRecWeightList(BaseWeightList):

    dataset_list = [
        'Avg', 'IIIT5K', 'SVT', 'ICDAR2013', 'ICDAR2015', 'SVTP', 'CT80'
    ]
    msg = ('### 文字识别\n'
           '```{note}\n'
           'Avg 指该模型在 IIIT5K、SVT、ICDAR2013、ICDAR2015、SVTP、'
           'CT80 上的平均结果。\n```\n')
    task_abbr = 'textrecog'
    metric_name = 'word_acc'

    def _get_eval_res(self, item):
        eval_res = {k: '-' for k in self.dataset_list}
        avg = []
        for res in item['Results']:
            if res['Dataset'] in self.dataset_list:
                eval_res[res['Dataset']] = res['Metrics'][self.metric_name]
                avg.append(res['Metrics'][self.metric_name])
        eval_res['Avg'] = sum(avg) / len(avg)
        return (eval_res[k] for k in self.dataset_list)


class KIEWeightList(BaseWeightList):

    dataset_list = ['wildreceipt']
    task_abbr = 'kie'
    metric_name = 'macro_f1'
    msg = '### 关键信息提取'


def gen_weight_list():
    content = TextDetWeightList().gen_model_list()
    content += TextRecWeightList().gen_model_list()
    content += KIEWeightList().gen_model_list()
    return content
@@ -147,9 +147,6 @@ class BaseMMOCRInferencer(BaseInferencer):
                "out_dir". Defaults to False.
            print_result (bool): Whether to print the inference result w/o
                visualization to the console. Defaults to False.

            **kwargs: Other keyword arguments passed to :meth:`preprocess`,
                :meth:`forward`, :meth:`visualize` and :meth:`postprocess`.

@@ -201,7 +198,7 @@ class BaseMMOCRInferencer(BaseInferencer):
                pred_out_dir=pred_out_dir,
                **postprocess_kwargs)
            results['predictions'].extend(batch_res['predictions'])
            if return_vis and batch_res['visualization'] is not None:
                results['visualization'].extend(batch_res['visualization'])
        return results
@@ -220,6 +220,7 @@ class MMOCRInferencer(BaseMMOCRInferencer):
        inputs: InputsType,
        batch_size: int = 1,
        out_dir: str = 'results/',
        return_vis: bool = False,
        save_vis: bool = False,
        save_pred: bool = False,
        **kwargs,

@@ -231,6 +232,8 @@ class MMOCRInferencer(BaseMMOCRInferencer):
                to image / image directory, or an array, or a list of these.
            batch_size (int): Batch size. Defaults to 1.
            out_dir (str): Output directory of results. Defaults to 'results/'.
            return_vis (bool): Whether to return the visualization result.
                Defaults to False.
            save_vis (bool): Whether to save the visualization results to
                "out_dir". Defaults to False.
            save_pred (bool): Whether to save the inference results to

@@ -260,7 +263,10 @@ class MMOCRInferencer(BaseMMOCRInferencer):
            visualize_kwargs,
            postprocess_kwargs,
        ) = self._dispatch_kwargs(
            save_vis=save_vis,
            save_pred=save_pred,
            return_vis=return_vis,
            **kwargs)

        ori_inputs = self._inputs_to_list(inputs)

@@ -277,7 +283,7 @@ class MMOCRInferencer(BaseMMOCRInferencer):
                pred_out_dir=pred_out_dir,
                **postprocess_kwargs)
            results['predictions'].extend(batch_res['predictions'])
            if return_vis and batch_res['visualization'] is not None:
                results['visualization'].extend(batch_res['visualization'])
        return results
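A sketch of what the new `return_vis` flag enables from the caller's side (model choices as in the docs above):

```python
from mmocr.apis import MMOCRInferencer

ocr = MMOCRInferencer(det='DBNet', rec='CRNN')
# Visualizations are only accumulated into the result dict on request.
result = ocr('demo/demo_text_ocr.jpg', return_vis=True)
print(len(result['visualization']))  # one uint8 array per input image
```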
@@ -3,5 +3,7 @@ markdown>=3.4.0
myst-parser
-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
sphinx==4.0.2
sphinx-tabs
sphinx_copybutton
sphinx_markdown_tables>=0.0.16
tabulate
@@ -55,8 +55,6 @@ class TestMMOCRInferencer(TestCase):
            'dbnet_resnet18_fpnc_1200e_icdar2015/'
            'dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth')
        MMOCRInferencer(rec='crnn_mini-vgg_5e_mj')
        with self.assertRaises(ValueError):
            MMOCRInferencer(kie='sdmgr')
        with self.assertRaises(ValueError):
            MMOCRInferencer(det='dummy')