mirror of https://github.com/open-mmlab/mmocr.git
Compare commits
138 Commits
Author | SHA1 | Date |
---|---|---|
|
966296f26a | |
|
2caab0a4e7 | |
|
b18a09b2f0 | |
|
9551af6e5a | |
|
1dcd6fa695 | |
|
6b3f6f5285 | |
|
0cd2878b04 | |
|
bbe8964f00 | |
|
a344280bcb | |
|
4eb3cc7de5 | |
|
e9a31ddd70 | |
|
1e696887b9 | |
|
231cff5da2 | |
|
8afc79f370 | |
|
9e713c63fe | |
|
d7c59f3325 | |
|
a7e326f829 | |
|
97efb04c50 | |
|
e0a78c021b | |
|
16de16f8f8 | |
|
e6174b29fe | |
|
4842599191 | |
|
1c91a9820a | |
|
afe58a4a77 | |
|
67f25c6fb3 | |
|
6342ff262c | |
|
4b887676a3 | |
|
bb591d2b1b | |
|
59d89e10c7 | |
|
73df26d749 | |
|
f47cff5199 | |
|
c886936117 | |
|
22f40b79ed | |
|
1a379f2f1b | |
|
d0dc90253a | |
|
6d9582b6c7 | |
|
e0707bf5f2 | |
|
ae252626d3 | |
|
d80df99037 | |
|
506f7d296e | |
|
9caacc76ee | |
|
63a6ed4e6c | |
|
c6580a48c1 | |
|
7ef34c4407 | |
|
47f54304f5 | |
|
465316f193 | |
|
590af4b5e8 | |
|
a58c77df80 | |
|
e9b23c56ad | |
|
75c06d34bb | |
|
bfb36d81b3 | |
|
45a8d89fb9 | |
|
d56155c82d | |
|
33cbc9b92f | |
|
cc78866ed7 | |
|
f250ea2379 | |
|
5685bb0f38 | |
|
5670695338 | |
|
81fd74c266 | |
|
47f7fc06ed | |
|
82f81ff67c | |
|
3aa9572a64 | |
|
62d440fe8e | |
|
0894178343 | |
|
7cfd412ce7 | |
|
280a89c18e | |
|
6eaa0673f7 | |
|
9b0f1da1e7 | |
|
37c5d371c7 | |
|
e9bf689f74 | |
|
1127240108 | |
|
df0be646ea | |
|
f820470415 | |
|
7cea6a6419 | |
|
3240bace4a | |
|
b21d2b964a | |
|
332089ca11 | |
|
b3be8cfbb3 | |
|
d25e061b03 | |
|
20a87d476c | |
|
d8e615921d | |
|
2a2cab3c8c | |
|
c870046a4a | |
|
edf085c010 | |
|
c3aef21eea | |
|
03a23ca4db | |
|
3b0a41518d | |
|
ad470e323a | |
|
2d743cfa19 | |
|
2b5cdbdbfc | |
|
a82fc66812 | |
|
bed778fc3f | |
|
689ecf0f5f | |
|
bf41194965 | |
|
dff97edaad | |
|
50f55c2976 | |
|
b3f21dd95d | |
|
7f4a1eecdc | |
|
6992923768 | |
|
b64565c10f | |
|
39f99ac720 | |
|
27b6a68586 | |
|
37dca0600a | |
|
0aa5d7be6d | |
|
b0557c2c55 | |
|
d679691a02 | |
|
acae8da223 | |
|
4d5ed98177 | |
|
5dbacfe202 | |
|
65e746eb3d | |
|
7e9f7756bc | |
|
53e72e4440 | |
|
1413b5043a | |
|
b79382cd6b | |
|
e3fd570687 | |
|
9baf440d7a | |
|
89606a1cf1 | |
|
e1aa1f6f42 | |
|
101f2b6eef | |
|
d2a6845c64 | |
|
0ec1524f54 | |
|
e81bb13696 | |
|
24bfb18768 | |
|
fb78c942d6 | |
|
4396e8f5d8 | |
|
c38618bf51 | |
|
f6da8715b9 | |
|
b11c58897c | |
|
302efb9db3 | |
|
419f98d8a4 | |
|
0bd62d67c8 | |
|
e096df8b57 | |
|
547ed31eda | |
|
5cfe481f7f | |
|
ffe5237aa8 | |
|
58ea06d986 | |
|
38d2fc3438 | |
|
5ded52230a |
|
@ -80,7 +80,7 @@ jobs:
|
|||
type: string
|
||||
cuda:
|
||||
type: enum
|
||||
enum: ["10.1", "10.2", "11.1"]
|
||||
enum: ["10.1", "10.2", "11.1", "11.7"]
|
||||
cudnn:
|
||||
type: integer
|
||||
default: 7
|
||||
|
@ -129,6 +129,7 @@ workflows:
|
|||
ignore:
|
||||
- dev-1.x
|
||||
- 1.x
|
||||
- main
|
||||
pr_stage_test:
|
||||
when:
|
||||
not:
|
||||
|
@ -141,17 +142,18 @@ workflows:
|
|||
ignore:
|
||||
- dev-1.x
|
||||
- test-1.x
|
||||
- main
|
||||
- build_cpu:
|
||||
name: minimum_version_cpu
|
||||
torch: 1.6.0
|
||||
torchvision: 0.7.0
|
||||
python: 3.6.9 # The lowest python 3.6.x version available on CircleCI images
|
||||
python: "3.7"
|
||||
requires:
|
||||
- lint
|
||||
- build_cpu:
|
||||
name: maximum_version_cpu
|
||||
torch: 1.12.1
|
||||
torchvision: 0.13.1
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
python: 3.9.0
|
||||
requires:
|
||||
- minimum_version_cpu
|
||||
|
@ -167,6 +169,15 @@ workflows:
|
|||
cuda: "10.2"
|
||||
requires:
|
||||
- hold
|
||||
- build_cuda:
|
||||
name: mainstream_version_gpu
|
||||
torch: 2.0.0
|
||||
# Use double quotation mark to explicitly specify its type
|
||||
# as string instead of number
|
||||
cuda: "11.7"
|
||||
cudnn: 8
|
||||
requires:
|
||||
- hold
|
||||
merge_stage_test:
|
||||
when:
|
||||
not:
|
||||
|
@ -182,3 +193,4 @@ workflows:
|
|||
branches:
|
||||
only:
|
||||
- dev-1.x
|
||||
- main
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py
|
||||
textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
textdet/panet/panet_resnet18_fpem-ffm_600e_icdar2015.py
|
||||
textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
textrecog/master/master_resnet31_12e_st_mj_sa.py
|
||||
textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
textrecog/svtr/svtr-small_20e_st_mj.py
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
third_part_libs = [
|
||||
'pip install -r ../requirements/albu.txt',
|
||||
]
|
||||
|
||||
default_floating_range = 0.5
|
|
@ -0,0 +1,9 @@
|
|||
textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/svtr/svtr-small_20e_st_mj.py
|
|
@ -0,0 +1,121 @@
|
|||
name: "🐞 Bug report"
|
||||
description: "Create a report to help us reproduce and fix the bug"
|
||||
labels: kind/bug
|
||||
title: "[Bug] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
If this issue is about installing MMCV, please file an issue at [MMCV](https://github.com/open-mmlab/mmcv/issues/new/choose).
|
||||
If it's anything about model deployment, please raise it to [MMDeploy](https://github.com/open-mmlab/mmdeploy)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Prerequisite
|
||||
description: Please check the following items before creating a new issue.
|
||||
options:
|
||||
- label: I have searched [Issues](https://github.com/open-mmlab/mmocr/issues) and [Discussions](https://github.com/open-mmlab/mmocr/discussions) but cannot get the expected help.
|
||||
required: true
|
||||
# - label: I have read the [FAQ documentation](https://mmocr.readthedocs.io/en/1.x/notes/4_faq.html) but cannot get the expected help.
|
||||
# required: true
|
||||
- label: The bug has not been fixed in the [latest version (0.x)](https://github.com/open-mmlab/mmocr) or [latest version (1.x)](https://github.com/open-mmlab/mmocr/tree/dev-1.x).
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: task
|
||||
attributes:
|
||||
label: Task
|
||||
description: The problem arises when
|
||||
options:
|
||||
- I'm using the official example scripts/configs for the officially supported tasks/models/datasets.
|
||||
- I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: branch
|
||||
attributes:
|
||||
label: Branch
|
||||
description: The problem arises when I'm working on
|
||||
options:
|
||||
- main branch https://github.com/open-mmlab/mmocr
|
||||
- 1.x branch https://github.com/open-mmlab/mmocr/tree/dev-1.x
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Environment
|
||||
description: |
|
||||
Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and copy-paste it here.
|
||||
You may add additional information that may be helpful for locating the problem, such as
|
||||
- How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - code sample
|
||||
description: |
|
||||
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
|
||||
placeholder: |
|
||||
```python
|
||||
# Sample code to reproduce the problem
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - command or script
|
||||
description: |
|
||||
What command or script did you run?
|
||||
placeholder: |
|
||||
```shell
|
||||
The command or script you run.
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - error message
|
||||
description: |
|
||||
Please provide the error message or logs you got, with the full traceback.
|
||||
|
||||
Tip: You can attach images or log files by dragging them into the text area..
|
||||
placeholder: |
|
||||
```
|
||||
The error message or logs you got, with the full traceback.
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: |
|
||||
Tell us anything else you think we should know.
|
||||
|
||||
Tip: You can attach images or log files by dragging them into the text area.
|
||||
placeholder: |
|
||||
1. What's your expected result?
|
||||
2. What dataset did you use?
|
||||
3. What do you think might be the reason?
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
If you have already identified the reason, we strongly appreciate you creating a new PR to fix it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,39 @@
|
|||
name: 🚀 Feature request
|
||||
description: Suggest an idea for this project
|
||||
labels: [feature-request]
|
||||
title: "[Feature] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: What is the feature?
|
||||
description: Tell us more about the feature and how this feature can help.
|
||||
placeholder: |
|
||||
E.g., It is inconvenient when \[....\].
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Any other context?
|
||||
description: |
|
||||
Have you considered any alternative solutions or features? If so, what are they? Also, feel free to add any other context or screenshots about the feature request here.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
We strongly appreciate you creating a new PR to implement it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,51 @@
|
|||
name: "\U0001F31F New model/dataset/scheduler addition"
|
||||
description: Submit a proposal/request to implement a new model / dataset / scheduler
|
||||
labels: [ "feature-request" ]
|
||||
title: "[New Models] "
|
||||
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: textarea
|
||||
id: description-request
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Model/Dataset/Scheduler description
|
||||
description: |
|
||||
Put any and all important information relative to the model/dataset/scheduler
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Open source status
|
||||
description: |
|
||||
Please provide the open-source status, which would be very helpful
|
||||
options:
|
||||
- label: "The model implementation is available"
|
||||
- label: "The model weights are available."
|
||||
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Provide useful links for the implementation
|
||||
description: |
|
||||
Please provide information regarding the implementation, the weights, and the authors.
|
||||
Please mention the authors by @gh-username if you're aware of their usernames.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
We strongly appreciate you creating a new PR to implement it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,48 @@
|
|||
name: 📚 Documentation
|
||||
description: Report an issue related to the documentation.
|
||||
labels: "docs"
|
||||
title: "[Docs] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: dropdown
|
||||
id: branch
|
||||
attributes:
|
||||
label: Branch
|
||||
description: This issue is related to the
|
||||
options:
|
||||
- master branch https://mmocr.readthedocs.io/en/latest/
|
||||
- 1.x branch https://mmocr.readthedocs.io/en/dev-1.x/
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: 📚 The doc issue
|
||||
description: >
|
||||
A clear and concise description the issue.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Suggest a potential alternative/fix
|
||||
description: >
|
||||
Tell us how we could improve the documentation in this regard.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
If you have already identified the reason, we strongly appreciate you creating a new PR to fix it [**here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -1,6 +1,12 @@
|
|||
blank_issues_enabled: false
|
||||
|
||||
contact_links:
|
||||
- name: MMOCR Documentation
|
||||
url: https://mmocr.readthedocs.io/en/latest/
|
||||
about: Check if your question is answered in docs
|
||||
- name: ❔ FAQ
|
||||
url: https://mmocr.readthedocs.io/en/dev-1.x/get_started/faq.html
|
||||
about: Is your question frequently asked?
|
||||
- name: 💬 Forum
|
||||
url: https://github.com/open-mmlab/mmocr/discussions
|
||||
about: Ask general usage questions and discuss with other MMOCR community members
|
||||
- name: 🌐 Explore OpenMMLab
|
||||
url: https://openmmlab.com/
|
||||
about: Get know more about OpenMMLab
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
---
|
||||
name: Error report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
Thanks for your error report and we appreciate it a lot.
|
||||
|
||||
**Checklist**
|
||||
|
||||
1. I have searched related issues but cannot get the expected help.
|
||||
2. The bug has not been fixed in the latest version.
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**Reproduction**
|
||||
|
||||
1. What command or script did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the command.
|
||||
```
|
||||
|
||||
2. Did you make any modifications on the code or config? Did you understand what you have modified?
|
||||
3. What dataset did you use?
|
||||
|
||||
**Environment**
|
||||
|
||||
1. Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and paste it here.
|
||||
2. You may add addition that may be helpful for locating the problem, such as
|
||||
- How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
|
||||
**Error traceback**
|
||||
If applicable, paste the error traceback here.
|
||||
|
||||
```none
|
||||
A placeholder for traceback.
|
||||
```
|
||||
|
||||
**Bug fix**
|
||||
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
|
|
@ -1,21 +0,0 @@
|
|||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Describe the feature**
|
||||
|
||||
**Motivation**
|
||||
A clear and concise description of the motivation of the feature.
|
||||
Ex1. It is inconvenient when \[....\].
|
||||
Ex2. There is a recent paper \[....\], which is very helpful for \[....\].
|
||||
|
||||
**Related resources**
|
||||
If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
|
|
@ -1,7 +0,0 @@
|
|||
---
|
||||
name: General questions
|
||||
about: Ask general questions to get help
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
|
@ -1,67 +0,0 @@
|
|||
---
|
||||
name: Reimplementation Questions
|
||||
about: Ask about questions during model reimplementation
|
||||
title: ''
|
||||
labels: reimplementation
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Notice**
|
||||
|
||||
There are several common situations in the reimplementation issues as below
|
||||
|
||||
1. Reimplement a model in the model zoo using the provided configs
|
||||
2. Reimplement a model in the model zoo on other dataset (e.g., custom datasets)
|
||||
3. Reimplement a custom model but all the components are implemented in MMOCR
|
||||
4. Reimplement a custom model with new modules implemented by yourself
|
||||
|
||||
There are several things to do for different cases as below.
|
||||
|
||||
- For case 1 & 3, please follow the steps in the following sections thus we could help to quick identify the issue.
|
||||
- For case 2 & 4, please understand that we are not able to do much help here because we usually do not know the full code and the users should be responsible to the code they write.
|
||||
- One suggestion for case 2 & 4 is that the users should first check whether the bug lies in the self-implemented code or the original code. For example, users can first make sure that the same model runs well on supported datasets. If you still need help, please describe what you have done and what you obtain in the issue, and follow the steps in the following sections and try as clear as possible so that we can better help you.
|
||||
|
||||
**Checklist**
|
||||
|
||||
1. I have searched related issues but cannot get the expected help.
|
||||
2. The issue has not been fixed in the latest version.
|
||||
|
||||
**Describe the issue**
|
||||
|
||||
A clear and concise description of what the problem you meet and what have you done.
|
||||
|
||||
**Reproduction**
|
||||
|
||||
1. What command or script did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the command.
|
||||
```
|
||||
|
||||
2. What config dir you run?
|
||||
|
||||
```none
|
||||
A placeholder for the config.
|
||||
```
|
||||
|
||||
3. Did you make any modifications on the code or config? Did you understand what you have modified?
|
||||
4. What dataset did you use?
|
||||
|
||||
**Environment**
|
||||
|
||||
1. Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and paste it here.
|
||||
2. You may add addition that may be helpful for locating the problem, such as
|
||||
1. How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
2. Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
|
||||
**Results**
|
||||
|
||||
If applicable, paste the related results here, e.g., what you expect and what you get.
|
||||
|
||||
```none
|
||||
A placeholder for results comparison
|
||||
```
|
||||
|
||||
**Issue fix**
|
||||
|
||||
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
|
|
@ -19,24 +19,24 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build_cpu_py:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.6, 3.8, 3.9]
|
||||
python-version: [3.8, 3.9]
|
||||
torch: [1.8.1]
|
||||
include:
|
||||
- torch: 1.8.1
|
||||
torchvision: 0.9.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
|
@ -56,11 +56,11 @@ jobs:
|
|||
coverage report -m
|
||||
|
||||
build_cpu_pt:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1]
|
||||
torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1, 1.13.0]
|
||||
include:
|
||||
- torch: 1.6.0
|
||||
torchvision: 0.7.0
|
||||
|
@ -76,16 +76,21 @@ jobs:
|
|||
torchvision: 0.12.0
|
||||
- torch: 1.12.1
|
||||
torchvision: 0.13.1
|
||||
- torch: 1.13.0
|
||||
torchvision: 0.14.0
|
||||
- torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
python-version: 3.8
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
|
@ -114,53 +119,20 @@ jobs:
|
|||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
|
||||
build_cu102:
|
||||
runs-on: ubuntu-18.04
|
||||
container:
|
||||
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
include:
|
||||
- torch: 1.8.1
|
||||
cuda: 10.2
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Fetch GPG keys
|
||||
run: |
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
|
||||
- name: Install Python-dev
|
||||
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
|
||||
if: ${{matrix.python-version != 3.9}}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python setup.py check -m -s
|
||||
TORCH_CUDA_ARCH_LIST=7.0 pip install -e .
|
||||
|
||||
build_windows:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: windows-2022
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-2022]
|
||||
python: [3.7]
|
||||
platform: [cpu, cu111]
|
||||
torch: [1.8.1]
|
||||
torchvision: [0.9.1]
|
||||
include:
|
||||
- python-version: 3.8
|
||||
platform: cu117
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
|
@ -172,7 +144,7 @@ jobs:
|
|||
- name: Install lmdb
|
||||
run: pip install lmdb
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
|
||||
run: pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
|
|
|
@ -17,7 +17,7 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build_cpu:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
|
@ -25,15 +25,15 @@ jobs:
|
|||
- torch: 1.8.1
|
||||
torchvision: 0.9.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
|
@ -61,55 +61,24 @@ jobs:
|
|||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
|
||||
build_cu102:
|
||||
runs-on: ubuntu-18.04
|
||||
container:
|
||||
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.8]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Fetch GPG keys
|
||||
run: |
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
|
||||
- name: Install Python-dev
|
||||
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
|
||||
if: ${{matrix.python-version != 3.9}}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python setup.py check -m -s
|
||||
TORCH_CUDA_ARCH_LIST=7.0 pip install -e .
|
||||
|
||||
build_windows:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: windows-2022
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-2022]
|
||||
python: [3.7]
|
||||
platform: [cpu, cu111]
|
||||
torch: [1.8.1]
|
||||
torchvision: [0.9.1]
|
||||
include:
|
||||
- python-version: 3.8
|
||||
platform: cu117
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Upgrade pip
|
||||
|
@ -117,7 +86,7 @@ jobs:
|
|||
- name: Install lmdb
|
||||
run: pip install lmdb
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
|
||||
run: pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
|
|
|
@ -67,6 +67,7 @@ instance/
|
|||
# Sphinx documentation
|
||||
docs/en/_build/
|
||||
docs/zh_cn/_build/
|
||||
docs/*/api/generated/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
@ -142,3 +143,4 @@ mmocr/.mim
|
|||
workdirs/
|
||||
.history/
|
||||
.dev/
|
||||
data/
|
||||
|
|
|
@ -6,5 +6,4 @@ assign:
|
|||
'*/1 * * * *'
|
||||
assignees:
|
||||
- gaotongxiao
|
||||
- xinke-wang
|
||||
- Harold-lkk
|
||||
|
|
|
@ -4,8 +4,8 @@ repos:
|
|||
rev: 5.0.4
|
||||
hooks:
|
||||
- id: flake8
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.10.1
|
||||
- repo: https://github.com/zhouzaida/isort
|
||||
rev: 5.12.1
|
||||
hooks:
|
||||
- id: isort
|
||||
- repo: https://github.com/pre-commit/mirrors-yapf
|
||||
|
@ -21,16 +21,16 @@ repos:
|
|||
hooks:
|
||||
- id: trailing-whitespace
|
||||
exclude: |
|
||||
(?x)(
|
||||
^dicts/|
|
||||
^projects/ABCNet/dicts/
|
||||
(?x)^(
|
||||
dicts/|
|
||||
projects/.*?/dicts/
|
||||
)
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
exclude: |
|
||||
(?x)(
|
||||
^dicts/|
|
||||
^projects/ABCNet/dicts/
|
||||
(?x)^(
|
||||
dicts/|
|
||||
projects/.*?/dicts/
|
||||
)
|
||||
- id: requirements-txt-fixer
|
||||
- id: double-quote-string-fixer
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
include requirements/*.txt
|
||||
include mmocr/.mim/model-index.yml
|
||||
include mmocr/.mim/dicts/*.txt
|
||||
recursive-include mmocr/.mim/configs *.py *.yml
|
||||
recursive-include mmocr/.mim/tools *.sh *.py
|
||||
|
|
96
README.md
96
README.md
|
@ -40,6 +40,39 @@
|
|||
English | [简体中文](README_zh-CN.md)
|
||||
|
||||
</div>
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## Latest Updates
|
||||
|
||||
**The default branch is now `main` and the code on the branch has been upgraded to v1.0.0. The old `main` branch (v0.6.3) code now exists on the `0.x` branch.** If you have been using the `main` branch and encounter upgrade issues, please read the [Migration Guide](https://mmocr.readthedocs.io/en/dev-1.x/migration/overview.html) and notes on [Branches](https://mmocr.readthedocs.io/en/dev-1.x/migration/branches.html) .
|
||||
|
||||
v1.0.0 was released in 2023-04-06. Major updates from 1.0.0rc6 include:
|
||||
|
||||
1. Support for SCUT-CTW1500, SynthText, and MJSynth datasets in Dataset Preparer
|
||||
2. Updated FAQ and documentation
|
||||
3. Deprecation of file_client_args in favor of backend_args
|
||||
4. Added a new MMOCR tutorial notebook
|
||||
|
||||
To know more about the updates in MMOCR 1.0, please refer to [What's New in MMOCR 1.x](https://mmocr.readthedocs.io/en/dev-1.x/migration/news.html), or
|
||||
Read [Changelog](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) for more details!
|
||||
|
||||
## Introduction
|
||||
|
||||
|
@ -69,26 +102,6 @@ The main branch works with **PyTorch 1.6+**.
|
|||
|
||||
The toolbox provides a comprehensive set of utilities which can help users assess the performance of models. It includes visualizers which allow visualization of images, ground truths as well as predicted bounding boxes, and a validation tool for evaluating checkpoints during training. It also includes data converters to demonstrate how to convert your own data to the annotation files which the toolbox supports.
|
||||
|
||||
## What's New
|
||||
|
||||
1. **New engines**. MMOCR 1.x is based on [MMEngine](https://github.com/open-mmlab/mmengine), which provides a general and powerful runner that allows more flexible customizations and significantly simplifies the entrypoints of high-level interfaces.
|
||||
|
||||
2. **Unified interfaces**. As a part of the OpenMMLab 2.0 projects, MMOCR 1.x unifies and refactors the interfaces and internal logics of train, testing, datasets, models, evaluation, and visualization. All the OpenMMLab 2.0 projects share the same design in those interfaces and logics to allow the emergence of multi-task/modality algorithms.
|
||||
|
||||
3. **Cross project calling**. Benefiting from the unified design, you can use the models implemented in other OpenMMLab projects, such as MMDet. We provide an example of how to use MMDetection's Mask R-CNN through `MMDetWrapper`. Check our documents for more details. More wrappers will be released in the future.
|
||||
|
||||
4. **Stronger visualization**. We provide a series of useful tools which are mostly based on brand-new visualizers. As a result, it is more convenient for the users to explore the models and datasets now.
|
||||
|
||||
5. **More documentation and tutorials**. We add a bunch of documentation and tutorials to help users get started more smoothly. Read it [here](https://mmocr.readthedocs.io/en/dev-1.x/).
|
||||
|
||||
6. **One-stop Dataset Preparaion**. Multiple datasets are instantly ready with only one line of command, via our [Dataset Preparer](https://mmocr.readthedocs.io/en/dev-1.x/user_guides/data_prepare/dataset_preparer.html).
|
||||
|
||||
7. **Embracing more `projects/`**: We now introduce `projects/` folder, where some experimental features, frameworks and models can be placed, only needed to satisfy the minimum requirement on the code quality. Everyone is welcome to post their implementation of any great ideas in this folder! Learn more from our [example project](https://github.com/open-mmlab/mmocr/blob/dev-1.x/projects/example_project/).
|
||||
|
||||
8. **More models**. MMOCR 1.0 supports more tasks and more state-of-the-art models!
|
||||
|
||||
Read [Changelog](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) for more details!
|
||||
|
||||
## Installation
|
||||
|
||||
MMOCR depends on [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) and [MMDetection](https://github.com/open-mmlab/mmdetection).
|
||||
|
@ -99,13 +112,9 @@ Please refer to [Install Guide](https://mmocr.readthedocs.io/en/dev-1.x/get_star
|
|||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
mim install mmengine
|
||||
mim install 'mmcv>=2.0.0rc1'
|
||||
mim install 'mmdet>=3.0.0rc0'
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
git checkout 1.x
|
||||
pip3 install -e .
|
||||
mim install -e .
|
||||
```
|
||||
|
||||
## Get Started
|
||||
|
@ -140,12 +149,14 @@ Supported algorithms:
|
|||
<summary>Text Recognition</summary>
|
||||
|
||||
- [x] [ABINet](configs/textrecog/abinet/README.md) (CVPR'2021)
|
||||
- [x] [ASTER](configs/textrecog/aster/README.md) (TPAMI'2018)
|
||||
- [x] [CRNN](configs/textrecog/crnn/README.md) (TPAMI'2016)
|
||||
- [x] [MASTER](configs/textrecog/master/README.md) (PR'2021)
|
||||
- [x] [NRTR](configs/textrecog/nrtr/README.md) (ICDAR'2019)
|
||||
- [x] [RobustScanner](configs/textrecog/robust_scanner/README.md) (ECCV'2020)
|
||||
- [x] [SAR](configs/textrecog/sar/README.md) (AAAI'2019)
|
||||
- [x] [SATRN](configs/textrecog/satrn/README.md) (CVPR'2020 Workshop on Text and Documents in the Deep Learning Era)
|
||||
- [x] [SVTR](configs/textrecog/svtr/README.md) (IJCAI'2022)
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -160,11 +171,17 @@ Supported algorithms:
|
|||
<summary>Text Spotting</summary>
|
||||
|
||||
- [x] [ABCNet](projects/ABCNet/README.md) (CVPR'2020)
|
||||
- [x] [ABCNetV2](projects/ABCNet/README_V2.md) (TPAMI'2021)
|
||||
- [x] [SPTS](projects/SPTS/README.md) (ACM MM'2022)
|
||||
|
||||
</details>
|
||||
|
||||
Please refer to [model_zoo](https://mmocr.readthedocs.io/en/dev-1.x/modelzoo.html) for more details.
|
||||
|
||||
## Projects
|
||||
|
||||
[Here](projects/README.md) are some implementations of SOTA models and solutions built on MMOCR, which are supported and maintained by community users. These projects demonstrate the best practices based on MMOCR for research and product development. We welcome and appreciate all the contributions to OpenMMLab ecosystem.
|
||||
|
||||
## Contributing
|
||||
|
||||
We appreciate all contributions to improve MMOCR. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guidelines.
|
||||
|
@ -179,11 +196,11 @@ We hope the toolbox and benchmark could serve the growing research community by
|
|||
If you find this project useful in your research, please consider cite:
|
||||
|
||||
```bibtex
|
||||
@article{mmocr2021,
|
||||
@article{mmocr2022,
|
||||
title={MMOCR: A Comprehensive Toolbox for Text Detection, Recognition and Understanding},
|
||||
author={Kuang, Zhanghui and Sun, Hongbin and Li, Zhizhong and Yue, Xiaoyu and Lin, Tsui Hin and Chen, Jianyong and Wei, Huaqiang and Zhu, Yiqin and Gao, Tong and Zhang, Wenwei and Chen, Kai and Zhang, Wayne and Lin, Dahua},
|
||||
journal= {arXiv preprint arXiv:2108.06543},
|
||||
year={2021}
|
||||
author={MMOCR Developer Team},
|
||||
howpublished = {\url{https://github.com/open-mmlab/mmocr}},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -191,7 +208,7 @@ If you find this project useful in your research, please consider cite:
|
|||
|
||||
This project is released under the [Apache 2.0 license](LICENSE).
|
||||
|
||||
## Projects in OpenMMLab
|
||||
## OpenMMLab Family
|
||||
|
||||
- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab foundational library for training deep learning models
|
||||
- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
|
||||
|
@ -213,3 +230,22 @@ This project is released under the [Apache 2.0 license](LICENSE).
|
|||
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
|
||||
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
|
||||
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.
|
||||
|
||||
## Welcome to the OpenMMLab community
|
||||
|
||||
Scan the QR code below to follow the OpenMMLab team's [**Zhihu Official Account**](https://www.zhihu.com/people/openmmlab) and join the OpenMMLab team's [**QQ Group**](https://jq.qq.com/?_wv=1027&k=aCvMxdr3), or join the official communication WeChat group by adding the WeChat, or join our [**Slack**](https://join.slack.com/t/mmocrworkspace/shared_invite/zt-1ifqhfla8-yKnLO_aKhVA2h71OrK8GZw)
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/qq_group_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/wechat_qrcode.jpg" height="400" />
|
||||
</div>
|
||||
|
||||
We will provide you with the OpenMMLab community
|
||||
|
||||
- 📢 share the latest core technologies of AI frameworks
|
||||
- 💻 Explaining PyTorch common module source Code
|
||||
- 📰 News related to the release of OpenMMLab
|
||||
- 🚀 Introduction of cutting-edge algorithms developed by OpenMMLab
|
||||
🏃 Get the more efficient answer and feedback
|
||||
- 🔥 Provide a platform for communication with developers from all walks of life
|
||||
|
||||
The OpenMMLab community looks forward to your participation! 👬
|
||||
|
|
|
@ -41,6 +41,39 @@
|
|||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## 近期更新
|
||||
|
||||
**默认分支目前为 `main`,且分支上的代码已经切换到 v1.0.0 版本。旧版 `main` 分支(v0.6.3)的代码现存在 `0.x` 分支上。** 如果您一直在使用 `main` 分支,并遇到升级问题,请阅读 [迁移指南](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/overview.html) 和 [分支说明](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/branches.html) 。
|
||||
|
||||
最新的版本 v1.0.0 于 2023-04-06 发布。其相对于 1.0.0rc6 的主要更新如下:
|
||||
|
||||
1. Dataset Preparer 中支持了 SCUT-CTW1500, SynthText 和 MJSynth 数据集;
|
||||
2. 更新了文档和 FAQ;
|
||||
3. 升级文件后端;使用了 `backend_args` 替换 `file_client_args`;
|
||||
4. 增加了 MMOCR 教程 notebook。
|
||||
|
||||
如果需要了解 MMOCR 1.0 相对于 0.x 的升级内容,请阅读 [MMOCR 1.x 更新汇总](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/news.html);或者阅读[更新日志](https://mmocr.readthedocs.io/zh_CN/dev-1.x/notes/changelog.html)以获取更多信息。
|
||||
|
||||
## 简介
|
||||
|
||||
MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检测,文本识别以及相应的下游任务,如关键信息提取。 它是 OpenMMLab 项目的一部分。
|
||||
|
@ -63,33 +96,12 @@ MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检
|
|||
|
||||
-**模块化设计**
|
||||
|
||||
MMOCR 的模块化设计使用户可以定义自己的优化器,数据预处理器,模型组件如主干模块,颈部模块和头部模块,以及损失函数。有关如何构建自定义模型的信
|
||||
息,请参考[概览](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/overview.html)。
|
||||
MMOCR 的模块化设计使用户可以定义自己的优化器,数据预处理器,模型组件如主干模块,颈部模块和头部模块,以及损失函数。有关如何构建自定义模型的信息,请参考[概览](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/overview.html)。
|
||||
|
||||
-**众多实用工具**
|
||||
|
||||
该工具箱提供了一套全面的实用程序,可以帮助用户评估模型的性能。它包括可对图像,标注的真值以及预测结果进行可视化的可视化工具,以及用于在训练过程中评估模型的验证工具。它还包括数据转换器,演示了如何将用户自建的标注数据转换为 MMOCR 支持的标注文件。
|
||||
|
||||
## 最新进展
|
||||
|
||||
1. 架构升级:MMOCR 1.x 是基于 [MMEngine](https://github.com/open-mmlab/mmengine),提供了一个通用的、强大的执行器,允许更灵活的定制,提供了统一的训练和测试入口。
|
||||
|
||||
2. 统一接口:MMOCR 1.x 统一了数据集、模型、评估和可视化的接口和内部逻辑。支持更强的扩展性。
|
||||
|
||||
3. 跨项目调用:受益于统一的设计,你可以使用其他OpenMMLab项目中实现的模型,如MMDet。 我们提供了一个例子,说明如何通过MMDetWrapper使用MMDetection的Mask R-CNN。查看我们的文档以了解更多细节。更多的包装器将在未来发布。
|
||||
|
||||
4. 更强的可视化:我们提供了一系列可视化工具, 用户现在可以更方便可视化数据。
|
||||
|
||||
5. 更多的文档和教程:我们增加了更多的教程,降低用户的学习门槛。详见[教程](https://mmocr.readthedocs.io/zh_CN/dev-1.x/)。
|
||||
|
||||
6. 一站式数据准备:准备数据集已经不再是难事。使用我们的 [Dataset Preparer](https://mmocr.readthedocs.io/zh_CN/dev-1.x/user_guides/data_prepare/dataset_preparer.html),一行命令即可让多个数据集准备就绪。
|
||||
|
||||
7. 拥抱更多 `projects/`: 我们推出了 `projects/` 文件夹,用于存放一些实验性的新特性、框架和模型。我们对这个文件夹下的代码规范不作过多要求,力求让社区的所有想法第一时间得到实现和展示。请查看我们的[样例 project](https://github.com/open-mmlab/mmocr/blob/dev-1.x/projects/example_project/) 以了解更多。
|
||||
|
||||
8. 更多新模型:MMOCR 1.0 支持了更多模型和模型种类。
|
||||
|
||||
阅读[更新日志](https://mmocr.readthedocs.io/zh_CN/dev-1.x/notes/changelog.html)以获取更多信息。
|
||||
|
||||
## 安装
|
||||
|
||||
MMOCR 依赖 [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection),以下是安装的简要步骤。
|
||||
|
@ -99,13 +111,9 @@ MMOCR 依赖 [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open
|
|||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
mim install mmengine
|
||||
mim install 'mmcv>=2.0.0rc1'
|
||||
mim install 'mmdet>=3.0.0rc0'
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
git checkout 1.x
|
||||
pip3 install -e .
|
||||
mim install -e .
|
||||
```
|
||||
|
||||
## 快速入门
|
||||
|
@ -140,12 +148,14 @@ pip3 install -e .
|
|||
<summary>文字识别</summary>
|
||||
|
||||
- [x] [ABINet](configs/textrecog/abinet/README.md) (CVPR'2021)
|
||||
- [x] [ASTER](configs/textrecog/aster/README.md) (TPAMI'2018)
|
||||
- [x] [CRNN](configs/textrecog/crnn/README.md) (TPAMI'2016)
|
||||
- [x] [MASTER](configs/textrecog/master/README.md) (PR'2021)
|
||||
- [x] [NRTR](configs/textrecog/nrtr/README.md) (ICDAR'2019)
|
||||
- [x] [RobustScanner](configs/textrecog/robust_scanner/README.md) (ECCV'2020)
|
||||
- [x] [SAR](configs/textrecog/sar/README.md) (AAAI'2019)
|
||||
- [x] [SATRN](configs/textrecog/satrn/README.md) (CVPR'2020 Workshop on Text and Documents in the Deep Learning Era)
|
||||
- [x] [SVTR](configs/textrecog/svtr/README.md) (IJCAI'2022)
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -160,11 +170,18 @@ pip3 install -e .
|
|||
<summary>端对端 OCR</summary>
|
||||
|
||||
- [x] [ABCNet](projects/ABCNet/README.md) (CVPR'2020)
|
||||
- [x] [ABCNetV2](projects/ABCNet/README_V2.md) (TPAMI'2021)
|
||||
- [x] [SPTS](projects/SPTS/README.md) (ACM MM'2022)
|
||||
|
||||
</details>
|
||||
|
||||
请点击[模型库](https://mmocr.readthedocs.io/zh_CN/dev-1.x/modelzoo.html)查看更多关于上述算法的详细信息。
|
||||
|
||||
## 社区项目
|
||||
|
||||
[这里](projects/README.md)有一些由社区用户支持和维护的基于 MMOCR 的 SOTA 模型和解决方案的实现。这些项目展示了基于 MMOCR 的研究和产品开发的最佳实践。
|
||||
我们欢迎并感谢对 OpenMMLab 生态系统的所有贡献。
|
||||
|
||||
## 贡献指南
|
||||
|
||||
我们感谢所有的贡献者为改进和提升 MMOCR 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
|
||||
|
@ -215,10 +232,10 @@ MMOCR 是一款由来自不同高校和企业的研发人员共同参与贡献
|
|||
|
||||
## 欢迎加入 OpenMMLab 社区
|
||||
|
||||
扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [官方交流 QQ 群](https://r.vansin.top/?r=join-qq),或通过添加微信“Open小喵Lab”加入官方交流微信群。
|
||||
扫描下方的二维码可关注 OpenMMLab 团队的 知乎官方账号,扫描下方微信二维码添加喵喵好友,进入 MMOCR 微信交流社群。【加好友申请格式:研究方向+地区+学校/公司+姓名】
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://cdn.vansin.top/OpenMMLab/q3.png" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/wechat_qrcode.jpg" height="400" />
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://github.com/open-mmlab/mmocr/assets/62195058/bf1e53fe-df4f-4296-9e1b-61db8971985e" height="400" />
|
||||
</div>
|
||||
|
||||
我们会在 OpenMMLab 社区为大家
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
default_scope = 'mmocr'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
|
|
@ -10,7 +10,7 @@ model = dict(
|
|||
postprocessor=dict(type='SDMGRPostProcessor')),
|
||||
dictionary=dict(
|
||||
type='Dictionary',
|
||||
dict_file='data/kie/wildreceipt/dict.txt',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/sdmgr_dict.txt',
|
||||
with_padding=True,
|
||||
with_unknown=True,
|
||||
unknown_token=None),
|
||||
|
|
|
@ -24,5 +24,5 @@ test_pipeline = [
|
|||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadKIEAnnotations'),
|
||||
dict(type='Resize', scale=(1024, 512), keep_ratio=True),
|
||||
dict(type='PackKIEInputs'),
|
||||
dict(type='PackKIEInputs', meta_keys=('img_path', )),
|
||||
]
|
||||
|
|
|
@ -15,6 +15,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: sdmgr_unet16_60e_wildreceipt
|
||||
Alias: SDMGR
|
||||
In Collection: SDMGR
|
||||
Config: configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py
|
||||
Metadata:
|
||||
|
@ -25,3 +26,27 @@ Models:
|
|||
Metrics:
|
||||
macro_f1: 0.890
|
||||
Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt/sdmgr_unet16_60e_wildreceipt_20220825_151648-22419f37.pth
|
||||
- Name: sdmgr_novisual_60e_wildreceipt
|
||||
In Collection: SDMGR
|
||||
Config: configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py
|
||||
Metadata:
|
||||
Training Data: wildreceipt
|
||||
Results:
|
||||
- Task: Key Information Extraction
|
||||
Dataset: wildreceipt
|
||||
Metrics:
|
||||
macro_f1: 0.873
|
||||
Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt/sdmgr_novisual_60e_wildreceipt_20220831_193317-827649d8.pth
|
||||
- Name: sdmgr_novisual_60e_wildreceipt_openset
|
||||
In Collection: SDMGR
|
||||
Config: configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt-openset.py
|
||||
Metadata:
|
||||
Training Data: wildreceipt-openset
|
||||
Results:
|
||||
- Task: Key Information Extraction
|
||||
Dataset: wildreceipt
|
||||
Metrics:
|
||||
macro_f1: 0.931
|
||||
micro_f1: 0.940
|
||||
edge_micro_f1: 0.792
|
||||
Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt-openset/sdmgr_novisual_60e_wildreceipt-openset_20220831_200807-dedf15ec.pth
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
ctw1500_textdet_data_root = 'data/det/ctw1500'
|
||||
ctw1500_textdet_data_root = 'data/ctw1500'
|
||||
|
||||
ctw1500_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ctw1500_textdet_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
ctw1500_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ctw1500_textdet_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
icdar2015_textdet_data_root = 'data/det/icdar2015'
|
||||
icdar2015_textdet_data_root = 'data/icdar2015'
|
||||
|
||||
icdar2015_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=icdar2015_textdet_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
icdar2015_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=icdar2015_textdet_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,17 +1,8 @@
|
|||
synthtext_textdet_data_root = 'data/det/synthtext'
|
||||
synthtext_textdet_data_root = 'data/synthtext'
|
||||
|
||||
synthtext_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
synthtext_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
default_scope = 'mmocr'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
_base_ = 'default_runtime.py'
|
||||
|
||||
default_hooks = dict(
|
||||
logger=dict(type='LoggerHook', interval=1000),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=10000,
|
||||
by_epoch=False,
|
||||
max_keep_ckpts=1),
|
||||
)
|
||||
|
||||
# Evaluation
|
||||
val_evaluator = None
|
||||
test_evaluator = None
|
|
@ -4,7 +4,7 @@ optim_wrapper = dict(
|
|||
optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
|
||||
|
||||
train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000)
|
||||
test_cfg = dict(type='TestLoop')
|
||||
test_cfg = None
|
||||
val_cfg = None
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
|
|
|
@ -14,14 +14,26 @@ Recently, segmentation-based methods are quite popular in scene text detection,
|
|||
|
||||
## Results and models
|
||||
|
||||
### SynthText
|
||||
|
||||
| Method | Backbone | Training set | #iters | Download |
|
||||
| :-----------------------------------------------------------------------: | :------: | :----------: | :-----: | :--------------------------------------------------------------------------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py) | ResNet18 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/dbnet_resnet18_fpnc_100k_synthtext-2e9bf392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/20221214_150351.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py) | ResNet18 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 736 | 0.8853 | 0.7583 | 0.8169 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/20220825_221614.log) |
|
||||
| [DBNet_r50](/configs/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.8744 | 0.8276 | 0.8504 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/dbnet_resnet50_1200e_icdar2015_20221102_115917-54f50589.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/20221102_115917.log) |
|
||||
| [DBNet_r50](/configs/textdet/dbnet/dbnet_resnet50_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.8744 | 0.8276 | 0.8504 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/dbnet_resnet50_1200e_icdar2015_20221102_115917-54f50589.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/20221102_115917.log) |
|
||||
| [DBNet_r50dcn](/configs/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50-DCN | [Synthtext](https://download.openmmlab.com/mmocr/textdet/dbnet/tmp_1.0_pretrain/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-ed322016.pth) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.8784 | 0.8315 | 0.8543 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015_20220828_124917-452c443c.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/20220828_124917.log) |
|
||||
| [DBNet_r50-oclip](/configs/textdet/dbnet/dbnet_resnet50-oclip_fpnc_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9052 | 0.8272 | 0.8644 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/20221102_115917.log) |
|
||||
| [DBNet_r50-oclip](/configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9052 | 0.8272 | 0.8644 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/20221102_115917.log) |
|
||||
|
||||
### Total Text
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------------------------------: | :------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py) | ResNet18 | - | Totaltext Train | Totaltext Test | 1200 | 736 | 0.8640 | 0.7770 | 0.8182 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext/20221219_201038.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
|
@ -27,10 +25,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -55,10 +50,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
|
@ -29,10 +27,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
|
@ -57,10 +52,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -1,15 +1,39 @@
|
|||
_base_ = [
|
||||
'_base_dbnet_resnet18_fpnc.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/pretrain_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_100k.py',
|
||||
]
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = _base_.train_pipeline
|
||||
synthtext_textdet_test = _base_.synthtext_textdet_test
|
||||
synthtext_textdet_test.pipeline = _base_.test_pipeline
|
||||
synthtext_textdet_train.pipeline = train_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
|
@ -18,13 +42,4 @@ train_dataloader = dict(
|
|||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=synthtext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
_base_ = [
|
||||
'_base_dbnet_resnet18_fpnc.py',
|
||||
'../_base_/datasets/totaltext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_1200e.py',
|
||||
]
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon', min_poly_points=4),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon', min_poly_points=4),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
totaltext_textdet_train = _base_.totaltext_textdet_train
|
||||
totaltext_textdet_test = _base_.totaltext_textdet_test
|
||||
totaltext_textdet_train.pipeline = train_pipeline
|
||||
totaltext_textdet_test.pipeline = test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=16,
|
||||
pin_memory=True,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=totaltext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
pin_memory=True,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=totaltext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
|
@ -16,6 +16,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: dbnet_resnet18_fpnc_1200e_icdar2015
|
||||
Alias: DB_r18
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -53,7 +54,10 @@ Models:
|
|||
|
||||
- Name: dbnet_resnet50-oclip_fpnc_1200e_icdar2015
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
Alias:
|
||||
- DB_r50
|
||||
- DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
|
@ -62,3 +66,15 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8644
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth
|
||||
|
||||
- Name: dbnet_resnet18_fpnc_1200e_totaltext
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py
|
||||
Metadata:
|
||||
Training Data: Totaltext
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: Totaltext
|
||||
Metrics:
|
||||
hmean-iou: 0.8182
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth
|
||||
|
|
|
@ -14,12 +14,18 @@ Recently, segmentation-based scene text detection methods have drawn extensive a
|
|||
|
||||
## Results and models
|
||||
|
||||
### SynthText
|
||||
|
||||
| Method | BackBone | Training set | #iters | Download |
|
||||
| :--------------------------------------------------------------------------------: | :------------: | :----------: | :-----: | :-----------------------------------------------------------------------------------: |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) | ResNet50-dcnv2 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext-00f0a80b.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/20221215_013531.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | BackBone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: |
|
||||
| [DBNetpp_r50](/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9079 | 0.8209 | 0.8622 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/20221025_185550.log) |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50 | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
|
||||
| [DBNetpp_r50](/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9079 | 0.8209 | 0.8622 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/20221025_185550.log) |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50-dcnv2 | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
|
||||
| [DBNetpp_r50-oclip](/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9174 | 0.8609 | 0.8882 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/20221101_124139.log) |
|
||||
|
||||
## Citation
|
||||
|
|
|
@ -1,34 +1,44 @@
|
|||
_base_ = [
|
||||
'_base_dbnetpp_resnet50-dcnv2_fpnc.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/pretrain_runtime.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/schedules/schedule_sgd_100k.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.synthtext_textdet_train]
|
||||
test_list = [_base_.synthtext_textdet_test]
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
with_polygon=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = train_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=_base_.train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=_base_.test_pipeline))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
|
|
@ -17,6 +17,8 @@ Collections:
|
|||
Models:
|
||||
- Name: dbnetpp_resnet50_fpnc_1200e_icdar2015
|
||||
In Collection: DBNetpp
|
||||
Alias:
|
||||
- DBPP_r50
|
||||
Config: configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
|
@ -40,6 +42,8 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth
|
||||
|
||||
- Name: dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015
|
||||
Alias:
|
||||
- DBNetpp
|
||||
In Collection: DBNetpp
|
||||
Config: configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DRRG',
|
||||
backbone=dict(
|
||||
|
@ -29,10 +27,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
|
@ -82,10 +77,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1024, 640), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -15,6 +15,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: drrg_resnet50_fpn-unet_1200e_ctw1500
|
||||
Alias: DRRG
|
||||
In Collection: DRRG
|
||||
Config: configs/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -25,15 +26,3 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8467
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500/drrg_resnet50_fpn-unet_1200e_ctw1500_20220827_105233-d5c702dd.pth
|
||||
|
||||
- Name: drrg_resnet50-oclip_fpn-unet_1200e_ctw1500
|
||||
In Collection: DRRG
|
||||
Config: configs/textdet/drrg/drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean-iou:
|
||||
Weights:
|
||||
|
|
|
@ -18,16 +18,22 @@ One of the main challenges for arbitrary-shaped text detection is to design a go
|
|||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :------------------------------------: | :---------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :---------: | :-------: | :----: | :----: | :---------------------------------------: |
|
||||
| [FCENet](/configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py) | ResNet50 + DCNv2 | - | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.8689 | 0.8296 | 0.8488 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500_20220825_221510-4d705392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/20220825_221510.log) |
|
||||
| [FCENet_r50dcn](/configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py) | ResNet50 + DCNv2 | - | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.8689 | 0.8296 | 0.8488 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500_20220825_221510-4d705392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/20220825_221510.log) |
|
||||
| [FCENet_r50-oclip](/configs/textdet/fcenet/fcenet_resnet50-oclip-dcnv2_fpn_1500e_ctw1500.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.8383 | 0.801 | 0.8192 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500/fcenet_resnet50-oclip_fpn_1500e_ctw1500_20221102_121909-101df7e6.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500/20221102_121909.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :---------------------------------------------------: | :------------: | :--------------: | :----------: | :-------: | :-----: | :----------: | :-------: | :----: | :----: | :------------------------------------------------------: |
|
||||
| [FCENet](/configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.8243 | 0.8834 | 0.8528 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/fcenet_resnet50_fpn_1500e_icdar2015_20220826_140941-167d9042.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/20220826_140941.log) |
|
||||
| [FCENet_r50](/configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.8243 | 0.8834 | 0.8528 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/fcenet_resnet50_fpn_1500e_icdar2015_20220826_140941-167d9042.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/20220826_140941.log) |
|
||||
| [FCENet_r50-oclip](/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py) | ResNet50-oCLIP | - | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.9176 | 0.8098 | 0.8604 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/fcenet_resnet50-oclip_fpn_1500e_icdar2015_20221101_150145-5a6fc412.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/20221101_150145.log) |
|
||||
|
||||
### Total Text
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :---------------------------------------------------: | :------: | :--------------: | :-------------: | :------------: | :-----: | :---------: | :-------: | :----: | :----: | :-----------------------------------------------------: |
|
||||
| [FCENet_r50](/configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext.py) | ResNet50 | - | Totaltext Train | Totaltext Test | 1500 | (1280, 960) | 0.8485 | 0.7810 | 0.8134 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext/fcenet_resnet50_fpn_1500e_totaltext-91bd37af.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext/20221219_201107.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
|
@ -41,10 +39,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -96,10 +91,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -12,17 +12,13 @@ param_scheduler = [
|
|||
dict(type='PolyLR', power=0.9, eta_min=1e-7, end=1500),
|
||||
]
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
# dataset settings
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
|
||||
# test pipeline for CTW1500
|
||||
ctw_test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1080, 736), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
_base_ = [
|
||||
'_base_fcenet_resnet50_fpn.py',
|
||||
'../_base_/datasets/totaltext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_base.py',
|
||||
]
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
save_best='icdar/hmean',
|
||||
rule='greater',
|
||||
_delete_=True))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(800, 800),
|
||||
ratio_range=(0.75, 2.5),
|
||||
keep_ratio=True),
|
||||
dict(
|
||||
type='TextDetRandomCropFlip',
|
||||
crop_ratio=0.5,
|
||||
iter_num=1,
|
||||
min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomApply',
|
||||
transforms=[dict(type='RandomCrop', min_side_ratio=0.3)],
|
||||
prob=0.8),
|
||||
dict(
|
||||
type='RandomApply',
|
||||
transforms=[
|
||||
dict(
|
||||
type='RandomRotate',
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False,
|
||||
use_canvas=True)
|
||||
],
|
||||
prob=0.5),
|
||||
dict(
|
||||
type='RandomChoice',
|
||||
transforms=[[
|
||||
dict(type='Resize', scale=800, keep_ratio=True),
|
||||
dict(type='SourceImagePad', target_scale=800)
|
||||
],
|
||||
dict(type='Resize', scale=800, keep_ratio=False)],
|
||||
prob=[0.6, 0.4]),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1280, 960), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
optim_wrapper = dict(optimizer=dict(lr=1e-3, weight_decay=5e-4))
|
||||
train_cfg = dict(max_epochs=1500)
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='StepLR', gamma=0.8, step_size=200, end=1200),
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
totaltext_textdet_train = _base_.totaltext_textdet_train
|
||||
totaltext_textdet_test = _base_.totaltext_textdet_test
|
||||
totaltext_textdet_train.pipeline = train_pipeline
|
||||
totaltext_textdet_test.pipeline = test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=16,
|
||||
persistent_workers=True,
|
||||
pin_memory=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=totaltext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
pin_memory=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=totaltext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -16,6 +16,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: fcenet_resnet50-dcnv2_fpn_1500e_ctw1500
|
||||
Alias: FCE_CTW_DCNv2
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -40,6 +41,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500/fcenet_resnet50-oclip_fpn_1500e_ctw1500_20221102_121909-101df7e6.pth
|
||||
|
||||
- Name: fcenet_resnet50_fpn_1500e_icdar2015
|
||||
Alias: FCE_IC15
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -52,6 +54,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/fcenet_resnet50_fpn_1500e_icdar2015_20220826_140941-167d9042.pth
|
||||
|
||||
- Name: fcenet_resnet50-oclip_fpn_1500e_icdar2015
|
||||
Alias: FCENet
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -62,3 +65,15 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8604
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/fcenet_resnet50-oclip_fpn_1500e_icdar2015_20221101_150145-5a6fc412.pth
|
||||
|
||||
- Name: fcenet_resnet50_fpn_1500e_totaltext
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext.py
|
||||
Metadata:
|
||||
Training Data: Totaltext
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: Totaltext
|
||||
Metrics:
|
||||
hmean-iou: 0.8134
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext/fcenet_resnet50_fpn_1500e_totaltext-91bd37af.pth
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
|
||||
mask_rcnn = _base_.pop('model')
|
||||
# Adapt Mask R-CNN model to OCR task
|
||||
mask_rcnn.update(
|
||||
|
@ -18,10 +16,7 @@ mask_rcnn.update(
|
|||
model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -49,10 +44,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -20,10 +20,7 @@ ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
|||
|
||||
# test pipeline for CTW1500
|
||||
ctw_test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=dict(backend='disk'),
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -18,6 +18,7 @@ Collections:
|
|||
Models:
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_ctw1500
|
||||
In Collection: Mask R-CNN
|
||||
Alias: MaskRCNN_CTW
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
|
@ -25,7 +26,7 @@ Models:
|
|||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean: 0.7458
|
||||
hmean-iou: 0.7458
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/mask-rcnn_resnet50_fpn_160e_ctw1500_20220826_154755-ce68ee8e.pth
|
||||
|
||||
- Name: mask-rcnn_resnet50-oclip_fpn_160e_ctw1500
|
||||
|
@ -37,11 +38,12 @@ Models:
|
|||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean: 0.7562
|
||||
hmean-iou: 0.7562
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500_20221101_154448-6e9e991c.pth
|
||||
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_icdar2015
|
||||
In Collection: Mask R-CNN
|
||||
Alias: MaskRCNN_IC15
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
|
@ -49,11 +51,12 @@ Models:
|
|||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean: 0.8182
|
||||
hmean-iou: 0.8182
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/mask-rcnn_resnet50_fpn_160e_icdar2015_20220826_154808-ff5c30bf.pth
|
||||
|
||||
- Name: mask-rcnn_resnet50-oclip_fpn_160e_icdar2015
|
||||
In Collection: Mask R-CNN
|
||||
Alias: MaskRCNN
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
|
@ -61,5 +64,5 @@ Models:
|
|||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean: 0.8513
|
||||
hmean-iou: 0.8513
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015_20221101_131357-a19f7802.pth
|
||||
|
|
|
@ -32,12 +32,8 @@ model = dict(
|
|||
),
|
||||
postprocessor=dict(type='PANPostprocessor', text_repr_type='quad')))
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -60,10 +56,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
# TODO Replace with mmcv.RescaleToShort when it's ready
|
||||
dict(
|
||||
type='ShortScaleAspectJitter',
|
||||
|
|
|
@ -15,6 +15,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: panet_resnet18_fpem-ffm_600e_ctw1500
|
||||
Alias: PANet_CTW
|
||||
In Collection: PANet
|
||||
Config: configs/textdet/panet/panet_resnet18_fpem-ffm_600e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -27,6 +28,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/panet/panet_resnet18_fpem-ffm_600e_ctw1500/panet_resnet18_fpem-ffm_600e_ctw1500_20220826_144818-980f32d0.pth
|
||||
|
||||
- Name: panet_resnet18_fpem-ffm_600e_icdar2015
|
||||
Alias: PANet_IC15
|
||||
In Collection: PANet
|
||||
Config: configs/textdet/panet/panet_resnet18_fpem-ffm_600e_icdar2015.py
|
||||
Metadata:
|
||||
|
|
|
@ -9,12 +9,8 @@ model = dict(det_head=dict(module_loss=dict(shrink_ratio=(1, 0.7))))
|
|||
|
||||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -37,10 +33,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
# TODO Replace with mmcv.RescaleToShort when it's ready
|
||||
dict(
|
||||
type='ShortScaleAspectJitter',
|
||||
|
|
|
@ -7,12 +7,8 @@ _base_ = [
|
|||
|
||||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -35,10 +31,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
# TODO Replace with mmcv.RescaleToShort when it's ready
|
||||
dict(
|
||||
type='ShortScaleAspectJitter',
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
|
@ -32,10 +30,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -58,10 +53,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -16,6 +16,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: psenet_resnet50_fpnf_600e_ctw1500
|
||||
Alias: PS_CTW
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -40,6 +41,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500/psenet_resnet50-oclip_fpnf_600e_ctw1500_20221101_140406-d431710d.pth
|
||||
|
||||
- Name: psenet_resnet50_fpnf_600e_icdar2015
|
||||
Alias: PS_IC15
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -49,9 +51,10 @@ Models:
|
|||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.7998
|
||||
Weights:
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015/psenet_resnet50_fpnf_600e_icdar2015_20220825_222709-b6741ec3.pth
|
||||
|
||||
- Name: psenet_resnet50-oclip_fpnf_600e_icdar2015
|
||||
Alias: PSENet
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015.py
|
||||
Metadata:
|
||||
|
|
|
@ -17,10 +17,7 @@ ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
|||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
|
||||
test_pipeline_ctw = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=_base_.file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1280, 1280), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='TextSnake',
|
||||
backbone=dict(
|
||||
|
@ -28,10 +26,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
|
@ -72,10 +67,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -27,6 +27,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500/textsnake_resnet50_fpn-unet_1200e_ctw1500_20220825_221459-c0b6adc4.pth
|
||||
|
||||
- Name: textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500
|
||||
Alias: TextSnake
|
||||
In Collection: TextSnake
|
||||
Config: configs/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py
|
||||
Metadata:
|
||||
|
|
|
@ -1,17 +1,13 @@
|
|||
mjsynth_textrecog_data_root = 'data/rec/Syn90k/'
|
||||
mjsynth_textrecog_data_root = 'data/mjsynth'
|
||||
|
||||
mjsynth_textrecog_test = dict(
|
||||
mjsynth_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=mjsynth_textrecog_data_root,
|
||||
data_prefix=dict(img_path='mnt/ramdisk/max/90kDICT32px'),
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
mjsynth_sub_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=mjsynth_textrecog_data_root,
|
||||
data_prefix=dict(img_path='mnt/ramdisk/max/90kDICT32px'),
|
||||
ann_file='subset_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='subset_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,25 +1,19 @@
|
|||
synthtext_textrecog_data_root = 'data/rec/SynthText/'
|
||||
synthtext_textrecog_data_root = 'data/synthtext'
|
||||
|
||||
synthtext_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
pipeline=None)
|
||||
|
||||
synthtext_an_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='alphanumeric_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
synthtext_sub_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
data_prefix=dict(img_path='synthtext/SynthText_patch_horizontal'),
|
||||
ann_file='subset_train_labels.json',
|
||||
test_mode=False,
|
||||
ann_file='subset_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
synthtext_an_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=synthtext_textrecog_data_root,
|
||||
ann_file='alphanumeric_textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
default_scope = 'mmocr'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
@ -46,3 +46,5 @@ visualizer = dict(
|
|||
type='TextRecogLocalVisualizer',
|
||||
name='visualizer',
|
||||
vis_backends=vis_backends)
|
||||
|
||||
tta_model = dict(type='EncoderDecoderRecognizerTTAModel')
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# optimizer
|
||||
optim_wrapper = dict(
|
||||
type='OptimWrapper',
|
||||
optimizer=dict(
|
||||
type='AdamW',
|
||||
lr=4e-4,
|
||||
betas=(0.9, 0.999),
|
||||
eps=1e-08,
|
||||
weight_decay=0.05))
|
||||
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=6, val_interval=1)
|
||||
val_cfg = dict(type='ValLoop')
|
||||
test_cfg = dict(type='TestLoop')
|
||||
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(
|
||||
type='CosineAnnealingLR',
|
||||
T_max=6,
|
||||
eta_min=4e-6,
|
||||
convert_to_iter_based=True)
|
||||
]
|
|
@ -38,7 +38,9 @@ Linguistic knowledge is of great benefit to scene text recognition. However, how
|
|||
| :--------------------------------------------: | :------------------------------------------------: | :----: | :----------: | :-------: | :-------: | :------------: | :----: | :----------------------------------------------- |
|
||||
| | | IIIT5K | SVT | IC13-1015 | IC15-2077 | SVTP | CT80 | |
|
||||
| [ABINet-Vision](/configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py) | - | 0.9523 | 0.9196 | 0.9369 | 0.7896 | 0.8403 | 0.8437 | [model](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet-vision_20e_st-an_mj/abinet-vision_20e_st-an_mj_20220915_152445-85cfb03d.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet-vision_20e_st-an_mj/20220915_152445.log) |
|
||||
| [ABINet-Vision-TTA](/configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py) | - | 0.9523 | 0.9196 | 0.9360 | 0.8175 | 0.8450 | 0.8542 | |
|
||||
| [ABINet](/configs/textrecog/abinet/abinet_20e_st-an_mj.py) | [Pretrained](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_pretrain-45deac15.pth) | 0.9603 | 0.9397 | 0.9557 | 0.8146 | 0.8868 | 0.8785 | [model](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_20e_st-an_mj/abinet_20e_st-an_mj_20221005_012617-ead8c139.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_20e_st-an_mj/20221005_012617.log) |
|
||||
| [ABINet-TTA](/configs/textrecog/abinet/abinet_20e_st-an_mj.py) | [Pretrained](https://download.openmmlab.com/mmocr/textrecog/abinet/abinet_pretrain-45deac15.pth) | 0.9597 | 0.9397 | 0.9527 | 0.8426 | 0.8930 | 0.8854 | |
|
||||
|
||||
```{note}
|
||||
1. ABINet allows its encoder to run and be trained without decoder and fuser. Its encoder is designed to recognize texts as a stand-alone model and therefore can work as an independent text recognizer. We release it as ABINet-Vision.
|
||||
|
|
|
@ -39,14 +39,8 @@ model = dict(
|
|||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375]))
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(type='Resize', scale=(128, 32)),
|
||||
dict(
|
||||
|
@ -86,7 +80,7 @@ train_pipeline = [
|
|||
type='mmdet.Albu',
|
||||
transforms=[
|
||||
dict(type='GaussNoise', var_limit=(20, 20), p=0.5),
|
||||
dict(type='MotionBlur', blur_limit=6, p=0.5),
|
||||
dict(type='MotionBlur', blur_limit=7, p=0.5),
|
||||
]),
|
||||
]),
|
||||
dict(
|
||||
|
@ -107,7 +101,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(128, 32)),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
@ -116,3 +110,50 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[dict(type='Resize', scale=(128, 32))],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -24,7 +24,7 @@ param_scheduler = [
|
|||
|
||||
# dataset settings
|
||||
train_list = [
|
||||
_base_.mjsynth_textrecog_test, _base_.synthtext_an_textrecog_train
|
||||
_base_.mjsynth_textrecog_train, _base_.synthtext_an_textrecog_train
|
||||
]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
|
|
|
@ -26,7 +26,7 @@ param_scheduler = [
|
|||
|
||||
# dataset settings
|
||||
train_list = [
|
||||
_base_.mjsynth_textrecog_test, _base_.synthtext_an_textrecog_train
|
||||
_base_.mjsynth_textrecog_train, _base_.synthtext_an_textrecog_train
|
||||
]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
|
|
|
@ -34,6 +34,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: abinet-vision_20e_st-an_mj
|
||||
Alias: ABINet_Vision
|
||||
In Collection: ABINet-vision
|
||||
Config: configs/textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
Metadata:
|
||||
|
@ -67,6 +68,7 @@ Models:
|
|||
word_acc: 0.8437
|
||||
Weights: https://download.openmmlab.com/mmocr/textrecog/abinet/abinet-vision_20e_st-an_mj/abinet-vision_20e_st-an_mj_20220915_152445-85cfb03d.pth
|
||||
- Name: abinet_20e_st-an_mj
|
||||
Alias: ABINet
|
||||
In Collection: ABINet
|
||||
Config: configs/textrecog/abinet/abinet_20e_st-an_mj.py
|
||||
Metadata:
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
# ASTER
|
||||
|
||||
> [ASTER: An Attentional Scene Text Recognizer with Flexible Rectification](https://ieeexplore.ieee.org/abstract/document/8395027/)
|
||||
|
||||
<!-- [ALGORITHM] -->
|
||||
|
||||
## Abstract
|
||||
|
||||
A challenging aspect of scene text recognition is to handle text with distortions or irregular layout. In particular, perspective text and curved text are common in natural scenes and are difficult to recognize. In this work, we introduce ASTER, an end-to-end neural network model that comprises a rectification network and a recognition network. The rectification network adaptively transforms an input image into a new one, rectifying the text in it. It is powered by a flexible Thin-Plate Spline transformation which handles a variety of text irregularities and is trained without human annotations. The recognition network is an attentional sequence-to-sequence model that predicts a character sequence directly from the rectified image. The whole model is trained end to end, requiring only images and their groundtruth text. Through extensive experiments, we verify the effectiveness of the rectification and demonstrate the state-of-the-art recognition performance of ASTER. Furthermore, we demonstrate that ASTER is a powerful component in end-to-end recognition systems, for its ability to enhance the detector.
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/65173622/207841597-fcd596cf-20eb-42db-9108-21e586dd9109.png"/>
|
||||
</div>
|
||||
|
||||
## Dataset
|
||||
|
||||
### Train Dataset
|
||||
|
||||
| trainset | instance_num | repeat_num | note |
|
||||
| :-------: | :----------: | :--------: | :----------: |
|
||||
| Syn90k | 8919273 | 1 | synth |
|
||||
| SynthText | 7239272 | 1 | alphanumeric |
|
||||
|
||||
### Test Dataset
|
||||
|
||||
| testset | instance_num | note |
|
||||
| :-----: | :----------: | :-------: |
|
||||
| IIIT5K | 3000 | regular |
|
||||
| SVT | 647 | regular |
|
||||
| IC13 | 1015 | regular |
|
||||
| IC15 | 2077 | irregular |
|
||||
| SVTP | 645 | irregular |
|
||||
| CT80 | 288 | irregular |
|
||||
|
||||
## Results and models
|
||||
|
||||
| Methods | Backbone | | Regular Text | | | | Irregular Text | | download |
|
||||
| :--------------------------------------------------------------: | :------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :-------------------------------------------------------------------: |
|
||||
| | | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [ASTER](/configs/textrecog/aster/aster_resnet45_6e_st_mj.py) | ResNet45 | 0.9357 | 0.8949 | 0.9281 | | 0.7665 | 0.8062 | 0.8507 | [model](https://download.openmmlab.com/mmocr/textrecog/aster/aster_resnet45_6e_st_mj/aster_resnet45_6e_st_mj-cc56eca4.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/aster/aster_resnet45_6e_st_mj/20221214_232605.log) |
|
||||
| [ASTER-TTA](/configs/textrecog/aster/aster_resnet45_6e_st_mj.py) | ResNet45 | 0.9337 | 0.8949 | 0.9251 | | 0.7925 | 0.8109 | 0.8507 | |
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{shi2018aster,
|
||||
title={Aster: An attentional scene text recognizer with flexible rectification},
|
||||
author={Shi, Baoguang and Yang, Mingkun and Wang, Xinggang and Lyu, Pengyuan and Yao, Cong and Bai, Xiang},
|
||||
journal={IEEE transactions on pattern analysis and machine intelligence},
|
||||
volume={41},
|
||||
number={9},
|
||||
pages={2035--2048},
|
||||
year={2018},
|
||||
publisher={IEEE}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,104 @@
|
|||
dictionary = dict(
|
||||
type='Dictionary',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/english_digits_symbols.txt',
|
||||
with_padding=True,
|
||||
with_unknown=True,
|
||||
same_start_end=True,
|
||||
with_start=True,
|
||||
with_end=True)
|
||||
|
||||
model = dict(
|
||||
type='ASTER',
|
||||
preprocessor=dict(
|
||||
type='STN',
|
||||
in_channels=3,
|
||||
resized_image_size=(32, 64),
|
||||
output_image_size=(32, 100),
|
||||
num_control_points=20),
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
in_channels=3,
|
||||
stem_channels=[32],
|
||||
block_cfgs=dict(type='BasicBlock', use_conv1x1='True'),
|
||||
arch_layers=[3, 4, 6, 6, 3],
|
||||
arch_channels=[32, 64, 128, 256, 512],
|
||||
strides=[(2, 2), (2, 2), (2, 1), (2, 1), (2, 1)],
|
||||
init_cfg=[
|
||||
dict(type='Kaiming', layer='Conv2d'),
|
||||
dict(type='Constant', val=1, layer='BatchNorm2d'),
|
||||
]),
|
||||
encoder=dict(type='ASTEREncoder', in_channels=512),
|
||||
decoder=dict(
|
||||
type='ASTERDecoder',
|
||||
max_seq_len=25,
|
||||
in_channels=512,
|
||||
emb_dims=512,
|
||||
attn_dims=512,
|
||||
hidden_size=512,
|
||||
postprocessor=dict(type='AttentionPostprocessor'),
|
||||
module_loss=dict(
|
||||
type='CEModuleLoss', flatten=True, ignore_first_char=True),
|
||||
dictionary=dictionary,
|
||||
),
|
||||
data_preprocessor=dict(
|
||||
type='TextRecogDataPreprocessor',
|
||||
mean=[127.5, 127.5, 127.5],
|
||||
std=[127.5, 127.5, 127.5]))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=5),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(type='Resize', scale=(256, 64)),
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(256, 64)),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio',
|
||||
'instances'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"),
|
||||
], [dict(type='Resize', scale=(256, 64))],
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio', 'instances'))
|
||||
]])
|
||||
]
|
|
@ -0,0 +1,57 @@
|
|||
# training schedule for 1x
|
||||
_base_ = [
|
||||
'_base_aster.py',
|
||||
'../_base_/datasets/mjsynth.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/datasets/cute80.py',
|
||||
'../_base_/datasets/iiit5k.py',
|
||||
'../_base_/datasets/svt.py',
|
||||
'../_base_/datasets/svtp.py',
|
||||
'../_base_/datasets/icdar2013.py',
|
||||
'../_base_/datasets/icdar2015.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_adamw_cos_6e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = [
|
||||
_base_.mjsynth_textrecog_train,
|
||||
_base_.synthtext_textrecog_train,
|
||||
]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
_base_.svt_textrecog_test, _base_.svtp_textrecog_test,
|
||||
_base_.icdar2013_textrecog_test, _base_.icdar2015_textrecog_test
|
||||
]
|
||||
|
||||
default_hooks = dict(logger=dict(type='LoggerHook', interval=50))
|
||||
|
||||
train_dataset = dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=_base_.train_pipeline)
|
||||
test_dataset = dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=_base_.test_pipeline)
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=1024,
|
||||
num_workers=24,
|
||||
persistent_workers=True,
|
||||
pin_memory=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=train_dataset)
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=1024)
|
||||
|
||||
test_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
pin_memory=True,
|
||||
drop_last=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=test_dataset)
|
||||
|
||||
val_dataloader = test_dataloader
|
||||
|
||||
val_evaluator = dict(
|
||||
dataset_prefixes=['CUTE80', 'IIIT5K', 'SVT', 'SVTP', 'IC13', 'IC15'])
|
||||
test_evaluator = val_evaluator
|
|
@ -0,0 +1,52 @@
|
|||
Collections:
|
||||
- Name: ASTER
|
||||
Metadata:
|
||||
Training Data: OCRDataset
|
||||
Training Techniques:
|
||||
- AdamW
|
||||
Epochs: 6
|
||||
Batch Size: 4096
|
||||
Training Resources: 4 x NVIDIA A100-SXM4-80GB
|
||||
Architecture:
|
||||
- ResNet45
|
||||
- ASTERDecoder
|
||||
Paper:
|
||||
URL: https://ieeexplore.ieee.org/abstract/document/8395027/
|
||||
Title: 'ASTER: An Attentional Scene Text Recognizer with Flexible Rectification'
|
||||
README: configs/textrecog/aster/README.md
|
||||
|
||||
Models:
|
||||
- Name: aster_resnet45_6e_st_mj
|
||||
Alias: ASTER
|
||||
In Collection: ASTER
|
||||
Config: configs/textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
Metadata:
|
||||
Training Data:
|
||||
- SynthText
|
||||
- Syn90k
|
||||
Results:
|
||||
- Task: Text Recognition
|
||||
Dataset: IIIT5K
|
||||
Metrics:
|
||||
word_acc: 0.9357
|
||||
- Task: Text Recognition
|
||||
Dataset: SVT
|
||||
Metrics:
|
||||
word_acc: 0.8949
|
||||
- Task: Text Recognition
|
||||
Dataset: ICDAR2013
|
||||
Metrics:
|
||||
word_acc: 0.9281
|
||||
- Task: Text Recognition
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
word_acc: 0.7665
|
||||
- Task: Text Recognition
|
||||
Dataset: SVTP
|
||||
Metrics:
|
||||
word_acc: 0.8062
|
||||
- Task: Text Recognition
|
||||
Dataset: CT80
|
||||
Metrics:
|
||||
word_acc: 0.8507
|
||||
Weights: https://download.openmmlab.com/mmocr/textrecog/aster/aster_resnet45_6e_st_mj/aster_resnet45_6e_st_mj-cc56eca4.pth
|
|
@ -33,10 +33,11 @@ Image-based sequence recognition has been a long-standing research topic in comp
|
|||
|
||||
## Results and models
|
||||
|
||||
| methods | | Regular Text | | | | Irregular Text | | download |
|
||||
| :----------------------------------------------------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :-------------------------------------------------------------------------------------: |
|
||||
| methods | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [CRNN](/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py) | 0.8053 | 0.7991 | 0.8739 | | 0.5571 | 0.6093 | 0.5694 | [model](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/20220826_224120.log) |
|
||||
| methods | | Regular Text | | | | Irregular Text | | download |
|
||||
| :--------------------------------------------------------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :---------------------------------------------------------------------------------: |
|
||||
| methods | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [CRNN](/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py) | 0.8053 | 0.7991 | 0.8739 | | 0.5571 | 0.6093 | 0.5694 | [model](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/20220826_224120.log) |
|
||||
| [CRNN-TTA](/configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py) | 0.8013 | 0.7975 | 0.8631 | | 0.5763 | 0.6093 | 0.5764 | [model](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/crnn_mini-vgg_5e_mj_20220826_224120-8afbedbb.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/crnn/crnn_mini-vgg_5e_mj/20220826_224120.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -18,12 +18,10 @@ model = dict(
|
|||
data_preprocessor=dict(
|
||||
type='TextRecogDataPreprocessor', mean=[127], std=[127]))
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
color_type='grayscale',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
|
@ -34,10 +32,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
color_type='grayscale',
|
||||
file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile', color_type='grayscale'),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=32,
|
||||
|
@ -51,3 +46,57 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='grayscale'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=None,
|
||||
width_divisor=16)
|
||||
],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -12,7 +12,7 @@ _base_ = [
|
|||
'_base_crnn_mini-vgg.py',
|
||||
]
|
||||
# dataset settings
|
||||
train_list = [_base_.mjsynth_textrecog_test]
|
||||
train_list = [_base_.mjsynth_textrecog_train]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
_base_.svt_textrecog_test, _base_.svtp_textrecog_test,
|
||||
|
|
|
@ -33,7 +33,9 @@ val_dataloader = dict(
|
|||
pipeline=_base_.test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
model = dict(decoder=dict(dictionary=dict(with_unknown=True)))
|
||||
_base_.model.decoder.dictionary.update(
|
||||
dict(with_unknown=True, unknown_token=None))
|
||||
_base_.train_cfg.update(dict(max_epochs=200, val_interval=10))
|
||||
|
||||
val_evaluator = dict(dataset_prefixes=['Toy'])
|
||||
test_evaluator = val_evaluator
|
||||
|
|
|
@ -17,6 +17,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: crnn_mini-vgg_5e_mj
|
||||
Alias: CRNN
|
||||
In Collection: CRNN
|
||||
Config: configs/textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
Metadata:
|
||||
|
|
|
@ -39,12 +39,13 @@ Attention-based scene text recognizers have gained huge success, which leverages
|
|||
| :-------------------------------------------------------------: | :-----------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :---------------------------------------------------------------: |
|
||||
| | | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [MASTER](/configs/textrecog/master/master_resnet31_12e_st_mj_sa.py) | R31-GCAModule | 0.9490 | 0.8887 | 0.9517 | | 0.7650 | 0.8465 | 0.8889 | [model](https://download.openmmlab.com/mmocr/textrecog/master/master_resnet31_12e_st_mj_sa/master_resnet31_12e_st_mj_sa_20220915_152443-f4a5cabc.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/master/master_resnet31_12e_st_mj_sa/20220915_152443.log) |
|
||||
| [MASTER-TTA](/configs/textrecog/master/master_resnet31_12e_st_mj_sa.py) | R31-GCAModule | 0.9450 | 0.8887 | 0.9478 | | 0.7906 | 0.8481 | 0.8958 | |
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{Lu2021MASTER,
|
||||
title={{MASTER}: Multi-Aspect Non-local Network for Scene Text Recognition},
|
||||
title={MASTER: Multi-Aspect Non-local Network for Scene Text Recognition},
|
||||
author={Ning Lu and Wenwen Yu and Xianbiao Qi and Yihao Chen and Ping Gong and Rong Xiao and Xiang Bai},
|
||||
journal={Pattern Recognition},
|
||||
year={2021}
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
dictionary = dict(
|
||||
type='Dictionary',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/english_digits_symbols.txt',
|
||||
|
@ -75,11 +73,7 @@ model = dict(
|
|||
std=[127.5, 127.5, 127.5]))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
|
@ -94,7 +88,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=48,
|
||||
|
@ -109,3 +103,58 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
width_divisor=16)
|
||||
],
|
||||
[dict(type='PadToWidth', width=160)],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -23,7 +23,7 @@ param_scheduler = [
|
|||
|
||||
# dataset settings
|
||||
train_list = [
|
||||
_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train,
|
||||
_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train,
|
||||
_base_.synthtext_add_textrecog_train
|
||||
]
|
||||
test_list = [
|
||||
|
|
|
@ -17,6 +17,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: master_resnet31_12e_st_mj_sa
|
||||
Alias: MASTER
|
||||
In Collection: MASTER
|
||||
Config: configs/textrecog/master/master_resnet31_12e_st_mj_sa.py
|
||||
Metadata:
|
||||
|
|
|
@ -38,8 +38,11 @@ Scene text recognition has attracted a great many researches due to its importan
|
|||
| :---------------------------------------------------------: | :-------------------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :-----------------------------------------------------------: |
|
||||
| | | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [NRTR](/configs/textrecog/nrtr/nrtr_modality-transform_6e_st_mj.py) | NRTRModalityTransform | 0.9147 | 0.8841 | 0.9369 | | 0.7246 | 0.7783 | 0.7500 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_modality-transform_6e_st_mj/nrtr_modality-transform_6e_st_mj_20220916_103322-bd9425be.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_modality-transform_6e_st_mj/20220916_103322.log) |
|
||||
| [NRTR-TTA](/configs/textrecog/nrtr/nrtr_modality-transform_6e_st_mj.py) | NRTRModalityTransform | 0.9123 | 0.8825 | 0.9310 | | 0.7492 | 0.7798 | 0.7535 | |
|
||||
| [NRTR](/configs/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj.py) | R31-1/8-1/4 | 0.9483 | 0.8918 | 0.9507 | | 0.7578 | 0.8016 | 0.8889 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj/nrtr_resnet31-1by8-1by4_6e_st_mj_20220916_103322-a6a2a123.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj/20220916_103322.log) |
|
||||
| [NRTR-TTA](/configs/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj.py) | R31-1/8-1/4 | 0.9443 | 0.8903 | 0.9478 | | 0.7790 | 0.8078 | 0.8854 | |
|
||||
| [NRTR](/configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py) | R31-1/16-1/8 | 0.9470 | 0.8918 | 0.9399 | | 0.7376 | 0.7969 | 0.8854 | [model](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj/nrtr_resnet31-1by16-1by8_6e_st_mj_20220920_143358-43767036.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj/20220920_143358.log) |
|
||||
| [NRTR-TTA](/configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py) | R31-1/16-1/8 | 0.9423 | 0.8903 | 0.9360 | | 0.7641 | 0.8016 | 0.8854 | |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
dictionary = dict(
|
||||
type='Dictionary',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/english_digits_symbols.txt',
|
||||
|
@ -26,11 +24,7 @@ model = dict(
|
|||
std=[58.395, 57.12, 57.375]))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
|
@ -45,7 +39,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=32,
|
||||
|
@ -60,3 +54,58 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=160,
|
||||
width_divisor=16)
|
||||
],
|
||||
[dict(type='PadToWidth', width=160)],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
dictionary = dict(
|
||||
type='Dictionary',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/english_digits_symbols.txt',
|
||||
|
@ -32,11 +30,7 @@ model = dict(
|
|||
std=[58.395, 57.12, 57.375]))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
|
@ -51,7 +45,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=32,
|
||||
|
@ -66,3 +60,58 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=160,
|
||||
width_divisor=16)
|
||||
],
|
||||
[dict(type='PadToWidth', width=160)],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -51,6 +51,9 @@ Models:
|
|||
word_acc: 0.7500
|
||||
Weights: https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_modality-transform_6e_st_mj/nrtr_modality-transform_6e_st_mj_20220916_103322-bd9425be.pth
|
||||
- Name: nrtr_resnet31-1by8-1by4_6e_st_mj
|
||||
Alias:
|
||||
- NRTR
|
||||
- NRTR_1/8-1/4
|
||||
In Collection: NRTR
|
||||
Config: configs/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj.py
|
||||
Metadata:
|
||||
|
@ -84,6 +87,7 @@ Models:
|
|||
word_acc: 0.8889
|
||||
Weights: https://download.openmmlab.com/mmocr/textrecog/nrtr/nrtr_resnet31-1by8-1by4_6e_st_mj/nrtr_resnet31-1by8-1by4_6e_st_mj_20220916_103322-a6a2a123.pth
|
||||
- Name: nrtr_resnet31-1by16-1by8_6e_st_mj
|
||||
Alias: NRTR_1/16-1/8
|
||||
In Collection: NRTR
|
||||
Config: configs/textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
Metadata:
|
||||
|
|
|
@ -20,7 +20,7 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train]
|
||||
train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
_base_.svt_textrecog_test, _base_.svtp_textrecog_test,
|
||||
|
|
|
@ -20,7 +20,7 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train]
|
||||
train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
_base_.svt_textrecog_test, _base_.svtp_textrecog_test,
|
||||
|
|
|
@ -44,6 +44,7 @@ The attention-based encoder-decoder framework has recently achieved impressive r
|
|||
| :------------------------------------------------------------------: | :--: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :-------------------------------------------------------------------: |
|
||||
| | | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [RobustScanner](/configs/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py) | 4 | 0.9510 | 0.9011 | 0.9320 | | 0.7578 | 0.8078 | 0.8750 | [model](https://download.openmmlab.com/mmocr/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real_20220915_152447-7fc35929.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real/20220915_152447.log) |
|
||||
| [RobustScanner-TTA](/configs/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py) | 4 | 0.9487 | 0.9011 | 0.9261 | | 0.7805 | 0.8124 | 0.8819 | |
|
||||
|
||||
## References
|
||||
|
||||
|
|
|
@ -29,14 +29,8 @@ model = dict(
|
|||
dictionary=dictionary,
|
||||
max_seq_len=30))
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
|
@ -51,7 +45,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=48,
|
||||
|
@ -66,3 +60,58 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
width_divisor=4),
|
||||
],
|
||||
[dict(type='PadToWidth', width=160)],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -18,6 +18,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: robustscanner_resnet31_5e_st-sub_mj-sub_sa_real
|
||||
Alias: RobustScanner
|
||||
In Collection: RobustScanner
|
||||
Config: configs/textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py
|
||||
Metadata:
|
||||
|
|
|
@ -44,7 +44,9 @@ Recognizing irregular text in natural scene images is challenging due to the lar
|
|||
| :----------------------------------------------------: | :---------: | :------------------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :------------------------------------------------------: |
|
||||
| | | | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [SAR](/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py) | R31-1/8-1/4 | ParallelSARDecoder | 0.9533 | 0.8964 | 0.9369 | | 0.7602 | 0.8326 | 0.9062 | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real_20220915_171910-04eb4e75.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real/20220915_171910.log) |
|
||||
| [SAR-TTA](/configs/textrecog/sar/sar_r31_parallel_decoder_academic.py) | R31-1/8-1/4 | ParallelSARDecoder | 0.9510 | 0.8964 | 0.9340 | | 0.7862 | 0.8372 | 0.9132 | |
|
||||
| [SAR](/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py) | R31-1/8-1/4 | SequentialSARDecoder | 0.9553 | 0.9073 | 0.9409 | | 0.7761 | 0.8093 | 0.8958 | [model](https://download.openmmlab.com/mmocr/textrecog/sar/sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real/sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real_20220915_185451-1fd6b1fc.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/sar/sar_resnet31_sequential-decoder_5e_st-sub_mj-sub_sa_real/20220915_185451.log) |
|
||||
| [SAR-TTA](/configs/textrecog/sar/sar_r31_sequential_decoder_academic.py) | R31-1/8-1/4 | SequentialSARDecoder | 0.9530 | 0.9073 | 0.9389 | | 0.8002 | 0.8124 | 0.9028 | |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -35,13 +35,8 @@ model = dict(
|
|||
dictionary=dictionary,
|
||||
max_seq_len=30))
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
|
@ -56,7 +51,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=48,
|
||||
|
@ -71,3 +66,58 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[
|
||||
dict(
|
||||
type='RescaleToHeight',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
width_divisor=4)
|
||||
],
|
||||
[dict(type='PadToWidth', width=160)],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -18,6 +18,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real
|
||||
Alias: SAR
|
||||
In Collection: SAR
|
||||
Config: configs/textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
Metadata:
|
||||
|
|
|
@ -38,7 +38,9 @@ Scene text recognition (STR) is the task of recognizing character sequences in n
|
|||
| :--------------------------------------------------------------------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :---------------------------------------------------------------------: |
|
||||
| | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [Satrn](/configs/textrecog/satrn/satrn_shallow_5e_st_mj.py) | 0.9600 | 0.9181 | 0.9606 | | 0.8045 | 0.8837 | 0.8993 | [model](https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_shallow_5e_st_mj/satrn_shallow_5e_st_mj_20220915_152443-5fd04a4c.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_shallow_5e_st_mj/20220915_152443.log) |
|
||||
| [Satrn-TTA](/configs/textrecog/satrn/satrn_shallow_5e_st_mj.py) | 0.9530 | 0.9181 | 0.9527 | | 0.8276 | 0.8884 | 0.9028 | |
|
||||
| [Satrn_small](/configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py) | 0.9423 | 0.9011 | 0.9567 | | 0.7886 | 0.8574 | 0.8472 | [model](https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_shallow-small_5e_st_mj/satrn_shallow-small_5e_st_mj_20220915_152442-5591bf27.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_shallow-small_5e_st_mj/20220915_152442.log) |
|
||||
| [Satrn_small-TTA](/configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py) | 0.9380 | 0.8995 | 0.9488 | | 0.8122 | 0.8620 | 0.8507 | |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
dictionary = dict(
|
||||
type='Dictionary',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/english_digits_symbols.txt',
|
||||
|
@ -42,11 +40,7 @@ model = dict(
|
|||
std=[58.395, 57.12, 57.375]))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
ignore_empty=True,
|
||||
min_size=2),
|
||||
dict(type='LoadImageFromFile', ignore_empty=True, min_size=2),
|
||||
dict(type='LoadOCRAnnotations', with_text=True),
|
||||
dict(type='Resize', scale=(100, 32), keep_ratio=False),
|
||||
dict(
|
||||
|
@ -54,9 +48,8 @@ train_pipeline = [
|
|||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
# TODO Add Test Time Augmentation `MultiRotateAugOCR`
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', file_client_args=file_client_args),
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(type='Resize', scale=(100, 32), keep_ratio=False),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
@ -65,3 +58,50 @@ test_pipeline = [
|
|||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
|
||||
]
|
||||
|
||||
tta_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='TestTimeAug',
|
||||
transforms=[
|
||||
[
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=0, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=1, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
dict(
|
||||
type='ConditionApply',
|
||||
true_transforms=[
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[dict(cls='Rot90', k=3, keep_size=False)])
|
||||
],
|
||||
condition="results['img_shape'][1]<results['img_shape'][0]"
|
||||
),
|
||||
],
|
||||
[dict(type='Resize', scale=(100, 32), keep_ratio=False)],
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
[dict(type='LoadOCRAnnotations', with_text=True)],
|
||||
[
|
||||
dict(
|
||||
type='PackTextRecogInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape',
|
||||
'valid_ratio'))
|
||||
]
|
||||
])
|
||||
]
|
||||
|
|
|
@ -18,6 +18,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: satrn_shallow_5e_st_mj
|
||||
Alias: SATRN
|
||||
In Collection: SATRN
|
||||
Config: configs/textrecog/satrn/satrn_shallow_5e_st_mj.py
|
||||
Metadata:
|
||||
|
@ -52,6 +53,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textrecog/satrn/satrn_shallow_5e_st_mj/satrn_shallow_5e_st_mj_20220915_152443-5fd04a4c.pth
|
||||
|
||||
- Name: satrn_shallow-small_5e_st_mj
|
||||
Alias: SATRN_sm
|
||||
In Collection: SATRN
|
||||
Config: configs/textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
Metadata:
|
||||
|
|
|
@ -12,8 +12,10 @@ _base_ = [
|
|||
'_base_satrn_shallow.py',
|
||||
]
|
||||
|
||||
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=20, val_interval=1)
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.mjsynth_textrecog_test, _base_.synthtext_textrecog_train]
|
||||
train_list = [_base_.mjsynth_textrecog_train, _base_.synthtext_textrecog_train]
|
||||
test_list = [
|
||||
_base_.cute80_textrecog_test, _base_.iiit5k_textrecog_test,
|
||||
_base_.svt_textrecog_test, _base_.svtp_textrecog_test,
|
||||
|
|
|
@ -0,0 +1,69 @@
|
|||
# SVTR
|
||||
|
||||
> [SVTR: Scene Text Recognition with a Single Visual Model](https://arxiv.org/abs/2205.00159)
|
||||
|
||||
<!-- [ALGORITHM] -->
|
||||
|
||||
## Abstract
|
||||
|
||||
Dominant scene text recognition models commonly contain two building blocks, a visual model for feature extraction and a sequence model for text transcription. This hybrid architecture, although accurate, is complex and less efficient. In this study, we propose a Single Visual model for Scene Text recognition within the patch-wise image tokenization framework, which dispenses with the sequential modeling entirely. The method, termed SVTR, firstly decomposes an image text into small patches named character components. Afterward, hierarchical stages are recurrently carried out by component-level mixing, merging and/or combining. Global and local mixing blocks are devised to perceive the inter-character and intra-character patterns, leading to a multi-grained character component perception. Thus, characters are recognized by a simple linear prediction. Experimental results on both English and Chinese scene text recognition tasks demonstrate the effectiveness of SVTR. SVTR-L (Large) achieves highly competitive accuracy in English and outperforms existing methods by a large margin in Chinese, while running faster. In addition, SVTR-T (Tiny) is an effective and much smaller model, which shows appealing speed at inference.
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/22607038/210541576-025df5d5-f4d2-4037-82e0-246cf8cd3c25.png"/>
|
||||
</div>
|
||||
|
||||
## Dataset
|
||||
|
||||
### Train Dataset
|
||||
|
||||
| trainset | instance_num | repeat_num | source |
|
||||
| :-------: | :----------: | :--------: | :----: |
|
||||
| SynthText | 7266686 | 1 | synth |
|
||||
| Syn90k | 8919273 | 1 | synth |
|
||||
|
||||
### Test Dataset
|
||||
|
||||
| testset | instance_num | type |
|
||||
| :-----: | :----------: | :-------: |
|
||||
| IIIT5K | 3000 | regular |
|
||||
| SVT | 647 | regular |
|
||||
| IC13 | 1015 | regular |
|
||||
| IC15 | 2077 | irregular |
|
||||
| SVTP | 645 | irregular |
|
||||
| CT80 | 288 | irregular |
|
||||
|
||||
## Results and Models
|
||||
|
||||
| Methods | | Regular Text | | | | Irregular Text | | download |
|
||||
| :---------------------------------------------------------------: | :----: | :----------: | :-------: | :-: | :-------: | :------------: | :----: | :--------------------------------------------------------------------------: |
|
||||
| | IIIT5K | SVT | IC13-1015 | | IC15-2077 | SVTP | CT80 | |
|
||||
| [SVTR-tiny](/configs/textrecog/svtr/svtr-tiny_20e_st_mj.py) | - | - | - | | - | - | - | - |
|
||||
| [SVTR-small](/configs/textrecog/svtr/svtr-small_20e_st_mj.py) | 0.8553 | 0.9026 | 0.9448 | | 0.7496 | 0.8496 | 0.8854 | [model](https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-small_20e_st_mj/svtr-small_20e_st_mj-35d800d6.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-small_20e_st_mj/20230105_184454.log) |
|
||||
| [SVTR-small-TTA](/configs/textrecog/svtr/svtr-small_20e_st_mj.py) | 0.8397 | 0.8964 | 0.9241 | | 0.7597 | 0.8124 | 0.8646 | |
|
||||
| [SVTR-base](/configs/textrecog/svtr/svtr-base_20e_st_mj.py) | 0.8570 | 0.9181 | 0.9438 | | 0.7448 | 0.8388 | 0.9028 | [model](https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-base_20e_st_mj/svtr-base_20e_st_mj-ea500101.pth) \| [log](https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-base_20e_st_mj/20221227_175415.log) |
|
||||
| [SVTR-base-TTA](/configs/textrecog/svtr/svtr-base_20e_st_mj.py) | 0.8517 | 0.9011 | 0.9379 | | 0.7569 | 0.8279 | 0.8819 | |
|
||||
| [SVTR-large](/configs/textrecog/svtr/svtr-large_20e_st_mj.py) | - | - | - | | - | - | - | - |
|
||||
|
||||
```{note}
|
||||
The implementation and configuration follow the original code and paper, but there is still a gap between the reproduced results and the official ones. We appreciate any suggestions to improve its performance.
|
||||
```
|
||||
|
||||
## Citation

```bibtex
@inproceedings{ijcai2022p124,
  title     = {SVTR: Scene Text Recognition with a Single Visual Model},
  author    = {Du, Yongkun and Chen, Zhineng and Jia, Caiyan and Yin, Xiaoting and Zheng, Tianlun and Li, Chenxia and Du, Yuning and Jiang, Yu-Gang},
  booktitle = {Proceedings of the Thirty-First International Joint Conference on
               Artificial Intelligence, {IJCAI-22}},
  publisher = {International Joint Conferences on Artificial Intelligence Organization},
  editor    = {Lud De Raedt},
  pages     = {884--890},
  year      = {2022},
  month     = {7},
  note      = {Main Track},
  doi       = {10.24963/ijcai.2022/124},
  url       = {https://doi.org/10.24963/ijcai.2022/124},
}
```
@ -0,0 +1,159 @@
|
|||
# Character dictionary used by the CTC decoder: lowercase English letters
# and digits, plus the padding and unknown tokens the loss requires.
# NOTE(review): reconstructed from a diff-scrape; the original lines were
# interleaved with rendering noise that made the file invalid Python.
dictionary = dict(
    type='Dictionary',
    dict_file='{{ fileDirname }}/../../../dicts/lower_english_digits.txt',
    with_padding=True,
    with_unknown=True,
)
# SVTR recognizer (https://arxiv.org/abs/2205.00159): an STN rectification
# preprocessor, a single visual-model encoder, and a CTC-style decoder.
# NOTE(review): reconstructed from a diff-scrape; the original lines were
# interleaved with rendering noise that made the file invalid Python.
model = dict(
    type='SVTR',
    # Spatial Transformer Network rectifies curved/perspective text before
    # the encoder; output is resampled to the encoder's 32x100 input size.
    preprocessor=dict(
        type='STN',
        in_channels=3,
        resized_image_size=(32, 64),
        output_image_size=(32, 100),
        num_control_points=20,
        margins=[0.05, 0.05]),
    encoder=dict(
        type='SVTREncoder',
        img_size=[32, 100],
        in_channels=3,
        out_channels=192,
        # Three stages with growing width; local mixing first, global last.
        embed_dims=[64, 128, 256],
        depth=[3, 6, 3],
        num_heads=[2, 4, 8],
        mixer_types=['Local'] * 6 + ['Global'] * 6,
        window_size=[[7, 11], [7, 11], [7, 11]],
        merging_types='Conv',
        prenorm=False,
        max_seq_len=25),
    decoder=dict(
        type='SVTRDecoder',
        in_channels=192,
        module_loss=dict(
            type='CTCModuleLoss', letter_case='lower', zero_infinity=True),
        postprocessor=dict(type='CTCPostProcessor'),
        dictionary=dictionary),
    # Normalize pixels from [0, 255] to roughly [-1, 1].
    data_preprocessor=dict(
        type='TextRecogDataPreprocessor', mean=[127.5], std=[127.5]))
# Training pipeline: each augmentation is applied independently with
# probability 0.4, followed by a fixed resize and packing.
# NOTE(review): reconstructed from a diff-scrape; the original lines were
# interleaved with rendering noise that made the file invalid Python.
train_pipeline = [
    dict(type='LoadImageFromFile', ignore_empty=True, min_size=5),
    dict(type='LoadOCRAnnotations', with_text=True),
    dict(
        type='RandomApply',
        prob=0.4,
        transforms=[
            dict(type='TextRecogGeneralAug', ),
        ],
    ),
    dict(
        type='RandomApply',
        prob=0.4,
        transforms=[
            dict(type='CropHeight', ),
        ],
    ),
    # Blur only reasonably large crops; blurring tiny images would destroy
    # the text entirely.
    dict(
        type='ConditionApply',
        condition='min(results["img_shape"])>10',
        true_transforms=dict(
            type='RandomApply',
            prob=0.4,
            transforms=[
                dict(
                    type='TorchVisionWrapper',
                    op='GaussianBlur',
                    kernel_size=5,
                    sigma=1,
                ),
            ],
        )),
    dict(
        type='RandomApply',
        prob=0.4,
        transforms=[
            dict(
                type='TorchVisionWrapper',
                op='ColorJitter',
                brightness=0.5,
                saturation=0.5,
                contrast=0.5,
                hue=0.1),
        ]),
    dict(
        type='RandomApply',
        prob=0.4,
        transforms=[
            dict(type='ImageContentJitter', ),
        ],
    ),
    dict(
        type='RandomApply',
        prob=0.4,
        transforms=[
            dict(
                type='ImgAugWrapper',
                args=[dict(cls='AdditiveGaussianNoise', scale=0.1**0.5)]),
        ],
    ),
    dict(
        type='RandomApply',
        prob=0.4,
        transforms=[
            dict(type='ReversePixels', ),
        ],
    ),
    dict(type='Resize', scale=(256, 64)),
    dict(
        type='PackTextRecogInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
]
# Deterministic evaluation pipeline: no augmentation, same target size as
# training so the encoder sees identically shaped inputs.
# NOTE(review): reconstructed from a diff-scrape; the original lines were
# interleaved with rendering noise that made the file invalid Python.
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='Resize', scale=(256, 64)),
    dict(type='LoadOCRAnnotations', with_text=True),
    dict(
        type='PackTextRecogInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'valid_ratio'))
]
# Test-time augmentation: for vertical images (height > width) try 0, 90
# and 270 degree rotations and let TestTimeAug pick the best prediction.
# NOTE(review): reconstructed from a diff-scrape; the original lines were
# interleaved with rendering noise that made the file invalid Python.
tta_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='TestTimeAug',
        transforms=[
            [
                dict(
                    type='ConditionApply',
                    true_transforms=[
                        dict(
                            type='ImgAugWrapper',
                            args=[dict(cls='Rot90', k=0, keep_size=False)])
                    ],
                    condition="results['img_shape'][1]<results['img_shape'][0]"),
                dict(
                    type='ConditionApply',
                    true_transforms=[
                        dict(
                            type='ImgAugWrapper',
                            args=[dict(cls='Rot90', k=1, keep_size=False)])
                    ],
                    condition="results['img_shape'][1]<results['img_shape'][0]"),
                dict(
                    type='ConditionApply',
                    true_transforms=[
                        dict(
                            type='ImgAugWrapper',
                            args=[dict(cls='Rot90', k=3, keep_size=False)])
                    ],
                    condition="results['img_shape'][1]<results['img_shape'][0]"),
            ],
            [dict(type='Resize', scale=(256, 64))],
            [dict(type='LoadOCRAnnotations', with_text=True)],
            [
                dict(
                    type='PackTextRecogInputs',
                    meta_keys=('img_path', 'ori_shape', 'img_shape',
                               'valid_ratio'))
            ],
        ])
]
@ -0,0 +1,89 @@
|
|||
Collections:
- Name: SVTR
  Metadata:
    Training Data: OCRDataset
    Training Techniques:
      - AdamW
    Training Resources: 4x Tesla A100
    Epochs: 20
    Batch Size: 2048
    Architecture:
      - STN
      - SVTREncoder
      - SVTRDecoder
  Paper:
    URL: https://arxiv.org/pdf/2205.00159.pdf
    Title: 'SVTR: Scene Text Recognition with a Single Visual Model'
  README: configs/textrecog/svtr/README.md

Models:
  - Name: svtr-small_20e_st_mj
    Alias: svtr-small
    In Collection: SVTR
    Config: configs/textrecog/svtr/svtr-small_20e_st_mj.py
    Metadata:
      Training Data:
        - SynthText
        - Syn90k
    Results:
      - Task: Text Recognition
        Dataset: IIIT5K
        Metrics:
          word_acc: 0.8553
      - Task: Text Recognition
        Dataset: SVT
        Metrics:
          word_acc: 0.9026
      - Task: Text Recognition
        Dataset: ICDAR2013
        Metrics:
          word_acc: 0.9448
      - Task: Text Recognition
        Dataset: ICDAR2015
        Metrics:
          word_acc: 0.7496
      - Task: Text Recognition
        Dataset: SVTP
        Metrics:
          word_acc: 0.8496
      - Task: Text Recognition
        Dataset: CT80
        Metrics:
          word_acc: 0.8854
    Weights: https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-small_20e_st_mj/svtr-small_20e_st_mj-35d800d6.pth

  - Name: svtr-base_20e_st_mj
    Alias: svtr-base
    Batch Size: 1024
    In Collection: SVTR
    Config: configs/textrecog/svtr/svtr-base_20e_st_mj.py
    Metadata:
      Training Data:
        - SynthText
        - Syn90k
    Results:
      - Task: Text Recognition
        Dataset: IIIT5K
        Metrics:
          word_acc: 0.8570
      - Task: Text Recognition
        Dataset: SVT
        Metrics:
          word_acc: 0.9181
      - Task: Text Recognition
        Dataset: ICDAR2013
        Metrics:
          word_acc: 0.9438
      - Task: Text Recognition
        Dataset: ICDAR2015
        Metrics:
          word_acc: 0.7448
      - Task: Text Recognition
        Dataset: SVTP
        Metrics:
          word_acc: 0.8388
      - Task: Text Recognition
        Dataset: CT80
        Metrics:
          word_acc: 0.9028
    Weights: https://download.openmmlab.com/mmocr/textrecog/svtr/svtr-base_20e_st_mj/svtr-base_20e_st_mj-ea500101.pth
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue