mirror of https://github.com/open-mmlab/mmocr.git
Compare commits
195 Commits
Author | SHA1 | Date |
---|---|---|
|
966296f26a | |
|
2caab0a4e7 | |
|
b18a09b2f0 | |
|
9551af6e5a | |
|
1dcd6fa695 | |
|
6b3f6f5285 | |
|
0cd2878b04 | |
|
bbe8964f00 | |
|
a344280bcb | |
|
4eb3cc7de5 | |
|
e9a31ddd70 | |
|
1e696887b9 | |
|
231cff5da2 | |
|
8afc79f370 | |
|
9e713c63fe | |
|
d7c59f3325 | |
|
a7e326f829 | |
|
97efb04c50 | |
|
e0a78c021b | |
|
16de16f8f8 | |
|
e6174b29fe | |
|
4842599191 | |
|
1c91a9820a | |
|
afe58a4a77 | |
|
67f25c6fb3 | |
|
6342ff262c | |
|
4b887676a3 | |
|
bb591d2b1b | |
|
59d89e10c7 | |
|
73df26d749 | |
|
f47cff5199 | |
|
c886936117 | |
|
22f40b79ed | |
|
1a379f2f1b | |
|
d0dc90253a | |
|
6d9582b6c7 | |
|
e0707bf5f2 | |
|
ae252626d3 | |
|
d80df99037 | |
|
506f7d296e | |
|
9caacc76ee | |
|
63a6ed4e6c | |
|
c6580a48c1 | |
|
7ef34c4407 | |
|
47f54304f5 | |
|
465316f193 | |
|
590af4b5e8 | |
|
a58c77df80 | |
|
e9b23c56ad | |
|
75c06d34bb | |
|
bfb36d81b3 | |
|
45a8d89fb9 | |
|
d56155c82d | |
|
33cbc9b92f | |
|
cc78866ed7 | |
|
f250ea2379 | |
|
5685bb0f38 | |
|
5670695338 | |
|
81fd74c266 | |
|
47f7fc06ed | |
|
82f81ff67c | |
|
3aa9572a64 | |
|
62d440fe8e | |
|
0894178343 | |
|
7cfd412ce7 | |
|
280a89c18e | |
|
6eaa0673f7 | |
|
9b0f1da1e7 | |
|
37c5d371c7 | |
|
e9bf689f74 | |
|
1127240108 | |
|
df0be646ea | |
|
f820470415 | |
|
7cea6a6419 | |
|
3240bace4a | |
|
b21d2b964a | |
|
332089ca11 | |
|
b3be8cfbb3 | |
|
d25e061b03 | |
|
20a87d476c | |
|
d8e615921d | |
|
2a2cab3c8c | |
|
c870046a4a | |
|
edf085c010 | |
|
c3aef21eea | |
|
03a23ca4db | |
|
3b0a41518d | |
|
ad470e323a | |
|
2d743cfa19 | |
|
2b5cdbdbfc | |
|
a82fc66812 | |
|
bed778fc3f | |
|
689ecf0f5f | |
|
bf41194965 | |
|
dff97edaad | |
|
50f55c2976 | |
|
b3f21dd95d | |
|
7f4a1eecdc | |
|
6992923768 | |
|
b64565c10f | |
|
39f99ac720 | |
|
27b6a68586 | |
|
37dca0600a | |
|
0aa5d7be6d | |
|
b0557c2c55 | |
|
d679691a02 | |
|
acae8da223 | |
|
4d5ed98177 | |
|
5dbacfe202 | |
|
65e746eb3d | |
|
7e9f7756bc | |
|
53e72e4440 | |
|
1413b5043a | |
|
b79382cd6b | |
|
e3fd570687 | |
|
9baf440d7a | |
|
89606a1cf1 | |
|
e1aa1f6f42 | |
|
101f2b6eef | |
|
d2a6845c64 | |
|
0ec1524f54 | |
|
e81bb13696 | |
|
24bfb18768 | |
|
fb78c942d6 | |
|
4396e8f5d8 | |
|
c38618bf51 | |
|
f6da8715b9 | |
|
b11c58897c | |
|
302efb9db3 | |
|
419f98d8a4 | |
|
0bd62d67c8 | |
|
e096df8b57 | |
|
547ed31eda | |
|
5cfe481f7f | |
|
ffe5237aa8 | |
|
58ea06d986 | |
|
38d2fc3438 | |
|
5ded52230a | |
|
ebdf1cf90d | |
|
f4940de2a4 | |
|
79a4b2042c | |
|
e095107518 | |
|
d9ea92191e | |
|
3a0aa05d9c | |
|
9ac9a227ec | |
|
5940d6bc9c | |
|
fa4fd1fd42 | |
|
08cab32832 | |
|
b9152a2239 | |
|
782bcc446d | |
|
a12c215e85 | |
|
b8c445b04f | |
|
d9356252af | |
|
c957ded662 | |
|
2b6d258ae1 | |
|
c32ce6baa3 | |
|
31a353a892 | |
|
f6472eab2a | |
|
24aaec2675 | |
|
26e7ea6e77 | |
|
cfce57ad87 | |
|
37f3b88a05 | |
|
29107ef81d | |
|
3433c8cba4 | |
|
e067ddea23 | |
|
d8c0df4827 | |
|
b8e395ed71 | |
|
b1a3b94508 | |
|
06a20fae71 | |
|
5fbb22cd4e | |
|
9785dc616c | |
|
00254f0390 | |
|
cad55f6178 | |
|
e28fc326ae | |
|
6b2077ef19 | |
|
1d5f43e79f | |
|
d514784878 | |
|
34e97abcb0 | |
|
62ff782b71 | |
|
99c86a74b8 | |
|
79a778689d | |
|
baa2b4f863 | |
|
31c41d82c9 | |
|
8737675445 | |
|
b65b65e8f8 | |
|
0afbb70b5d | |
|
abf5a8972c | |
|
cf454ca76c | |
|
d92444097d | |
|
f1dd437d8d | |
|
1c06edc68f | |
|
8864fa174b | |
|
a09437adaa | |
|
9040263b04 | |
|
52a7873973 |
|
@ -80,7 +80,7 @@ jobs:
|
|||
type: string
|
||||
cuda:
|
||||
type: enum
|
||||
enum: ["10.1", "10.2", "11.1"]
|
||||
enum: ["10.1", "10.2", "11.1", "11.7"]
|
||||
cudnn:
|
||||
type: integer
|
||||
default: 7
|
||||
|
@ -129,6 +129,7 @@ workflows:
|
|||
ignore:
|
||||
- dev-1.x
|
||||
- 1.x
|
||||
- main
|
||||
pr_stage_test:
|
||||
when:
|
||||
not:
|
||||
|
@ -141,17 +142,18 @@ workflows:
|
|||
ignore:
|
||||
- dev-1.x
|
||||
- test-1.x
|
||||
- main
|
||||
- build_cpu:
|
||||
name: minimum_version_cpu
|
||||
torch: 1.6.0
|
||||
torchvision: 0.7.0
|
||||
python: 3.6.9 # The lowest python 3.6.x version available on CircleCI images
|
||||
python: "3.7"
|
||||
requires:
|
||||
- lint
|
||||
- build_cpu:
|
||||
name: maximum_version_cpu
|
||||
torch: 1.12.1
|
||||
torchvision: 0.13.1
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
python: 3.9.0
|
||||
requires:
|
||||
- minimum_version_cpu
|
||||
|
@ -167,6 +169,15 @@ workflows:
|
|||
cuda: "10.2"
|
||||
requires:
|
||||
- hold
|
||||
- build_cuda:
|
||||
name: mainstream_version_gpu
|
||||
torch: 2.0.0
|
||||
# Use double quotation mark to explicitly specify its type
|
||||
# as string instead of number
|
||||
cuda: "11.7"
|
||||
cudnn: 8
|
||||
requires:
|
||||
- hold
|
||||
merge_stage_test:
|
||||
when:
|
||||
not:
|
||||
|
@ -182,3 +193,4 @@ workflows:
|
|||
branches:
|
||||
only:
|
||||
- dev-1.x
|
||||
- main
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py
|
||||
textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
textdet/panet/panet_resnet18_fpem-ffm_600e_icdar2015.py
|
||||
textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
textrecog/master/master_resnet31_12e_st_mj_sa.py
|
||||
textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
textrecog/svtr/svtr-small_20e_st_mj.py
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
third_part_libs = [
|
||||
'pip install -r ../requirements/albu.txt',
|
||||
]
|
||||
|
||||
default_floating_range = 0.5
|
|
@ -0,0 +1,9 @@
|
|||
textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/svtr/svtr-small_20e_st_mj.py
|
|
@ -13,3 +13,6 @@ mmocr/models/textdet/detectors/mmdet_wrapper.py
|
|||
|
||||
# It will be removed after KieVisualizer and TextSpotterVisualizer
|
||||
mmocr/visualization/visualize.py
|
||||
|
||||
# Add tests for data preparers later
|
||||
mmocr/datasets/preparers
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
name: "🐞 Bug report"
|
||||
description: "Create a report to help us reproduce and fix the bug"
|
||||
labels: kind/bug
|
||||
title: "[Bug] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
If this issue is about installing MMCV, please file an issue at [MMCV](https://github.com/open-mmlab/mmcv/issues/new/choose).
|
||||
If it's anything about model deployment, please raise it to [MMDeploy](https://github.com/open-mmlab/mmdeploy)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Prerequisite
|
||||
description: Please check the following items before creating a new issue.
|
||||
options:
|
||||
- label: I have searched [Issues](https://github.com/open-mmlab/mmocr/issues) and [Discussions](https://github.com/open-mmlab/mmocr/discussions) but cannot get the expected help.
|
||||
required: true
|
||||
# - label: I have read the [FAQ documentation](https://mmocr.readthedocs.io/en/1.x/notes/4_faq.html) but cannot get the expected help.
|
||||
# required: true
|
||||
- label: The bug has not been fixed in the [latest version (0.x)](https://github.com/open-mmlab/mmocr) or [latest version (1.x)](https://github.com/open-mmlab/mmocr/tree/dev-1.x).
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: task
|
||||
attributes:
|
||||
label: Task
|
||||
description: The problem arises when
|
||||
options:
|
||||
- I'm using the official example scripts/configs for the officially supported tasks/models/datasets.
|
||||
- I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: branch
|
||||
attributes:
|
||||
label: Branch
|
||||
description: The problem arises when I'm working on
|
||||
options:
|
||||
- main branch https://github.com/open-mmlab/mmocr
|
||||
- 1.x branch https://github.com/open-mmlab/mmocr/tree/dev-1.x
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Environment
|
||||
description: |
|
||||
Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and copy-paste it here.
|
||||
You may add additional information that may be helpful for locating the problem, such as
|
||||
- How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - code sample
|
||||
description: |
|
||||
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
|
||||
placeholder: |
|
||||
```python
|
||||
# Sample code to reproduce the problem
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - command or script
|
||||
description: |
|
||||
What command or script did you run?
|
||||
placeholder: |
|
||||
```shell
|
||||
The command or script you run.
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - error message
|
||||
description: |
|
||||
Please provide the error message or logs you got, with the full traceback.
|
||||
|
||||
Tip: You can attach images or log files by dragging them into the text area.
|
||||
placeholder: |
|
||||
```
|
||||
The error message or logs you got, with the full traceback.
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: |
|
||||
Tell us anything else you think we should know.
|
||||
|
||||
Tip: You can attach images or log files by dragging them into the text area.
|
||||
placeholder: |
|
||||
1. What's your expected result?
|
||||
2. What dataset did you use?
|
||||
3. What do you think might be the reason?
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
If you have already identified the reason, we strongly appreciate you creating a new PR to fix it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,39 @@
|
|||
name: 🚀 Feature request
|
||||
description: Suggest an idea for this project
|
||||
labels: [feature-request]
|
||||
title: "[Feature] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: What is the feature?
|
||||
description: Tell us more about the feature and how this feature can help.
|
||||
placeholder: |
|
||||
E.g., It is inconvenient when \[....\].
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Any other context?
|
||||
description: |
|
||||
Have you considered any alternative solutions or features? If so, what are they? Also, feel free to add any other context or screenshots about the feature request here.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
We strongly appreciate you creating a new PR to implement it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,51 @@
|
|||
name: "\U0001F31F New model/dataset/scheduler addition"
|
||||
description: Submit a proposal/request to implement a new model / dataset / scheduler
|
||||
labels: [ "feature-request" ]
|
||||
title: "[New Models] "
|
||||
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: textarea
|
||||
id: description-request
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Model/Dataset/Scheduler description
|
||||
description: |
|
||||
Put any and all important information relative to the model/dataset/scheduler
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Open source status
|
||||
description: |
|
||||
Please provide the open-source status, which would be very helpful
|
||||
options:
|
||||
- label: "The model implementation is available"
|
||||
- label: "The model weights are available."
|
||||
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Provide useful links for the implementation
|
||||
description: |
|
||||
Please provide information regarding the implementation, the weights, and the authors.
|
||||
Please mention the authors by @gh-username if you're aware of their usernames.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
We strongly appreciate you creating a new PR to implement it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,48 @@
|
|||
name: 📚 Documentation
|
||||
description: Report an issue related to the documentation.
|
||||
labels: "docs"
|
||||
title: "[Docs] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: dropdown
|
||||
id: branch
|
||||
attributes:
|
||||
label: Branch
|
||||
description: This issue is related to the
|
||||
options:
|
||||
- master branch https://mmocr.readthedocs.io/en/latest/
|
||||
- 1.x branch https://mmocr.readthedocs.io/en/dev-1.x/
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: 📚 The doc issue
|
||||
description: >
|
||||
A clear and concise description of the issue.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Suggest a potential alternative/fix
|
||||
description: >
|
||||
Tell us how we could improve the documentation in this regard.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
If you have already identified the reason, we strongly appreciate you creating a new PR to fix it [**here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -1,6 +1,12 @@
|
|||
blank_issues_enabled: false
|
||||
|
||||
contact_links:
|
||||
- name: MMOCR Documentation
|
||||
url: https://mmocr.readthedocs.io/en/latest/
|
||||
about: Check if your question is answered in docs
|
||||
- name: ❔ FAQ
|
||||
url: https://mmocr.readthedocs.io/en/dev-1.x/get_started/faq.html
|
||||
about: Is your question frequently asked?
|
||||
- name: 💬 Forum
|
||||
url: https://github.com/open-mmlab/mmocr/discussions
|
||||
about: Ask general usage questions and discuss with other MMOCR community members
|
||||
- name: 🌐 Explore OpenMMLab
|
||||
url: https://openmmlab.com/
|
||||
about: Get to know more about OpenMMLab
|
||||
|
|
|
@ -1,45 +0,0 @@
|
|||
---
|
||||
name: Error report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
Thanks for your error report and we appreciate it a lot.
|
||||
|
||||
**Checklist**
|
||||
|
||||
1. I have searched related issues but cannot get the expected help.
|
||||
2. The bug has not been fixed in the latest version.
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**Reproduction**
|
||||
|
||||
1. What command or script did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the command.
|
||||
```
|
||||
|
||||
2. Did you make any modifications on the code or config? Did you understand what you have modified?
|
||||
3. What dataset did you use?
|
||||
|
||||
**Environment**
|
||||
|
||||
1. Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and paste it here.
|
||||
2. You may add additional information that may be helpful for locating the problem, such as
|
||||
- How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
|
||||
**Error traceback**
|
||||
If applicable, paste the error traceback here.
|
||||
|
||||
```none
|
||||
A placeholder for traceback.
|
||||
```
|
||||
|
||||
**Bug fix**
|
||||
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
|
|
@ -1,21 +0,0 @@
|
|||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Describe the feature**
|
||||
|
||||
**Motivation**
|
||||
A clear and concise description of the motivation of the feature.
|
||||
Ex1. It is inconvenient when \[....\].
|
||||
Ex2. There is a recent paper \[....\], which is very helpful for \[....\].
|
||||
|
||||
**Related resources**
|
||||
If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
|
|
@ -1,7 +0,0 @@
|
|||
---
|
||||
name: General questions
|
||||
about: Ask general questions to get help
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
---
|
|
@ -1,67 +0,0 @@
|
|||
---
|
||||
name: Reimplementation Questions
|
||||
about: Ask about questions during model reimplementation
|
||||
title: ''
|
||||
labels: reimplementation
|
||||
assignees: ''
|
||||
---
|
||||
|
||||
**Notice**
|
||||
|
||||
There are several common situations in the reimplementation issues as below
|
||||
|
||||
1. Reimplement a model in the model zoo using the provided configs
|
||||
2. Reimplement a model in the model zoo on other dataset (e.g., custom datasets)
|
||||
3. Reimplement a custom model but all the components are implemented in MMOCR
|
||||
4. Reimplement a custom model with new modules implemented by yourself
|
||||
|
||||
There are several things to do for different cases as below.
|
||||
|
||||
- For cases 1 & 3, please follow the steps in the following sections so that we can help to quickly identify the issue.
|
||||
- For cases 2 & 4, please understand that we are not able to offer much help here because we usually do not know the full code and the users should be responsible for the code they write.
|
||||
- One suggestion for cases 2 & 4 is that the users should first check whether the bug lies in the self-implemented code or the original code. For example, users can first make sure that the same model runs well on supported datasets. If you still need help, please describe what you have done and what you obtain in the issue, and follow the steps in the following sections and try to be as clear as possible so that we can better help you.
|
||||
|
||||
**Checklist**
|
||||
|
||||
1. I have searched related issues but cannot get the expected help.
|
||||
2. The issue has not been fixed in the latest version.
|
||||
|
||||
**Describe the issue**
|
||||
|
||||
A clear and concise description of the problem you met and what you have done.
|
||||
|
||||
**Reproduction**
|
||||
|
||||
1. What command or script did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the command.
|
||||
```
|
||||
|
||||
2. What config did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the config.
|
||||
```
|
||||
|
||||
3. Did you make any modifications on the code or config? Did you understand what you have modified?
|
||||
4. What dataset did you use?
|
||||
|
||||
**Environment**
|
||||
|
||||
1. Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and paste it here.
|
||||
2. You may add additional information that may be helpful for locating the problem, such as
|
||||
1. How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
2. Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
|
||||
**Results**
|
||||
|
||||
If applicable, paste the related results here, e.g., what you expect and what you get.
|
||||
|
||||
```none
|
||||
A placeholder for results comparison
|
||||
```
|
||||
|
||||
**Issue fix**
|
||||
|
||||
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
|
|
@ -9,6 +9,7 @@ on:
|
|||
- 'demo/**'
|
||||
- '.dev_scripts/**'
|
||||
- '.circleci/**'
|
||||
- 'projects/**'
|
||||
branches:
|
||||
- dev-1.x
|
||||
|
||||
|
@ -18,24 +19,24 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build_cpu_py:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.6, 3.8, 3.9]
|
||||
python-version: [3.8, 3.9]
|
||||
torch: [1.8.1]
|
||||
include:
|
||||
- torch: 1.8.1
|
||||
torchvision: 0.9.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
|
@ -55,11 +56,11 @@ jobs:
|
|||
coverage report -m
|
||||
|
||||
build_cpu_pt:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1]
|
||||
torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1, 1.13.0]
|
||||
include:
|
||||
- torch: 1.6.0
|
||||
torchvision: 0.7.0
|
||||
|
@ -75,16 +76,21 @@ jobs:
|
|||
torchvision: 0.12.0
|
||||
- torch: 1.12.1
|
||||
torchvision: 0.13.1
|
||||
- torch: 1.13.0
|
||||
torchvision: 0.14.0
|
||||
- torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
python-version: 3.8
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
|
@ -113,53 +119,20 @@ jobs:
|
|||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
|
||||
build_cu102:
|
||||
runs-on: ubuntu-18.04
|
||||
container:
|
||||
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
include:
|
||||
- torch: 1.8.1
|
||||
cuda: 10.2
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Fetch GPG keys
|
||||
run: |
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
|
||||
- name: Install Python-dev
|
||||
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
|
||||
if: ${{matrix.python-version != 3.9}}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python setup.py check -m -s
|
||||
TORCH_CUDA_ARCH_LIST=7.0 pip install -e .
|
||||
|
||||
build_windows:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: windows-2022
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-2022]
|
||||
python: [3.7]
|
||||
platform: [cpu, cu111]
|
||||
torch: [1.8.1]
|
||||
torchvision: [0.9.1]
|
||||
include:
|
||||
- python-version: 3.8
|
||||
platform: cu117
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
|
@ -167,11 +140,11 @@ jobs:
|
|||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
run: python -m pip install --upgrade pip
|
||||
- name: Install lmdb
|
||||
run: pip install lmdb
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
|
||||
run: pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
|
|
|
@ -9,6 +9,7 @@ on:
|
|||
- 'demo/**'
|
||||
- '.dev_scripts/**'
|
||||
- '.circleci/**'
|
||||
- 'projects/**'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
@ -16,7 +17,7 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build_cpu:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
|
@ -24,15 +25,15 @@ jobs:
|
|||
- torch: 1.8.1
|
||||
torchvision: 0.9.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
|
@ -60,63 +61,32 @@ jobs:
|
|||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
|
||||
build_cu102:
|
||||
runs-on: ubuntu-18.04
|
||||
container:
|
||||
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.8]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Fetch GPG keys
|
||||
run: |
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
|
||||
- name: Install Python-dev
|
||||
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
|
||||
if: ${{matrix.python-version != 3.9}}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python setup.py check -m -s
|
||||
TORCH_CUDA_ARCH_LIST=7.0 pip install -e .
|
||||
|
||||
build_windows:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: windows-2022
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-2022]
|
||||
python: [3.7]
|
||||
platform: [cpu, cu111]
|
||||
torch: [1.8.1]
|
||||
torchvision: [0.9.1]
|
||||
include:
|
||||
- python-version: 3.8
|
||||
platform: cu117
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
run: python -m pip install --upgrade pip
|
||||
- name: Install lmdb
|
||||
run: pip install lmdb
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
|
||||
run: pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
|
|
|
@ -67,6 +67,7 @@ instance/
|
|||
# Sphinx documentation
|
||||
docs/en/_build/
|
||||
docs/zh_cn/_build/
|
||||
docs/*/api/generated/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
@ -142,3 +143,4 @@ mmocr/.mim
|
|||
workdirs/
|
||||
.history/
|
||||
.dev/
|
||||
data/
|
||||
|
|
|
@ -6,5 +6,4 @@ assign:
|
|||
'*/1 * * * *'
|
||||
assignees:
|
||||
- gaotongxiao
|
||||
- xinke-wang
|
||||
- Harold-lkk
|
||||
|
|
|
@ -4,8 +4,8 @@ repos:
|
|||
rev: 5.0.4
|
||||
hooks:
|
||||
- id: flake8
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.10.1
|
||||
- repo: https://github.com/zhouzaida/isort
|
||||
rev: 5.12.1
|
||||
hooks:
|
||||
- id: isort
|
||||
- repo: https://github.com/pre-commit/mirrors-yapf
|
||||
|
@ -20,10 +20,18 @@ repos:
|
|||
rev: v4.3.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
exclude: ^dicts/
|
||||
exclude: |
|
||||
(?x)^(
|
||||
dicts/|
|
||||
projects/.*?/dicts/
|
||||
)
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
exclude: ^dicts/
|
||||
exclude: |
|
||||
(?x)^(
|
||||
dicts/|
|
||||
projects/.*?/dicts/
|
||||
)
|
||||
- id: requirements-txt-fixer
|
||||
- id: double-quote-string-fixer
|
||||
- id: check-merge-conflict
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
include requirements/*.txt
|
||||
include mmocr/.mim/model-index.yml
|
||||
include mmocr/.mim/dicts/*.txt
|
||||
recursive-include mmocr/.mim/configs *.py *.yml
|
||||
recursive-include mmocr/.mim/tools *.sh *.py
|
||||
|
|
104
README.md
104
README.md
|
@ -40,6 +40,39 @@
|
|||
English | [简体中文](README_zh-CN.md)
|
||||
|
||||
</div>
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## Latest Updates
|
||||
|
||||
**The default branch is now `main` and the code on the branch has been upgraded to v1.0.0. The old `main` branch (v0.6.3) code now exists on the `0.x` branch.** If you have been using the `main` branch and encounter upgrade issues, please read the [Migration Guide](https://mmocr.readthedocs.io/en/dev-1.x/migration/overview.html) and notes on [Branches](https://mmocr.readthedocs.io/en/dev-1.x/migration/branches.html) .
|
||||
|
||||
v1.0.0 was released on 2023-04-06. Major updates since 1.0.0rc6 include:
|
||||
|
||||
1. Support for SCUT-CTW1500, SynthText, and MJSynth datasets in Dataset Preparer
|
||||
2. Updated FAQ and documentation
|
||||
3. Deprecation of file_client_args in favor of backend_args
|
||||
4. Added a new MMOCR tutorial notebook
|
||||
|
||||
To learn more about the updates in MMOCR 1.0, please refer to [What's New in MMOCR 1.x](https://mmocr.readthedocs.io/en/dev-1.x/migration/news.html), or
|
||||
read the [Changelog](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) for more details!
|
||||
|
||||
## Introduction
|
||||
|
||||
|
@ -69,20 +102,6 @@ The main branch works with **PyTorch 1.6+**.
|
|||
|
||||
The toolbox provides a comprehensive set of utilities which can help users assess the performance of models. It includes visualizers which allow visualization of images, ground truths as well as predicted bounding boxes, and a validation tool for evaluating checkpoints during training. It also includes data converters to demonstrate how to convert your own data to the annotation files which the toolbox supports.
|
||||
|
||||
## What's New
|
||||
|
||||
1. **New engines**. MMOCR 1.x is based on [MMEngine](https://github.com/open-mmlab/mmengine), which provides a general and powerful runner that allows more flexible customizations and significantly simplifies the entrypoints of high-level interfaces.
|
||||
|
||||
2. **Unified interfaces**. As a part of the OpenMMLab 2.0 projects, MMOCR 1.x unifies and refactors the interfaces and internal logics of train, testing, datasets, models, evaluation, and visualization. All the OpenMMLab 2.0 projects share the same design in those interfaces and logics to allow the emergence of multi-task/modality algorithms.
|
||||
|
||||
3. **Cross project calling**. Benefiting from the unified design, you can use the models implemented in other OpenMMLab projects, such as MMDet. We provide an example of how to use MMDetection's Mask R-CNN through `MMDetWrapper`. Check our documents for more details. More wrappers will be released in the future.
|
||||
|
||||
4. **Stronger visualization**. We provide a series of useful tools which are mostly based on brand-new visualizers. As a result, it is more convenient for the users to explore the models and datasets now.
|
||||
|
||||
5. **More documentation and tutorials**. We add a bunch of documentation and tutorials to help users get started more smoothly. Read it [here](https://mmocr.readthedocs.io/en/dev-1.x/).
|
||||
|
||||
Read [Changelog](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) for more details!
|
||||
|
||||
## Installation
|
||||
|
||||
MMOCR depends on [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) and [MMDetection](https://github.com/open-mmlab/mmdetection).
|
||||
|
@ -93,13 +112,9 @@ Please refer to [Install Guide](https://mmocr.readthedocs.io/en/dev-1.x/get_star
|
|||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
mim install mmengine
|
||||
mim install 'mmcv>=2.0.0rc1'
|
||||
mim install 'mmdet>=3.0.0rc0'
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
git checkout 1.x
|
||||
pip3 install -e .
|
||||
mim install -e .
|
||||
```
|
||||
|
||||
## Get Started
|
||||
|
@ -110,6 +125,13 @@ Please see [Quick Run](https://mmocr.readthedocs.io/en/dev-1.x/get_started/quick
|
|||
|
||||
Supported algorithms:
|
||||
|
||||
<details open>
|
||||
<summary>Backbone</summary>
|
||||
|
||||
- [x] [oCLIP](configs/backbone/oclip/README.md) (ECCV'2022)
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>Text Detection</summary>
|
||||
|
||||
|
@ -127,12 +149,14 @@ Supported algorithms:
|
|||
<summary>Text Recognition</summary>
|
||||
|
||||
- [x] [ABINet](configs/textrecog/abinet/README.md) (CVPR'2021)
|
||||
- [x] [ASTER](configs/textrecog/aster/README.md) (TPAMI'2018)
|
||||
- [x] [CRNN](configs/textrecog/crnn/README.md) (TPAMI'2016)
|
||||
- [x] [MASTER](configs/textrecog/master/README.md) (PR'2021)
|
||||
- [x] [NRTR](configs/textrecog/nrtr/README.md) (ICDAR'2019)
|
||||
- [x] [RobustScanner](configs/textrecog/robust_scanner/README.md) (ECCV'2020)
|
||||
- [x] [SAR](configs/textrecog/sar/README.md) (AAAI'2019)
|
||||
- [x] [SATRN](configs/textrecog/satrn/README.md) (CVPR'2020 Workshop on Text and Documents in the Deep Learning Era)
|
||||
- [x] [SVTR](configs/textrecog/svtr/README.md) (IJCAI'2022)
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -143,8 +167,21 @@ Supported algorithms:
|
|||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>Text Spotting</summary>
|
||||
|
||||
- [x] [ABCNet](projects/ABCNet/README.md) (CVPR'2020)
|
||||
- [x] [ABCNetV2](projects/ABCNet/README_V2.md) (TPAMI'2021)
|
||||
- [x] [SPTS](projects/SPTS/README.md) (ACM MM'2022)
|
||||
|
||||
</details>
|
||||
|
||||
Please refer to [model_zoo](https://mmocr.readthedocs.io/en/dev-1.x/modelzoo.html) for more details.
|
||||
|
||||
## Projects
|
||||
|
||||
[Here](projects/README.md) are some implementations of SOTA models and solutions built on MMOCR, which are supported and maintained by community users. These projects demonstrate the best practices based on MMOCR for research and product development. We welcome and appreciate all contributions to the OpenMMLab ecosystem.
|
||||
|
||||
## Contributing
|
||||
|
||||
We appreciate all contributions to improve MMOCR. Please refer to [CONTRIBUTING.md](.github/CONTRIBUTING.md) for the contributing guidelines.
|
||||
|
@ -159,11 +196,11 @@ We hope the toolbox and benchmark could serve the growing research community by
|
|||
If you find this project useful in your research, please consider citing:
|
||||
|
||||
```bibtex
|
||||
@article{mmocr2021,
|
||||
@article{mmocr2022,
|
||||
title={MMOCR: A Comprehensive Toolbox for Text Detection, Recognition and Understanding},
|
||||
author={Kuang, Zhanghui and Sun, Hongbin and Li, Zhizhong and Yue, Xiaoyu and Lin, Tsui Hin and Chen, Jianyong and Wei, Huaqiang and Zhu, Yiqin and Gao, Tong and Zhang, Wenwei and Chen, Kai and Zhang, Wayne and Lin, Dahua},
|
||||
journal= {arXiv preprint arXiv:2108.06543},
|
||||
year={2021}
|
||||
author={MMOCR Developer Team},
|
||||
howpublished = {\url{https://github.com/open-mmlab/mmocr}},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -171,7 +208,7 @@ If you find this project useful in your research, please consider cite:
|
|||
|
||||
This project is released under the [Apache 2.0 license](LICENSE).
|
||||
|
||||
## Projects in OpenMMLab
|
||||
## OpenMMLab Family
|
||||
|
||||
- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab foundational library for training deep learning models
|
||||
- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
|
||||
|
@ -193,3 +230,22 @@ This project is released under the [Apache 2.0 license](LICENSE).
|
|||
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
|
||||
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
|
||||
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.
|
||||
|
||||
## Welcome to the OpenMMLab community
|
||||
|
||||
Scan the QR codes below to follow the OpenMMLab team's [**Zhihu Official Account**](https://www.zhihu.com/people/openmmlab) and join the OpenMMLab team's [**QQ Group**](https://jq.qq.com/?_wv=1027&k=aCvMxdr3), add our WeChat account to join the official WeChat group, or join our [**Slack**](https://join.slack.com/t/mmocrworkspace/shared_invite/zt-1ifqhfla8-yKnLO_aKhVA2h71OrK8GZw).
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/qq_group_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/wechat_qrcode.jpg" height="400" />
|
||||
</div>
|
||||
|
||||
In the OpenMMLab community, we will provide you with:
|
||||
|
||||
- 📢 Sharing of the latest core technologies of AI frameworks
|
||||
- 💻 Explanations of the source code of common PyTorch modules
|
||||
- 📰 News related to the release of OpenMMLab
|
||||
- 🚀 Introduction of cutting-edge algorithms developed by OpenMMLab
|
||||
- 🏃 More efficient answers and feedback
|
||||
- 🔥 Provide a platform for communication with developers from all walks of life
|
||||
|
||||
The OpenMMLab community looks forward to your participation! 👬
|
||||
|
|
|
@ -41,6 +41,39 @@
|
|||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## 近期更新
|
||||
|
||||
**默认分支目前为 `main`,且分支上的代码已经切换到 v1.0.0 版本。旧版 `main` 分支(v0.6.3)的代码现存在 `0.x` 分支上。** 如果您一直在使用 `main` 分支,并遇到升级问题,请阅读 [迁移指南](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/overview.html) 和 [分支说明](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/branches.html) 。
|
||||
|
||||
最新的版本 v1.0.0 于 2023-04-06 发布。其相对于 1.0.0rc6 的主要更新如下:
|
||||
|
||||
1. Dataset Preparer 中支持了 SCUT-CTW1500, SynthText 和 MJSynth 数据集;
|
||||
2. 更新了文档和 FAQ;
|
||||
3. 升级文件后端;使用了 `backend_args` 替换 `file_client_args`;
|
||||
4. 增加了 MMOCR 教程 notebook。
|
||||
|
||||
如果需要了解 MMOCR 1.0 相对于 0.x 的升级内容,请阅读 [MMOCR 1.x 更新汇总](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/news.html);或者阅读[更新日志](https://mmocr.readthedocs.io/zh_CN/dev-1.x/notes/changelog.html)以获取更多信息。
|
||||
|
||||
## 简介
|
||||
|
||||
MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检测,文本识别以及相应的下游任务,如关键信息提取。 它是 OpenMMLab 项目的一部分。
|
||||
|
@ -63,27 +96,12 @@ MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检
|
|||
|
||||
-**模块化设计**
|
||||
|
||||
MMOCR 的模块化设计使用户可以定义自己的优化器,数据预处理器,模型组件如主干模块,颈部模块和头部模块,以及损失函数。有关如何构建自定义模型的信
|
||||
息,请参考[概览](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/overview.html)。
|
||||
MMOCR 的模块化设计使用户可以定义自己的优化器,数据预处理器,模型组件如主干模块,颈部模块和头部模块,以及损失函数。有关如何构建自定义模型的信息,请参考[概览](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/overview.html)。
|
||||
|
||||
-**众多实用工具**
|
||||
|
||||
该工具箱提供了一套全面的实用程序,可以帮助用户评估模型的性能。它包括可对图像,标注的真值以及预测结果进行可视化的可视化工具,以及用于在训练过程中评估模型的验证工具。它还包括数据转换器,演示了如何将用户自建的标注数据转换为 MMOCR 支持的标注文件。
|
||||
|
||||
## 最新进展
|
||||
|
||||
1. 架构升级:MMOCR 1.x 是基于 [MMEngine](https://github.com/open-mmlab/mmengine),提供了一个通用的、强大的执行器,允许更灵活的定制,提供了统一的训练和测试入口。
|
||||
|
||||
2. 统一接口:MMOCR 1.x 统一了数据集、模型、评估和可视化的接口和内部逻辑。支持更强的扩展性。
|
||||
|
||||
3. 跨项目调用:受益于统一的设计,你可以使用其他OpenMMLab项目中实现的模型,如MMDet。 我们提供了一个例子,说明如何通过MMDetWrapper使用MMDetection的Mask R-CNN。查看我们的文档以了解更多细节。更多的包装器将在未来发布。
|
||||
|
||||
4. 更强的可视化:我们提供了一系列可视化工具, 用户现在可以更方便可视化数据。
|
||||
|
||||
5. 更多的文档和教程:我们增加了更多的教程,降低用户的学习门槛。详见[教程](https://mmocr.readthedocs.io/zh_CN/dev-1.x/)。
|
||||
|
||||
阅读[更新日志](https://mmocr.readthedocs.io/zh_CN/dev-1.x/notes/changelog.html)以获取更多信息。
|
||||
|
||||
## 安装
|
||||
|
||||
MMOCR 依赖 [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection),以下是安装的简要步骤。
|
||||
|
@ -93,13 +111,9 @@ MMOCR 依赖 [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open
|
|||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
mim install mmengine
|
||||
mim install 'mmcv>=2.0.0rc1'
|
||||
mim install 'mmdet>=3.0.0rc0'
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
git checkout 1.x
|
||||
pip3 install -e .
|
||||
mim install -e .
|
||||
```
|
||||
|
||||
## 快速入门
|
||||
|
@ -110,6 +124,13 @@ pip3 install -e .
|
|||
|
||||
支持的算法:
|
||||
|
||||
<details open>
|
||||
<summary>骨干网络</summary>
|
||||
|
||||
- [x] [oCLIP](configs/backbone/oclip/README.md) (ECCV'2022)
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>文字检测</summary>
|
||||
|
||||
|
@ -127,12 +148,14 @@ pip3 install -e .
|
|||
<summary>文字识别</summary>
|
||||
|
||||
- [x] [ABINet](configs/textrecog/abinet/README.md) (CVPR'2021)
|
||||
- [x] [ASTER](configs/textrecog/aster/README.md) (TPAMI'2018)
|
||||
- [x] [CRNN](configs/textrecog/crnn/README.md) (TPAMI'2016)
|
||||
- [x] [MASTER](configs/textrecog/master/README.md) (PR'2021)
|
||||
- [x] [NRTR](configs/textrecog/nrtr/README.md) (ICDAR'2019)
|
||||
- [x] [RobustScanner](configs/textrecog/robust_scanner/README.md) (ECCV'2020)
|
||||
- [x] [SAR](configs/textrecog/sar/README.md) (AAAI'2019)
|
||||
- [x] [SATRN](configs/textrecog/satrn/README.md) (CVPR'2020 Workshop on Text and Documents in the Deep Learning Era)
|
||||
- [x] [SVTR](configs/textrecog/svtr/README.md) (IJCAI'2022)
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -143,8 +166,22 @@ pip3 install -e .
|
|||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>端对端 OCR</summary>
|
||||
|
||||
- [x] [ABCNet](projects/ABCNet/README.md) (CVPR'2020)
|
||||
- [x] [ABCNetV2](projects/ABCNet/README_V2.md) (TPAMI'2021)
|
||||
- [x] [SPTS](projects/SPTS/README.md) (ACM MM'2022)
|
||||
|
||||
</details>
|
||||
|
||||
请点击[模型库](https://mmocr.readthedocs.io/zh_CN/dev-1.x/modelzoo.html)查看更多关于上述算法的详细信息。
|
||||
|
||||
## 社区项目
|
||||
|
||||
[这里](projects/README.md)有一些由社区用户支持和维护的基于 MMOCR 的 SOTA 模型和解决方案的实现。这些项目展示了基于 MMOCR 的研究和产品开发的最佳实践。
|
||||
我们欢迎并感谢对 OpenMMLab 生态系统的所有贡献。
|
||||
|
||||
## 贡献指南
|
||||
|
||||
我们感谢所有的贡献者为改进和提升 MMOCR 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
|
||||
|
@ -195,10 +232,10 @@ MMOCR 是一款由来自不同高校和企业的研发人员共同参与贡献
|
|||
|
||||
## 欢迎加入 OpenMMLab 社区
|
||||
|
||||
扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [官方交流 QQ 群](https://jq.qq.com/?_wv=1027&k=aCvMxdr3),或通过添加微信“Open小喵Lab”加入官方交流微信群。
|
||||
扫描下方的二维码可关注 OpenMMLab 团队的 知乎官方账号,扫描下方微信二维码添加喵喵好友,进入 MMOCR 微信交流社群。【加好友申请格式:研究方向+地区+学校/公司+姓名】
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/qq_group_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/wechat_qrcode.jpg" height="400" />
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://github.com/open-mmlab/mmocr/assets/62195058/bf1e53fe-df4f-4296-9e1b-61db8971985e" height="400" />
|
||||
</div>
|
||||
|
||||
我们会在 OpenMMLab 社区为大家
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
# oCLIP
|
||||
|
||||
> [Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting](https://www.ecva.net/papers/eccv_2022/papers_ECCV/papers/136880282.pdf)
|
||||
|
||||
<!-- [ALGORITHM] -->
|
||||
|
||||
## Abstract
|
||||
|
||||
Recently, Vision-Language Pre-training (VLP) techniques have greatly benefited various vision-language tasks by jointly learning visual and textual representations, which intuitively helps in Optical Character Recognition (OCR) tasks due to the rich visual and textual information in scene text images. However, these methods cannot well cope with OCR tasks because of the difficulty in both instance-level text encoding and image-text pair acquisition (i.e. images and captured texts in them). This paper presents a weakly supervised pre-training method, oCLIP, which can acquire effective scene text representations by jointly learning and aligning visual and textual information. Our network consists of an image encoder and a character-aware text encoder that extract visual and textual features, respectively, as well as a visual-textual decoder that models the interaction among textual and visual features for learning effective scene text representations. With the learning of textual features, the pre-trained model can attend texts in images well with character awareness. Besides, these designs enable the learning from weakly annotated texts (i.e. partial texts in images without text bounding boxes) which mitigates the data annotation constraint greatly. Experiments over the weakly annotated images in ICDAR2019-LSVT show that our pre-trained model improves F-score by +2.5% and +4.8% while transferring its weights to other text detection and spotting networks, respectively. In addition, the proposed method outperforms existing pre-training techniques consistently across multiple public datasets (e.g., +3.2% and +1.3% for Total-Text and CTW1500).
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/24622904/199475057-aa688422-518d-4d7a-86fc-1be0cc1b5dc6.png"/>
|
||||
</div>
|
||||
|
||||
## Models
|
||||
|
||||
| Backbone | Pre-train Data | Model |
|
||||
| :-------: | :------------: | :-------------------------------------------------------------------------------: |
|
||||
| ResNet-50 | SynthText | [Link](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) |
|
||||
|
||||
```{note}
|
||||
The model is converted from the official [oCLIP](https://github.com/bytedance/oclip.git).
|
||||
```
|
||||
|
||||
## Supported Text Detection Models
|
||||
|
||||
| | [DBNet](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnet) | [DBNet++](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnetpp) | [FCENet](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#fcenet) | [TextSnake](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#textsnake) | [PSENet](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#psenet) | [DRRG](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#drrg) | [Mask R-CNN](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#mask-r-cnn) |
|
||||
| :-------: | :------------------------------------------------------------------------: | :----------------------------------------------------------------------------: | :--------------------------------------------------------------------------: | :-----------------------------------------------------------------------------: | :--------------------------------------------------------------------------: | :----------------------------------------------------------------------: | :----------------------------------------------------------------------------------: |
|
||||
| ICDAR2015 | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| CTW1500 | | | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{xue2022language,
|
||||
title={Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting},
|
||||
author={Xue, Chuhui and Zhang, Wenqing and Hao, Yu and Lu, Shijian and Torr, Philip and Bai, Song},
|
||||
journal={Proceedings of the European Conference on Computer Vision (ECCV)},
|
||||
year={2022}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,13 @@
|
|||
Collections:
|
||||
- Name: oCLIP
|
||||
Metadata:
|
||||
Training Data: SynthText
|
||||
Architecture:
|
||||
- CLIPResNet
|
||||
Paper:
|
||||
URL: https://arxiv.org/abs/2203.03911
|
||||
Title: 'Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting'
|
||||
README: configs/backbone/oclip/README.md
|
||||
|
||||
Models:
|
||||
Weights: https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth
|
|
@ -1,4 +1,4 @@
|
|||
wildreceipt_openset_data_root = 'data/kie/wildreceipt/'
|
||||
wildreceipt_openset_data_root = 'data/wildreceipt/'
|
||||
|
||||
wildreceipt_openset_train = dict(
|
||||
type='WildReceiptDataset',
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
wildreceipt_data_root = 'data/kie/wildreceipt/'
|
||||
wildreceipt_data_root = 'data/wildreceipt/'
|
||||
|
||||
wildreceipt_train = dict(
|
||||
type='WildReceiptDataset',
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
default_scope = 'mmocr'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
|
|
@ -10,7 +10,7 @@ model = dict(
|
|||
postprocessor=dict(type='SDMGRPostProcessor')),
|
||||
dictionary=dict(
|
||||
type='Dictionary',
|
||||
dict_file='data/kie/wildreceipt/dict.txt',
|
||||
dict_file='{{ fileDirname }}/../../../dicts/sdmgr_dict.txt',
|
||||
with_padding=True,
|
||||
with_unknown=True,
|
||||
unknown_token=None),
|
||||
|
|
|
@ -24,5 +24,5 @@ test_pipeline = [
|
|||
dict(type='LoadImageFromFile'),
|
||||
dict(type='LoadKIEAnnotations'),
|
||||
dict(type='Resize', scale=(1024, 512), keep_ratio=True),
|
||||
dict(type='PackKIEInputs'),
|
||||
dict(type='PackKIEInputs', meta_keys=('img_path', )),
|
||||
]
|
||||
|
|
|
@ -15,6 +15,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: sdmgr_unet16_60e_wildreceipt
|
||||
Alias: SDMGR
|
||||
In Collection: SDMGR
|
||||
Config: configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py
|
||||
Metadata:
|
||||
|
@ -25,3 +26,27 @@ Models:
|
|||
Metrics:
|
||||
macro_f1: 0.890
|
||||
Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_unet16_60e_wildreceipt/sdmgr_unet16_60e_wildreceipt_20220825_151648-22419f37.pth
|
||||
- Name: sdmgr_novisual_60e_wildreceipt
|
||||
In Collection: SDMGR
|
||||
Config: configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py
|
||||
Metadata:
|
||||
Training Data: wildreceipt
|
||||
Results:
|
||||
- Task: Key Information Extraction
|
||||
Dataset: wildreceipt
|
||||
Metrics:
|
||||
macro_f1: 0.873
|
||||
Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt/sdmgr_novisual_60e_wildreceipt_20220831_193317-827649d8.pth
|
||||
- Name: sdmgr_novisual_60e_wildreceipt_openset
|
||||
In Collection: SDMGR
|
||||
Config: configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt-openset.py
|
||||
Metadata:
|
||||
Training Data: wildreceipt-openset
|
||||
Results:
|
||||
- Task: Key Information Extraction
|
||||
Dataset: wildreceipt
|
||||
Metrics:
|
||||
macro_f1: 0.931
|
||||
micro_f1: 0.940
|
||||
edge_micro_f1: 0.792
|
||||
Weights: https://download.openmmlab.com/mmocr/kie/sdmgr/sdmgr_novisual_60e_wildreceipt-openset/sdmgr_novisual_60e_wildreceipt-openset_20220831_200807-dedf15ec.pth
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
ctw_det_data_root = 'data/det/ctw1500'
|
||||
ctw1500_textdet_data_root = 'data/ctw1500'
|
||||
|
||||
ctw_det_train = dict(
|
||||
ctw1500_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ctw_det_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
data_root=ctw1500_textdet_data_root,
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
ctw_det_test = dict(
|
||||
ctw1500_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ctw_det_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
data_root=ctw1500_textdet_data_root,
|
||||
ann_file='textdet_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,17 +1,15 @@
|
|||
ic15_det_data_root = 'data/det/icdar2015'
|
||||
icdar2015_textdet_data_root = 'data/icdar2015'
|
||||
|
||||
ic15_det_train = dict(
|
||||
icdar2015_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_det_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
data_root=icdar2015_textdet_data_root,
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
ic15_det_test = dict(
|
||||
icdar2015_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_det_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
data_root=icdar2015_textdet_data_root,
|
||||
ann_file='textdet_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,16 +1,16 @@
|
|||
ic17_det_data_root = 'data/det/icdar_2017'
|
||||
icdar2017_textdet_data_root = 'data/det/icdar_2017'
|
||||
|
||||
ic17_det_train = dict(
|
||||
icdar2017_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic17_det_data_root,
|
||||
data_root=icdar2017_textdet_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
ic17_det_test = dict(
|
||||
icdar2017_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic17_det_data_root,
|
||||
data_root=icdar2017_textdet_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
test_mode=True,
|
||||
|
|
|
@ -1,17 +1,8 @@
|
|||
st_det_data_root = 'data/det/synthtext'
|
||||
synthtext_textdet_data_root = 'data/synthtext'
|
||||
|
||||
st_det_train = dict(
|
||||
synthtext_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=st_det_data_root,
|
||||
ann_file='instances_training.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
data_root=synthtext_textdet_data_root,
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
st_det_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=st_det_data_root,
|
||||
ann_file='instances_test.json',
|
||||
data_prefix=dict(img_path='imgs/'),
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
totaltext_textdet_data_root = 'data/totaltext'
|
||||
|
||||
totaltext_textdet_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=totaltext_textdet_data_root,
|
||||
ann_file='textdet_train.json',
|
||||
filter_cfg=dict(filter_empty_gt=True, min_size=32),
|
||||
pipeline=None)
|
||||
|
||||
totaltext_textdet_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=totaltext_textdet_data_root,
|
||||
ann_file='textdet_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
|
@ -1,6 +1,6 @@
|
|||
default_scope = 'mmocr'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
|
|
@ -0,0 +1,14 @@
|
|||
_base_ = 'default_runtime.py'
|
||||
|
||||
default_hooks = dict(
|
||||
logger=dict(type='LoggerHook', interval=1000),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=10000,
|
||||
by_epoch=False,
|
||||
max_keep_ckpts=1),
|
||||
)
|
||||
|
||||
# Evaluation
|
||||
val_evaluator = None
|
||||
test_evaluator = None
|
|
@ -4,7 +4,7 @@ optim_wrapper = dict(
|
|||
optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
|
||||
|
||||
train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000)
|
||||
test_cfg = dict(type='TestLoop')
|
||||
test_cfg = None
|
||||
val_cfg = None
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
|
|
|
@ -14,12 +14,26 @@ Recently, segmentation-based methods are quite popular in scene text detection,
|
|||
|
||||
## Results and models
|
||||
|
||||
### SynthText
|
||||
|
||||
| Method | Backbone | Training set | #iters | Download |
|
||||
| :-----------------------------------------------------------------------: | :------: | :----------: | :-----: | :--------------------------------------------------------------------------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py) | ResNet18 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/dbnet_resnet18_fpnc_100k_synthtext-2e9bf392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/20221214_150351.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :--------------------------------------: | :-------------------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :-----------------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 1200 | 736 | 0.8853 | 0.7583 | 0.8169 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/20220825_221614.log) |
|
||||
| [DBNet_r50dcn](/configs/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | [Synthtext](https://download.openmmlab.com/mmocr/textdet/dbnet/tmp_1.0_pretrain/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-ed322016.pth) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.8784 | 0.8315 | 0.8543 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015_20220828_124917-452c443c.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/20220828_124917.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py) | ResNet18 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 736 | 0.8853 | 0.7583 | 0.8169 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/20220825_221614.log) |
|
||||
| [DBNet_r50](/configs/textdet/dbnet/dbnet_resnet50_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.8744 | 0.8276 | 0.8504 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/dbnet_resnet50_1200e_icdar2015_20221102_115917-54f50589.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/20221102_115917.log) |
|
||||
| [DBNet_r50dcn](/configs/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50-DCN | [Synthtext](https://download.openmmlab.com/mmocr/textdet/dbnet/tmp_1.0_pretrain/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-ed322016.pth) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.8784 | 0.8315 | 0.8543 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015_20220828_124917-452c443c.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/20220828_124917.log) |
|
||||
| [DBNet_r50-oclip](/configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9052 | 0.8272 | 0.8644 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/20221102_115917.log) |
|
||||
|
||||
### Total Text
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------------------------------: | :------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py) | ResNet18 | - | Totaltext Train | Totaltext Test | 1200 | 736 | 0.8640 | 0.7770 | 0.8182 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext/20221219_201038.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
|
@ -27,10 +25,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -55,10 +50,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
|
@ -29,10 +27,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
|
@ -57,10 +52,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -1,30 +1,45 @@
|
|||
_base_ = [
|
||||
'_base_dbnet_resnet18_fpnc.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/pretrain_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_100k.py',
|
||||
]
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
st_det_train = _base_.st_det_train
|
||||
st_det_train.pipeline = _base_.train_pipeline
|
||||
st_det_test = _base_.st_det_test
|
||||
st_det_test.pipeline = _base_.test_pipeline
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = train_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=st_det_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=st_det_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
|
|
@ -6,24 +6,24 @@ _base_ = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2015_textdet_train = _base_.icdar2015_textdet_train
|
||||
icdar2015_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2015_textdet_test = _base_.icdar2015_textdet_test
|
||||
icdar2015_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
dataset=icdar2015_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
dataset=icdar2015_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
_base_ = [
|
||||
'_base_dbnet_resnet18_fpnc.py',
|
||||
'../_base_/datasets/totaltext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_1200e.py',
|
||||
]
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon', min_poly_points=4),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon', min_poly_points=4),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
totaltext_textdet_train = _base_.totaltext_textdet_train
|
||||
totaltext_textdet_test = _base_.totaltext_textdet_test
|
||||
totaltext_textdet_train.pipeline = train_pipeline
|
||||
totaltext_textdet_test.pipeline = test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=16,
|
||||
pin_memory=True,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=totaltext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
pin_memory=True,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=totaltext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
|
@ -6,24 +6,24 @@ _base_ = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
st_det_train = _base_.st_det_train
|
||||
st_det_train.pipeline = _base_.train_pipeline
|
||||
st_det_test = _base_.st_det_test
|
||||
st_det_test.pipeline = _base_.test_pipeline
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = _base_.train_pipeline
|
||||
synthtext_textdet_test = _base_.synthtext_textdet_test
|
||||
synthtext_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=st_det_train)
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=st_det_test)
|
||||
dataset=synthtext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -9,24 +9,24 @@ _base_ = [
|
|||
load_from = 'https://download.openmmlab.com/mmocr/textdet/dbnet/tmp_1.0_pretrain/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-ed322016.pth' # noqa
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2015_textdet_train = _base_.icdar2015_textdet_train
|
||||
icdar2015_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2015_textdet_test = _base_.icdar2015_textdet_test
|
||||
icdar2015_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
dataset=icdar2015_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
dataset=icdar2015_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
_base_ = [
|
||||
'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
_base_.train_dataloader.num_workers = 24
|
||||
_base_.optim_wrapper.optimizer.lr = 0.002
|
||||
|
||||
param_scheduler = [
|
||||
dict(type='LinearLR', end=100, start_factor=0.001),
|
||||
dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200),
|
||||
]
|
|
@ -0,0 +1,24 @@
|
|||
_base_ = [
|
||||
'dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))
|
||||
|
||||
_base_.train_dataloader.num_workers = 24
|
||||
_base_.optim_wrapper.optimizer.lr = 0.002
|
||||
|
||||
param_scheduler = [
|
||||
dict(type='LinearLR', end=100, start_factor=0.001),
|
||||
dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200),
|
||||
]
|
|
@ -16,6 +16,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: dbnet_resnet18_fpnc_1200e_icdar2015
|
||||
Alias: DB_r18
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -27,6 +28,18 @@ Models:
|
|||
hmean-iou: 0.8169
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015/dbnet_resnet18_fpnc_1200e_icdar2015_20220825_221614-7c0e94f2.pth
|
||||
|
||||
- Name: dbnet_resnet50_fpnc_1200e_icdar2015
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet50_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8504
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50_1200e_icdar2015/dbnet_resnet50_1200e_icdar2015_20221102_115917-54f50589.pth
|
||||
|
||||
- Name: dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
|
@ -38,3 +51,30 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8543
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnet_resnet50-dcnv2_fpnc_1200e_icdar2015_20220828_124917-452c443c.pth
|
||||
|
||||
- Name: dbnet_resnet50-oclip_fpnc_1200e_icdar2015
|
||||
In Collection: DBNet
|
||||
Alias:
|
||||
- DB_r50
|
||||
- DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8644
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet50-oclip_1200e_icdar2015/dbnet_resnet50-oclip_1200e_icdar2015_20221102_115917-bde8c87a.pth
|
||||
|
||||
- Name: dbnet_resnet18_fpnc_1200e_totaltext
|
||||
In Collection: DBNet
|
||||
Config: configs/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext.py
|
||||
Metadata:
|
||||
Training Data: Totaltext
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: Totaltext
|
||||
Metrics:
|
||||
hmean-iou: 0.8182
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_1200e_totaltext/dbnet_resnet18_fpnc_1200e_totaltext-3ed3233c.pth
|
||||
|
|
|
@ -14,11 +14,19 @@ Recently, segmentation-based scene text detection methods have drawn extensive a
|
|||
|
||||
## Results and models
|
||||
|
||||
### SynthText
|
||||
|
||||
| Method | BackBone | Training set | #iters | Download |
|
||||
| :--------------------------------------------------------------------------------: | :------------: | :----------: | :-----: | :-----------------------------------------------------------------------------------: |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) | ResNet50-dcnv2 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext-00f0a80b.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/20221215_013531.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :--------------------------------------: | :-------------------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :-----------------------------------------: |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
|
||||
| Method | BackBone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: |
|
||||
| [DBNetpp_r50](/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9079 | 0.8209 | 0.8622 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/20221025_185550.log) |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50-dcnv2 | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
|
||||
| [DBNetpp_r50-oclip](/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9174 | 0.8609 | 0.8882 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/20221101_124139.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,34 +1,44 @@
|
|||
_base_ = [
|
||||
'_base_dbnetpp_resnet50-dcnv2_fpnc.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/pretrain_runtime.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/schedules/schedule_sgd_100k.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.st_det_train]
|
||||
test_list = [_base_.st_det_test]
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
with_polygon=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = train_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=_base_.train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=_base_.test_pipeline))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
|
|
@ -8,8 +8,8 @@ _base_ = [
|
|||
load_from = 'https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth' # noqa
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.ic15_det_train]
|
||||
test_list = [_base_.ic15_det_test]
|
||||
train_list = [_base_.icdar2015_textdet_train]
|
||||
test_list = [_base_.icdar2015_textdet_test]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
_base_ = [
|
||||
'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
_base_.train_dataloader.num_workers = 24
|
||||
_base_.optim_wrapper.optimizer.lr = 0.002
|
||||
|
||||
param_scheduler = [
|
||||
dict(type='LinearLR', end=200, start_factor=0.001),
|
||||
dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200),
|
||||
]
|
|
@ -0,0 +1,24 @@
|
|||
_base_ = [
|
||||
'dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'))
|
||||
|
||||
_base_.train_dataloader.num_workers = 24
|
||||
_base_.optim_wrapper.optimizer.lr = 0.003
|
||||
|
||||
param_scheduler = [
|
||||
dict(type='LinearLR', end=200, start_factor=0.001),
|
||||
dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=200, end=1200),
|
||||
]
|
|
@ -15,6 +15,20 @@ Collections:
|
|||
README: configs/textdet/dbnetpp/README.md
|
||||
|
||||
Models:
|
||||
- Name: dbnetpp_resnet50_fpnc_1200e_icdar2015
|
||||
In Collection: DBNetpp
|
||||
Alias:
|
||||
- DBPP_r50
|
||||
Config: configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8622
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth
|
||||
|
||||
- Name: dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015
|
||||
In Collection: DBNetpp
|
||||
Config: configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
|
@ -26,3 +40,17 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8684
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth
|
||||
|
||||
- Name: dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015
|
||||
Alias:
|
||||
- DBNetpp
|
||||
In Collection: DBNetpp
|
||||
Config: configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8882
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth
|
||||
|
|
|
@ -16,9 +16,10 @@ Arbitrary shape text detection is a challenging task due to the high variety and
|
|||
|
||||
### CTW1500
|
||||
|
||||
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------------------------------------: |
|
||||
| [DRRG](/configs/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 1200 | 640 | 0.8775 | 0.8179 | 0.8467 | [model](https://download.openmmlab.com/mmocr/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500/drrg_resnet50_fpn-unet_1200e_ctw1500_20220827_105233-d5c702dd.pth) \\ [log](https://download.openmmlab.com/mmocr/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500/20220827_105233.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :-------------------------------------: | :---------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :----------------------------------------: |
|
||||
| [DRRG](/configs/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 1200 | 640 | 0.8775 | 0.8179 | 0.8467 | [model](https://download.openmmlab.com/mmocr/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500/drrg_resnet50_fpn-unet_1200e_ctw1500_20220827_105233-d5c702dd.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500/20220827_105233.log) |
|
||||
| [DRRG_r50-oclip](/configs/textdet/drrg/drrg_resnet50-oclip_fpn-unet_1200e_ctw1500.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | CTW1500 Train | CTW1500 Test | 1200 | | | | | [model](<>) \| [log](<>) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DRRG',
|
||||
backbone=dict(
|
||||
|
@ -29,10 +27,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
|
@ -82,10 +77,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1024, 640), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
_base_ = [
|
||||
'drrg_resnet50_fpn-unet_1200e_ctw1500.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
param_scheduler = [
|
||||
dict(type='LinearLR', end=100, start_factor=0.001),
|
||||
dict(type='PolyLR', power=0.9, eta_min=1e-7, begin=100, end=1200),
|
||||
]
|
|
@ -6,24 +6,24 @@ _base_ = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
ctw_det_test.pipeline = _base_.test_pipeline
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_train.pipeline = _base_.train_pipeline
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
ctw1500_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
dataset=ctw1500_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
dataset=ctw1500_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -15,6 +15,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: drrg_resnet50_fpn-unet_1200e_ctw1500
|
||||
Alias: DRRG
|
||||
In Collection: DRRG
|
||||
Config: configs/textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
Metadata:
|
||||
|
|
|
@ -16,15 +16,23 @@ One of the main challenges for arbitrary-shaped text detection is to design a go
|
|||
|
||||
### CTW1500
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :-------------------------------------------------: | :--------------: | :--------------: | :-----------: | :----------: | :-----: | :---------: | :-------: | :----: | :----: | :---------------------------------------------------: |
|
||||
| [FCENet](/configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py) | ResNet50 + DCNv2 | ImageNet | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.8689 | 0.8296 | 0.8488 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500_20220825_221510-4d705392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/20220825_221510.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :------------------------------------: | :---------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :---------: | :-------: | :----: | :----: | :---------------------------------------: |
|
||||
| [FCENet_r50dcn](/configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py) | ResNet50 + DCNv2 | - | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.8689 | 0.8296 | 0.8488 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500_20220825_221510-4d705392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/20220825_221510.log) |
|
||||
| [FCENet_r50-oclip](/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | CTW1500 Train | CTW1500 Test | 1500 | (736, 1080) | 0.8383 | 0.801 | 0.8192 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500/fcenet_resnet50-oclip_fpn_1500e_ctw1500_20221102_121909-101df7e6.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500/20221102_121909.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :------------------------------------------------------: | :------: | :--------------: | :----------: | :-------: | :-----: | :----------: | :-------: | :----: | :----: | :---------------------------------------------------------: |
|
||||
| [FCENet](/configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py) | ResNet50 | ImageNet | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.8243 | 0.8834 | 0.8528 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/fcenet_resnet50_fpn_1500e_icdar2015_20220826_140941-167d9042.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/20220826_140941.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :---------------------------------------------------: | :------------: | :--------------: | :----------: | :-------: | :-----: | :----------: | :-------: | :----: | :----: | :------------------------------------------------------: |
|
||||
| [FCENet_r50](/configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.8243 | 0.8834 | 0.8528 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/fcenet_resnet50_fpn_1500e_icdar2015_20220826_140941-167d9042.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/20220826_140941.log) |
|
||||
| [FCENet_r50-oclip](/configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py) | ResNet50-oCLIP | - | IC15 Train | IC15 Test | 1500 | (2260, 2260) | 0.9176 | 0.8098 | 0.8604 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/fcenet_resnet50-oclip_fpn_1500e_icdar2015_20221101_150145-5a6fc412.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/20221101_150145.log) |
|
||||
|
||||
### Total Text
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :---------------------------------------------------: | :------: | :--------------: | :-------------: | :------------: | :-----: | :---------: | :-------: | :----: | :----: | :-----------------------------------------------------: |
|
||||
| [FCENet_r50](/configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext.py) | ResNet50 | - | Totaltext Train | Totaltext Test | 1500 | (1280, 960) | 0.8485 | 0.7810 | 0.8134 | [model](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext/fcenet_resnet50_fpn_1500e_totaltext-91bd37af.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext/20221219_201107.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
|
@ -41,10 +39,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -96,10 +91,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -12,17 +12,13 @@ param_scheduler = [
|
|||
dict(type='PolyLR', power=0.9, eta_min=1e-7, end=1500),
|
||||
]
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
|
||||
# test pipeline for CTW1500
|
||||
ctw_test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1080, 736), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
@ -36,22 +32,22 @@ ctw_test_pipeline = [
|
|||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test.pipeline = ctw_test_pipeline
|
||||
ctw1500_textdet_train.pipeline = _base_.train_pipeline
|
||||
ctw1500_textdet_test.pipeline = ctw_test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
dataset=ctw1500_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
dataset=ctw1500_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -0,0 +1,16 @@
|
|||
_base_ = [
|
||||
'fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
out_indices=(1, 2, 3),
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
_base_.train_dataloader.num_workers = 24
|
||||
_base_.optim_wrapper.optimizer.lr = 0.0005
|
|
@ -0,0 +1,16 @@
|
|||
_base_ = [
|
||||
'fcenet_resnet50_fpn_1500e_icdar2015.py',
|
||||
]
|
||||
load_from = None
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
out_indices=(1, 2, 3),
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
_base_.train_dataloader.batch_size = 16
|
||||
_base_.train_dataloader.num_workers = 24
|
||||
_base_.optim_wrapper.optimizer.lr = 0.0005
|
|
@ -13,24 +13,24 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2015_textdet_train = _base_.icdar2015_textdet_train
|
||||
icdar2015_textdet_test = _base_.icdar2015_textdet_test
|
||||
icdar2015_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2015_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
dataset=icdar2015_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
dataset=icdar2015_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
_base_ = [
|
||||
'_base_fcenet_resnet50_fpn.py',
|
||||
'../_base_/datasets/totaltext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_base.py',
|
||||
]
|
||||
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
save_best='icdar/hmean',
|
||||
rule='greater',
|
||||
_delete_=True))
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(800, 800),
|
||||
ratio_range=(0.75, 2.5),
|
||||
keep_ratio=True),
|
||||
dict(
|
||||
type='TextDetRandomCropFlip',
|
||||
crop_ratio=0.5,
|
||||
iter_num=1,
|
||||
min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomApply',
|
||||
transforms=[dict(type='RandomCrop', min_side_ratio=0.3)],
|
||||
prob=0.8),
|
||||
dict(
|
||||
type='RandomApply',
|
||||
transforms=[
|
||||
dict(
|
||||
type='RandomRotate',
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False,
|
||||
use_canvas=True)
|
||||
],
|
||||
prob=0.5),
|
||||
dict(
|
||||
type='RandomChoice',
|
||||
transforms=[[
|
||||
dict(type='Resize', scale=800, keep_ratio=True),
|
||||
dict(type='SourceImagePad', target_scale=800)
|
||||
],
|
||||
dict(type='Resize', scale=800, keep_ratio=False)],
|
||||
prob=[0.6, 0.4]),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1280, 960), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
optim_wrapper = dict(optimizer=dict(lr=1e-3, weight_decay=5e-4))
|
||||
train_cfg = dict(max_epochs=1500)
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
dict(type='StepLR', gamma=0.8, step_size=200, end=1200),
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
totaltext_textdet_train = _base_.totaltext_textdet_train
|
||||
totaltext_textdet_test = _base_.totaltext_textdet_test
|
||||
totaltext_textdet_train.pipeline = train_pipeline
|
||||
totaltext_textdet_test.pipeline = test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=16,
|
||||
persistent_workers=True,
|
||||
pin_memory=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=totaltext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
pin_memory=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=totaltext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
||||
find_unused_parameters = True
|
|
@ -16,6 +16,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: fcenet_resnet50-dcnv2_fpn_1500e_ctw1500
|
||||
Alias: FCE_CTW_DCNv2
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -26,7 +27,21 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8488
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500_20220825_221510-4d705392.pth
|
||||
|
||||
- Name: fcenet_resnet50-oclip_fpn_1500e_ctw1500
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean-iou: 0.8192
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_ctw1500/fcenet_resnet50-oclip_fpn_1500e_ctw1500_20221102_121909-101df7e6.pth
|
||||
|
||||
- Name: fcenet_resnet50_fpn_1500e_icdar2015
|
||||
Alias: FCE_IC15
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -37,3 +52,28 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8528
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015/fcenet_resnet50_fpn_1500e_icdar2015_20220826_140941-167d9042.pth
|
||||
|
||||
- Name: fcenet_resnet50-oclip_fpn_1500e_icdar2015
|
||||
Alias: FCENet
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8604
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50-oclip_fpn_1500e_icdar2015/fcenet_resnet50-oclip_fpn_1500e_icdar2015_20221101_150145-5a6fc412.pth
|
||||
|
||||
- Name: fcenet_resnet50_fpn_1500e_totaltext
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext.py
|
||||
Metadata:
|
||||
Training Data: Totaltext
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: Totaltext
|
||||
Metrics:
|
||||
hmean-iou: 0.8134
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_resnet50_fpn_1500e_totaltext/fcenet_resnet50_fpn_1500e_totaltext-91bd37af.pth
|
||||
|
|
|
@ -16,15 +16,17 @@ We present a conceptually simple, flexible, and general framework for object ins
|
|||
|
||||
### CTW1500
|
||||
|
||||
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------------------------------------: |
|
||||
| [MaskRCNN](/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 160 | 1600 | 0.7165 | 0.7776 | 0.7458 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/mask-rcnn_resnet50_fpn_160e_ctw1500_20220826_154755-ce68ee8e.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/20220826_154755.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :-------------------------------------: | :---------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :----------------------------------------: |
|
||||
| [MaskRCNN](/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 160 | 1600 | 0.7165 | 0.7776 | 0.7458 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/mask-rcnn_resnet50_fpn_160e_ctw1500_20220826_154755-ce68ee8e.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/20220826_154755.log) |
|
||||
| [MaskRCNN_r50-oclip](/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | CTW1500 Train | CTW1500 Test | 160 | 1600 | 0.753 | 0.7593 | 0.7562 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500_20221101_154448-6e9e991c.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500/20221101_154448.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :--------------------------------------------------------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :----------------------------------------------------------: |
|
||||
| [MaskRCNN](/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py) | ImageNet | ICDAR2015 Train | ICDAR2015 Test | 160 | 1920 | 0.8644 | 0.7766 | 0.8182 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/mask-rcnn_resnet50_fpn_160e_icdar2015_20220826_154808-ff5c30bf.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/20220826_154808.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :------------------------------------: | :--------------------------------------: | :--------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :--------------------------------------: |
|
||||
| [MaskRCNN](/configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 160 | 1920 | 0.8644 | 0.7766 | 0.8182 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/mask-rcnn_resnet50_fpn_160e_icdar2015_20220826_154808-ff5c30bf.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/20220826_154808.log) |
|
||||
| [MaskRCNN_r50-oclip](/configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 160 | 1920 | 0.8695 | 0.8339 | 0.8513 | [model](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015_20221101_131357-a19f7802.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015/20221101_131357.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
_base_ = ['mmdet::_base_/models/mask-rcnn_r50_fpn.py']
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
|
||||
mask_rcnn = _base_.pop('model')
|
||||
# Adapt Mask R-CNN model to OCR task
|
||||
mask_rcnn.update(
|
||||
|
@ -18,10 +16,7 @@ mask_rcnn.update(
|
|||
model = dict(type='MMDetWrapper', text_repr_type='poly', cfg=mask_rcnn)
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -49,10 +44,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
_base_ = [
|
||||
'mask-rcnn_resnet50_fpn_160e_ctw1500.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.cfg.backbone = dict(
|
||||
_scope_='mmocr',
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
_base_.optim_wrapper.optimizer.lr = 0.02
|
|
@ -0,0 +1,15 @@
|
|||
_base_ = [
|
||||
'mask-rcnn_resnet50_fpn_160e_icdar2015.py',
|
||||
]
|
||||
|
||||
load_from = None
|
||||
|
||||
_base_.model.cfg.backbone = dict(
|
||||
_scope_='mmocr',
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
||||
|
||||
_base_.optim_wrapper.optimizer.lr = 0.02
|
|
@ -15,15 +15,12 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
|
||||
# test pipeline for CTW1500
|
||||
ctw_test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=dict(backend='disk'),
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
@ -37,22 +34,22 @@ ctw_test_pipeline = [
|
|||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test.pipeline = ctw_test_pipeline
|
||||
ctw1500_textdet_train.pipeline = _base_.train_pipeline
|
||||
ctw1500_textdet_test.pipeline = ctw_test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
dataset=ctw1500_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
dataset=ctw1500_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -15,24 +15,24 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2015_textdet_train = _base_.icdar2015_textdet_train
|
||||
icdar2015_textdet_test = _base_.icdar2015_textdet_test
|
||||
icdar2015_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2015_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
dataset=icdar2015_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
dataset=icdar2015_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -3,12 +3,12 @@ _base_ = [
|
|||
'../_base_/datasets/icdar2017.py',
|
||||
]
|
||||
|
||||
ic17_det_train = _base_.ic17_det_train
|
||||
ic17_det_test = _base_.ic17_det_test
|
||||
icdar2017_textdet_train = _base_.icdar2017_textdet_train
|
||||
icdar2017_textdet_test = _base_.icdar2017_textdet_test
|
||||
# use the same pipeline as icdar2015
|
||||
ic17_det_train.pipeline = _base_.train_pipeline
|
||||
ic17_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2017_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2017_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(dataset=ic17_det_train)
|
||||
val_dataloader = dict(dataset=ic17_det_test)
|
||||
train_dataloader = dict(dataset=icdar2017_textdet_train)
|
||||
val_dataloader = dict(dataset=icdar2017_textdet_test)
|
||||
test_dataloader = val_dataloader
|
||||
|
|
|
@ -18,6 +18,7 @@ Collections:
|
|||
Models:
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_ctw1500
|
||||
In Collection: Mask R-CNN
|
||||
Alias: MaskRCNN_CTW
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
|
@ -25,11 +26,24 @@ Models:
|
|||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean: 0.7458
|
||||
hmean-iou: 0.7458
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500/mask-rcnn_resnet50_fpn_160e_ctw1500_20220826_154755-ce68ee8e.pth
|
||||
|
||||
- Name: mask-rcnn_resnet50-oclip_fpn_160e_ctw1500
|
||||
In Collection: Mask R-CNN
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean-iou: 0.7562
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500/mask-rcnn_resnet50-oclip_fpn_160e_ctw1500_20221101_154448-6e9e991c.pth
|
||||
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_icdar2015
|
||||
In Collection: Mask R-CNN
|
||||
Alias: MaskRCNN_IC15
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
|
@ -37,5 +51,18 @@ Models:
|
|||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean: 0.8182
|
||||
hmean-iou: 0.8182
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015/mask-rcnn_resnet50_fpn_160e_icdar2015_20220826_154808-ff5c30bf.pth
|
||||
|
||||
- Name: mask-rcnn_resnet50-oclip_fpn_160e_icdar2015
|
||||
In Collection: Mask R-CNN
|
||||
Alias: MaskRCNN
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8513
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015/mask-rcnn_resnet50-oclip_fpn_160e_icdar2015_20221101_131357-a19f7802.pth
|
||||
|
|
|
@ -32,12 +32,8 @@ model = dict(
|
|||
),
|
||||
postprocessor=dict(type='PANPostprocessor', text_repr_type='quad')))
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -60,10 +56,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
# TODO Replace with mmcv.RescaleToShort when it's ready
|
||||
dict(
|
||||
type='ShortScaleAspectJitter',
|
||||
|
|
|
@ -15,6 +15,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: panet_resnet18_fpem-ffm_600e_ctw1500
|
||||
Alias: PANet_CTW
|
||||
In Collection: PANet
|
||||
Config: configs/textdet/panet/panet_resnet18_fpem-ffm_600e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -27,6 +28,7 @@ Models:
|
|||
Weights: https://download.openmmlab.com/mmocr/textdet/panet/panet_resnet18_fpem-ffm_600e_ctw1500/panet_resnet18_fpem-ffm_600e_ctw1500_20220826_144818-980f32d0.pth
|
||||
|
||||
- Name: panet_resnet18_fpem-ffm_600e_icdar2015
|
||||
Alias: PANet_IC15
|
||||
In Collection: PANet
|
||||
Config: configs/textdet/panet/panet_resnet18_fpem-ffm_600e_icdar2015.py
|
||||
Metadata:
|
||||
|
|
|
@ -9,12 +9,8 @@ model = dict(det_head=dict(module_loss=dict(shrink_ratio=(1, 0.7))))
|
|||
|
||||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -37,10 +33,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
# TODO Replace with mmcv.RescaleToShort when it's ready
|
||||
dict(
|
||||
type='ShortScaleAspectJitter',
|
||||
|
@ -59,24 +52,24 @@ test_pipeline = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
# pipeline settings
|
||||
ctw_det_train.pipeline = train_pipeline
|
||||
ctw_det_test.pipeline = test_pipeline
|
||||
ctw1500_textdet_train.pipeline = train_pipeline
|
||||
ctw1500_textdet_test.pipeline = test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
dataset=ctw1500_textdet_train)
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
dataset=ctw1500_textdet_test)
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(
|
||||
|
|
|
@ -8,24 +8,24 @@ _base_ = [
|
|||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
icdar2015_textdet_train = _base_.icdar2015_textdet_train
|
||||
icdar2015_textdet_test = _base_.icdar2015_textdet_test
|
||||
# pipeline settings
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2015_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2015_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=64,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
dataset=icdar2015_textdet_train)
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
dataset=icdar2015_textdet_test)
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(
|
||||
|
|
|
@ -7,12 +7,8 @@ _base_ = [
|
|||
|
||||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -35,10 +31,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
# TODO Replace with mmcv.RescaleToShort when it's ready
|
||||
dict(
|
||||
type='ShortScaleAspectJitter',
|
||||
|
@ -55,23 +48,23 @@ test_pipeline = [
|
|||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
ic17_det_train = _base_.ic17_det_train
|
||||
ic17_det_test = _base_.ic17_det_test
|
||||
icdar2017_textdet_train = _base_.icdar2017_textdet_train
|
||||
icdar2017_textdet_test = _base_.icdar2017_textdet_test
|
||||
# pipeline settings
|
||||
ic17_det_train.pipeline = train_pipeline
|
||||
ic17_det_test.pipeline = test_pipeline
|
||||
icdar2017_textdet_train.pipeline = train_pipeline
|
||||
icdar2017_textdet_test.pipeline = test_pipeline
|
||||
train_dataloader = dict(
|
||||
batch_size=64,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic17_det_train)
|
||||
dataset=icdar2017_textdet_train)
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic17_det_test)
|
||||
dataset=icdar2017_textdet_test)
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(
|
||||
|
|
|
@ -16,15 +16,17 @@ Scene text detection has witnessed rapid progress especially with the recent dev
|
|||
|
||||
### CTW1500
|
||||
|
||||
| Method | Backbone | Extra Data | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :---------------------------------------------------------: | :------: | :--------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :-----------------------------------------------------------: |
|
||||
| [PSENet](/configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 600 | 1280 | 0.7705 | 0.7883 | 0.7793 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500/psenet_resnet50_fpnf_600e_ctw1500_20220825_221459-7f974ac8.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500/20220825_221459.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :-------------------------------------: | :---------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :----------------------------------------: |
|
||||
| [PSENet](/configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 600 | 1280 | 0.7705 | 0.7883 | 0.7793 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500/psenet_resnet50_fpnf_600e_ctw1500_20220825_221459-7f974ac8.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500/20220825_221459.log) |
|
||||
| [PSENet_r50-oclip](/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | CTW1500 Train | CTW1500 Test | 600 | 1280 | 0.8483 | 0.7636 | 0.8037 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500/psenet_resnet50-oclip_fpnf_600e_ctw1500_20221101_140406-d431710d.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500/20221101_140406.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Backbone | Extra Data | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :-----------------------------------------------------------: | :------: | :--------: | :----------: | :-------: | :-----: | :-------: | :-------: | :----: | :----: | :-------------------------------------------------------------: |
|
||||
| [PSENet](/configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 600 | 2240 | 0.8396 | 0.7636 | 0.7998 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015/psenet_resnet50_fpnf_600e_icdar2015_20220825_222709-b6741ec3.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015/20220825_222709.log) |
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :--------------------------------------: | :-----------------------------------------: | :--------------: | :----------: | :-------: | :-----: | :-------: | :-------: | :----: | :----: | :-----------------------------------------: |
|
||||
| [PSENet](/configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py) | ResNet50 | - | IC15 Train | IC15 Test | 600 | 2240 | 0.8396 | 0.7636 | 0.7998 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015/psenet_resnet50_fpnf_600e_icdar2015_20220825_222709-b6741ec3.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015/20220825_222709.log) |
|
||||
| [PSENet_r50-oclip](/configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | IC15 Train | IC15 Test | 600 | 2240 | 0.8895 | 0.8098 | 0.8478 | [model](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015/psenet_resnet50-oclip_fpnf_600e_icdar2015_20221101_131357-2bdca389.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015/20221101_131357.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
|
@ -32,10 +30,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
|
@ -58,10 +53,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
|
|
@ -16,6 +16,7 @@ Collections:
|
|||
|
||||
Models:
|
||||
- Name: psenet_resnet50_fpnf_600e_ctw1500
|
||||
Alias: PS_CTW
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py
|
||||
Metadata:
|
||||
|
@ -27,7 +28,20 @@ Models:
|
|||
hmean-iou: 0.7793
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500/psenet_resnet50_fpnf_600e_ctw1500_20220825_221459-7f974ac8.pth
|
||||
|
||||
- Name: psenet_resnet50-oclip_fpnf_600e_ctw1500
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean-iou: 0.8037
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_ctw1500/psenet_resnet50-oclip_fpnf_600e_ctw1500_20221101_140406-d431710d.pth
|
||||
|
||||
- Name: psenet_resnet50_fpnf_600e_icdar2015
|
||||
Alias: PS_IC15
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
Metadata:
|
||||
|
@ -37,4 +51,17 @@ Models:
|
|||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.7998
|
||||
Weights:
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015/psenet_resnet50_fpnf_600e_icdar2015_20220825_222709-b6741ec3.pth
|
||||
|
||||
- Name: psenet_resnet50-oclip_fpnf_600e_icdar2015
|
||||
Alias: PSENet
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.8478
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50-oclip_fpnf_600e_icdar2015/psenet_resnet50-oclip_fpnf_600e_icdar2015_20221101_131357-2bdca389.pth
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
_base_ = [
|
||||
'psenet_resnet50_fpnf_600e_ctw1500.py',
|
||||
]
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
|
@ -0,0 +1,10 @@
|
|||
_base_ = [
|
||||
'psenet_resnet50_fpnf_600e_icdar2015.py',
|
||||
]
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
|
@ -13,14 +13,11 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
|
||||
test_pipeline_ctw = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=_base_.file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1280, 1280), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
|
@ -33,22 +30,22 @@ test_pipeline_ctw = [
|
|||
]
|
||||
|
||||
# pipeline settings
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test.pipeline = test_pipeline_ctw
|
||||
ctw1500_textdet_train.pipeline = _base_.train_pipeline
|
||||
ctw1500_textdet_test.pipeline = test_pipeline_ctw
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
dataset=ctw1500_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
dataset=ctw1500_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -13,8 +13,8 @@ param_scheduler = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
icdar2015_textdet_train = _base_.icdar2015_textdet_train
|
||||
icdar2015_textdet_test = _base_.icdar2015_textdet_test
|
||||
|
||||
# use quadrilaterals for icdar2015
|
||||
model = dict(
|
||||
|
@ -22,22 +22,22 @@ model = dict(
|
|||
det_head=dict(postprocessor=dict(text_repr_type='quad')))
|
||||
|
||||
# pipeline settings
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2015_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2015_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
dataset=icdar2015_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
dataset=icdar2015_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -3,14 +3,14 @@ _base_ = [
|
|||
'../_base_/datasets/icdar2017.py',
|
||||
]
|
||||
|
||||
ic17_det_train = _base_.ic17_det_train
|
||||
ic17_det_test = _base_.ic17_det_test
|
||||
icdar2017_textdet_train = _base_.icdar2017_textdet_train
|
||||
icdar2017_textdet_test = _base_.icdar2017_textdet_test
|
||||
# use the same pipeline as icdar2015
|
||||
ic17_det_train.pipeline = _base_.train_pipeline
|
||||
ic17_det_test.pipeline = _base_.test_pipeline
|
||||
icdar2017_textdet_train.pipeline = _base_.train_pipeline
|
||||
icdar2017_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(dataset=ic17_det_train)
|
||||
val_dataloader = dict(dataset=ic17_det_test)
|
||||
train_dataloader = dict(dataset=icdar2017_textdet_train)
|
||||
val_dataloader = dict(dataset=icdar2017_textdet_test)
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=64 * 4)
|
||||
|
|
|
@ -16,9 +16,10 @@ Driven by deep neural networks and large scale datasets, scene text detection me
|
|||
|
||||
### CTW1500
|
||||
|
||||
| Method | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------------------------------------: |
|
||||
| [TextSnake](/configs/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py) | ImageNet | CTW1500 Train | CTW1500 Test | 1200 | 736 | 0.8535 | 0.8052 | 0.8286 | [model](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500/textsnake_resnet50_fpn-unet_1200e_ctw1500_20220825_221459-c0b6adc4.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500/20220825_221459.log) |
|
||||
| Method | BackBone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :-------------------------------------: | :---------------------------------------: | :--------------: | :-----------: | :----------: | :-----: | :-------: | :-------: | :----: | :----: | :----------------------------------------: |
|
||||
| [TextSnake](/configs/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py) | ResNet50 | - | CTW1500 Train | CTW1500 Test | 1200 | 736 | 0.8535 | 0.8052 | 0.8286 | [model](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500/textsnake_resnet50_fpn-unet_1200e_ctw1500_20220825_221459-c0b6adc4.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500/20220825_221459.log) |
|
||||
| [TextSnake_r50-oclip](/configs/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | CTW1500 Train | CTW1500 Test | 1200 | 736 | 0.8869 | 0.8215 | 0.8529 | [model](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/20221101_134814.log) |
|
||||
|
||||
## Citation
|
||||
|
||||
|
|
|
@ -1,5 +1,3 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='TextSnake',
|
||||
backbone=dict(
|
||||
|
@ -28,10 +26,7 @@ model = dict(
|
|||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
|
@ -72,10 +67,7 @@ train_pipeline = [
|
|||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
|
|
|
@ -25,3 +25,16 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8286
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500/textsnake_resnet50_fpn-unet_1200e_ctw1500_20220825_221459-c0b6adc4.pth
|
||||
|
||||
- Name: textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500
|
||||
Alias: TextSnake
|
||||
In Collection: TextSnake
|
||||
Config: configs/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
- Task: Text Detection
|
||||
Dataset: CTW1500
|
||||
Metrics:
|
||||
hmean-iou: 0.8529
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/textsnake/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500/textsnake_resnet50-oclip_fpn-unet_1200e_ctw1500_20221101_134814-a216e5b2.pth
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
_base_ = [
|
||||
'textsnake_resnet50_fpn-unet_1200e_ctw1500.py',
|
||||
]
|
||||
|
||||
_base_.model.backbone = dict(
|
||||
type='CLIPResNet',
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/'
|
||||
'mmocr/backbone/resnet50-oclip-7ba0c533.pth'))
|
|
@ -6,24 +6,24 @@ _base_ = [
|
|||
]
|
||||
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
ctw_det_test.pipeline = _base_.test_pipeline
|
||||
ctw1500_textdet_train = _base_.ctw1500_textdet_train
|
||||
ctw1500_textdet_train.pipeline = _base_.train_pipeline
|
||||
ctw1500_textdet_test = _base_.ctw1500_textdet_test
|
||||
ctw1500_textdet_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=4,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
dataset=ctw1500_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
dataset=ctw1500_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
cocov1_rec_train_data_root = 'data/rec/coco_text_v1'
|
||||
cocotextv1_textrecog_data_root = 'data/rec/coco_text_v1'
|
||||
|
||||
cocov1_rec_train = dict(
|
||||
cocotextv1_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=cocov1_rec_train_data_root,
|
||||
data_root=cocotextv1_textrecog_data_root,
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
cute80_rec_data_root = 'data/rec/ct80/'
|
||||
cute80_textrecog_data_root = 'data/cute80'
|
||||
|
||||
cute80_rec_test = dict(
|
||||
cute80_textrecog_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=cute80_rec_data_root,
|
||||
ann_file='test_labels.json',
|
||||
data_root=cute80_textrecog_data_root,
|
||||
ann_file='textrecog_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
ic11_rec_data_root = 'data/rec/icdar_2011/'
|
||||
icdar2011_textrecog_data_root = 'data/rec/icdar_2011/'
|
||||
|
||||
ic11_rec_train = dict(
|
||||
icdar2011_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic11_rec_data_root,
|
||||
data_root=icdar2011_textrecog_data_root,
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,15 +1,21 @@
|
|||
ic13_rec_data_root = 'data/rec/icdar_2013/'
|
||||
icdar2013_textrecog_data_root = 'data/icdar2013'
|
||||
|
||||
ic13_rec_train = dict(
|
||||
icdar2013_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic13_rec_data_root,
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
data_root=icdar2013_textrecog_data_root,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
ic13_rec_test = dict(
|
||||
icdar2013_textrecog_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic13_rec_data_root,
|
||||
ann_file='test_labels.json',
|
||||
data_root=icdar2013_textrecog_data_root,
|
||||
ann_file='textrecog_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
||||
icdar2013_857_textrecog_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=icdar2013_textrecog_data_root,
|
||||
ann_file='textrecog_test_857.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,15 +1,21 @@
|
|||
ic15_rec_data_root = 'data/rec/icdar_2015/'
|
||||
icdar2015_textrecog_data_root = 'data/icdar2015'
|
||||
|
||||
ic15_rec_train = dict(
|
||||
icdar2015_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_rec_data_root,
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
data_root=icdar2015_textrecog_data_root,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
ic15_rec_test = dict(
|
||||
icdar2015_textrecog_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=ic15_rec_data_root,
|
||||
ann_file='test_labels.json',
|
||||
data_root=icdar2015_textrecog_data_root,
|
||||
ann_file='textrecog_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
||||
icdar2015_1811_textrecog_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=icdar2015_textrecog_data_root,
|
||||
ann_file='textrecog_test_1811.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
|
@ -1,15 +1,14 @@
|
|||
iiit5k_rec_data_root = 'data/rec/IIIT5K/'
|
||||
iiit5k_textrecog_data_root = 'data/iiit5k'
|
||||
|
||||
iiit5k_rec_train = dict(
|
||||
iiit5k_textrecog_train = dict(
|
||||
type='OCRDataset',
|
||||
data_root=iiit5k_rec_data_root,
|
||||
ann_file='train_labels.json',
|
||||
test_mode=False,
|
||||
data_root=iiit5k_textrecog_data_root,
|
||||
ann_file='textrecog_train.json',
|
||||
pipeline=None)
|
||||
|
||||
iiit5k_rec_test = dict(
|
||||
iiit5k_textrecog_test = dict(
|
||||
type='OCRDataset',
|
||||
data_root=iiit5k_rec_data_root,
|
||||
ann_file='test_labels.json',
|
||||
data_root=iiit5k_textrecog_data_root,
|
||||
ann_file='textrecog_test.json',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue