mirror of https://github.com/open-mmlab/mmocr.git
Compare commits
621 Commits
Author | SHA1 | Date |
---|---|---|
|
966296f26a | |
|
2caab0a4e7 | |
|
b18a09b2f0 | |
|
9551af6e5a | |
|
1dcd6fa695 | |
|
6b3f6f5285 | |
|
0cd2878b04 | |
|
bbe8964f00 | |
|
a344280bcb | |
|
4eb3cc7de5 | |
|
e9a31ddd70 | |
|
1e696887b9 | |
|
231cff5da2 | |
|
8afc79f370 | |
|
9e713c63fe | |
|
d7c59f3325 | |
|
a7e326f829 | |
|
97efb04c50 | |
|
e0a78c021b | |
|
16de16f8f8 | |
|
e6174b29fe | |
|
4842599191 | |
|
1c91a9820a | |
|
afe58a4a77 | |
|
67f25c6fb3 | |
|
6342ff262c | |
|
4b887676a3 | |
|
bb591d2b1b | |
|
59d89e10c7 | |
|
73df26d749 | |
|
f47cff5199 | |
|
c886936117 | |
|
22f40b79ed | |
|
1a379f2f1b | |
|
d0dc90253a | |
|
6d9582b6c7 | |
|
e0707bf5f2 | |
|
ae252626d3 | |
|
d80df99037 | |
|
506f7d296e | |
|
9caacc76ee | |
|
63a6ed4e6c | |
|
c6580a48c1 | |
|
7ef34c4407 | |
|
47f54304f5 | |
|
465316f193 | |
|
590af4b5e8 | |
|
a58c77df80 | |
|
e9b23c56ad | |
|
75c06d34bb | |
|
bfb36d81b3 | |
|
45a8d89fb9 | |
|
d56155c82d | |
|
33cbc9b92f | |
|
cc78866ed7 | |
|
f250ea2379 | |
|
5685bb0f38 | |
|
5670695338 | |
|
81fd74c266 | |
|
47f7fc06ed | |
|
82f81ff67c | |
|
3aa9572a64 | |
|
62d440fe8e | |
|
0894178343 | |
|
7cfd412ce7 | |
|
280a89c18e | |
|
6eaa0673f7 | |
|
9b0f1da1e7 | |
|
37c5d371c7 | |
|
e9bf689f74 | |
|
1127240108 | |
|
df0be646ea | |
|
f820470415 | |
|
7cea6a6419 | |
|
3240bace4a | |
|
b21d2b964a | |
|
332089ca11 | |
|
b3be8cfbb3 | |
|
d25e061b03 | |
|
20a87d476c | |
|
d8e615921d | |
|
2a2cab3c8c | |
|
c870046a4a | |
|
edf085c010 | |
|
c3aef21eea | |
|
03a23ca4db | |
|
3b0a41518d | |
|
ad470e323a | |
|
2d743cfa19 | |
|
2b5cdbdbfc | |
|
a82fc66812 | |
|
bed778fc3f | |
|
689ecf0f5f | |
|
bf41194965 | |
|
dff97edaad | |
|
50f55c2976 | |
|
b3f21dd95d | |
|
7f4a1eecdc | |
|
6992923768 | |
|
b64565c10f | |
|
39f99ac720 | |
|
27b6a68586 | |
|
37dca0600a | |
|
0aa5d7be6d | |
|
b0557c2c55 | |
|
d679691a02 | |
|
acae8da223 | |
|
4d5ed98177 | |
|
5dbacfe202 | |
|
65e746eb3d | |
|
7e9f7756bc | |
|
53e72e4440 | |
|
1413b5043a | |
|
b79382cd6b | |
|
e3fd570687 | |
|
9baf440d7a | |
|
89606a1cf1 | |
|
e1aa1f6f42 | |
|
101f2b6eef | |
|
d2a6845c64 | |
|
0ec1524f54 | |
|
e81bb13696 | |
|
24bfb18768 | |
|
fb78c942d6 | |
|
4396e8f5d8 | |
|
c38618bf51 | |
|
f6da8715b9 | |
|
b11c58897c | |
|
302efb9db3 | |
|
419f98d8a4 | |
|
0bd62d67c8 | |
|
e096df8b57 | |
|
547ed31eda | |
|
5cfe481f7f | |
|
ffe5237aa8 | |
|
58ea06d986 | |
|
38d2fc3438 | |
|
5ded52230a | |
|
ebdf1cf90d | |
|
f4940de2a4 | |
|
79a4b2042c | |
|
e095107518 | |
|
d9ea92191e | |
|
3a0aa05d9c | |
|
9ac9a227ec | |
|
5940d6bc9c | |
|
fa4fd1fd42 | |
|
08cab32832 | |
|
b9152a2239 | |
|
782bcc446d | |
|
a12c215e85 | |
|
b8c445b04f | |
|
d9356252af | |
|
c957ded662 | |
|
2b6d258ae1 | |
|
c32ce6baa3 | |
|
31a353a892 | |
|
f6472eab2a | |
|
24aaec2675 | |
|
26e7ea6e77 | |
|
cfce57ad87 | |
|
37f3b88a05 | |
|
29107ef81d | |
|
3433c8cba4 | |
|
e067ddea23 | |
|
d8c0df4827 | |
|
b8e395ed71 | |
|
b1a3b94508 | |
|
06a20fae71 | |
|
5fbb22cd4e | |
|
9785dc616c | |
|
00254f0390 | |
|
cad55f6178 | |
|
e28fc326ae | |
|
6b2077ef19 | |
|
1d5f43e79f | |
|
d514784878 | |
|
34e97abcb0 | |
|
62ff782b71 | |
|
99c86a74b8 | |
|
79a778689d | |
|
baa2b4f863 | |
|
31c41d82c9 | |
|
8737675445 | |
|
b65b65e8f8 | |
|
0afbb70b5d | |
|
abf5a8972c | |
|
cf454ca76c | |
|
d92444097d | |
|
f1dd437d8d | |
|
1c06edc68f | |
|
8864fa174b | |
|
a09437adaa | |
|
9040263b04 | |
|
52a7873973 | |
|
357ccaf27d | |
|
705ea79067 | |
|
f619c697a5 | |
|
ec395c5c68 | |
|
f30c16ce96 | |
|
daa676dd37 | |
|
e7e46771ba | |
|
769d845b4f | |
|
dfc17207ba | |
|
b26907e908 | |
|
3d015462e7 | |
|
bf921661c6 | |
|
4fef7d1868 | |
|
0b53f50ead | |
|
5e596cc579 | |
|
a0284ae910 | |
|
73ba54cbb0 | |
|
8d29643d98 | |
|
22283b4acd | |
|
77ab13b3ff | |
|
5a88a771c3 | |
|
e9d4364842 | |
|
794744826e | |
|
c6cc37b096 | |
|
1cf2643df0 | |
|
b4336204b8 | |
|
1077ce4294 | |
|
0dd72f40f7 | |
|
93d883e7dc | |
|
87f15b3135 | |
|
3e2a336e91 | |
|
7f3d832074 | |
|
50cba1ac6e | |
|
a5b8fb5df1 | |
|
89442c3dc2 | |
|
e801df3471 | |
|
e8d1bc37d3 | |
|
c44b611a6c | |
|
45f3f51dba | |
|
e100479ebb | |
|
ac02c20581 | |
|
27697e387c | |
|
a6f6b12277 | |
|
415bb7f8d0 | |
|
2a9e8f5306 | |
|
a979346e35 | |
|
8b8cc4e6e5 | |
|
db6ce0d95e | |
|
dbb346afed | |
|
c91b028772 | |
|
ce47b53399 | |
|
53562d8526 | |
|
bb80d16da2 | |
|
965f92f1e0 | |
|
cbef6b8c78 | |
|
f788bfdbb9 | |
|
e72edd6dcb | |
|
19b19cc404 | |
|
bfa2f20a35 | |
|
8f0141cfaa | |
|
8b32ea6fa9 | |
|
8c904127a8 | |
|
bf042f8267 | |
|
56179fe1a9 | |
|
ea537bbe86 | |
|
9b368fe45c | |
|
e78a1591db | |
|
7ab2a2e09d | |
|
ad73fb10ff | |
|
a45716d20e | |
|
b32412a9e9 | |
|
9bd5258513 | |
|
1b5764b155 | |
|
b81d58e70c | |
|
9a0054ea66 | |
|
c093c687a7 | |
|
a24de8318e | |
|
ab04560a4d | |
|
1860a3a3b6 | |
|
5c8c774aa9 | |
|
f247926028 | |
|
f36c88de0c | |
|
9620f2de91 | |
|
4c20ebcb71 | |
|
e760dcd1dd | |
|
d27b2fd84f | |
|
240bf06ddd | |
|
908ebf1bcf | |
|
b2e06c04f5 | |
|
7aea3619ca | |
|
8d0c6a013a | |
|
b0b6dadc00 | |
|
7ac7f66949 | |
|
6ca7404925 | |
|
814b281c79 | |
|
98dae9319f | |
|
7fcfa09431 | |
|
d73903a9a0 | |
|
c7a4298c32 | |
|
0d9b40706c | |
|
6b6d833be4 | |
|
1cc049086e | |
|
587566d2c2 | |
|
bcc245efd3 | |
|
1978075577 | |
|
792cb26924 | |
|
7b25b62c21 | |
|
97f6c1d5d6 | |
|
7cd96aaf79 | |
|
27313b264c | |
|
ef683206ed | |
|
6759bd409a | |
|
37ff38e7aa | |
|
85d3344cf8 | |
|
506fcdbe05 | |
|
c9ec09d8f1 | |
|
cdba3056c0 | |
|
80d85c129f | |
|
8331224e52 | |
|
8d2e8886e8 | |
|
48cc575507 | |
|
2381c993ea | |
|
f2024dc4bf | |
|
717460055c | |
|
05c4bc3c88 | |
|
f11ed20d9a | |
|
2cca103b93 | |
|
507f0656c9 | |
|
bc043101fe | |
|
0bf05b0ae9 | |
|
83ba24cad6 | |
|
870f062394 | |
|
83e4fb10ee | |
|
e00d4f377b | |
|
2b476bd8c0 | |
|
8c2873f061 | |
|
2487c0a4a5 | |
|
0dc33189e0 | |
|
7593e04ea0 | |
|
abb6c16095 | |
|
ca01ee5eb3 | |
|
7a6e2aece1 | |
|
0393e32603 | |
|
5dfa68641c | |
|
ec7415a382 | |
|
6f30020eec | |
|
e303404215 | |
|
f5e93d0eba | |
|
8d65f873da | |
|
05ff5d0489 | |
|
16b41108f9 | |
|
8c5e83c521 | |
|
1b33ff5d76 | |
|
dc180443b8 | |
|
254dbdd18a | |
|
1a167ff317 | |
|
3980ead987 | |
|
27261b2bce | |
|
d8c3aeff3a | |
|
567aec5390 | |
|
20e999e3b9 | |
|
2b3a4fe6b5 | |
|
3734527d38 | |
|
dc84187311 | |
|
3709c7b03a | |
|
157cf7a127 | |
|
1cbc42eceb | |
|
8bce19218e | |
|
17b56ac646 | |
|
993ee5a91c | |
|
eb2d5b525a | |
|
f107991ac1 | |
|
914c8af7bf | |
|
19958fbf6f | |
|
bf517b63e8 | |
|
68b0aaa2e9 | |
|
41a642bc7b | |
|
7813e18a6c | |
|
e73665029b | |
|
ae4ba012a8 | |
|
dae4c9ca8c | |
|
de616ffa02 | |
|
058984af1d | |
|
efd81b7a5a | |
|
67e4085915 | |
|
1d1f664e9a | |
|
25faa7d1f1 | |
|
d50d2a46eb | |
|
a844b497db | |
|
ee1212a5cd | |
|
23e1f2432a | |
|
62d390dc3f | |
|
966e2ca9de | |
|
5381b1d105 | |
|
2d478ea244 | |
|
b8d472b77b | |
|
4e603f0531 | |
|
83aac48491 | |
|
de78a8839f | |
|
d4dbad56ee | |
|
a26114d9c7 | |
|
02c6802312 | |
|
2cb55550cd | |
|
2df8cb89a4 | |
|
2fe534b178 | |
|
422bea9d10 | |
|
77ffe8fb00 | |
|
b828d654a9 | |
|
460f068891 | |
|
2f5e337e2f | |
|
9470821aa0 | |
|
e0992a7fae | |
|
ed9e8d150c | |
|
9db0941837 | |
|
eaf7f6bf0c | |
|
e23a2ef089 | |
|
622e65926e | |
|
7490301877 | |
|
52f0eefb2e | |
|
b40d3ffd47 | |
|
9b7f75e157 | |
|
58ca3a1463 | |
|
0a828ef250 | |
|
c5364f843d | |
|
02a43d234e | |
|
b20bcc47b3 | |
|
6ff567bb08 | |
|
9f2fabc35a | |
|
09169f32ee | |
|
4b185d3347 | |
|
ab6e897c6b | |
|
9e9c34d74c | |
|
c8589f2af4 | |
|
d942427161 | |
|
77e29adb7b | |
|
ef98df8052 | |
|
298ea312c0 | |
|
83ec5726d6 | |
|
b955df9904 | |
|
d2e8e79df1 | |
|
0dc4fda545 | |
|
988fea441b | |
|
dffb35b1c1 | |
|
8313e698d2 | |
|
0fb0d7cb1a | |
|
24575de140 | |
|
ca35c78e69 | |
|
41d9c741cd | |
|
47771788f0 | |
|
fe43b4e767 | |
|
7be4dc1bca | |
|
21b01344cc | |
|
0bf1ce88c2 | |
|
cd4e520cb9 | |
|
c0c0f4b565 | |
|
4a04982806 | |
|
0716c97cf6 | |
|
00ba46b5b9 | |
|
05990c58d9 | |
|
831b937c98 | |
|
a3b3bb647f | |
|
490d6cd806 | |
|
1212ae89cc | |
|
301eb7b783 | |
|
fded755af2 | |
|
17606c25fc | |
|
200899b2a0 | |
|
b406f3785f | |
|
13920924ce | |
|
95f19aa891 | |
|
b6e031666b | |
|
55c99dd0c1 | |
|
f71852398d | |
|
7c3789d64e | |
|
d636adeb1f | |
|
8f7c0e2977 | |
|
21d0dd71dc | |
|
48be56928b | |
|
ed37d2db5c | |
|
bf7c738798 | |
|
acd2bcc452 | |
|
f7731c43bd | |
|
a353a28a1a | |
|
a135580912 | |
|
f03ed3ce11 | |
|
d5a2d20574 | |
|
1af7f94a63 | |
|
3992f0d78e | |
|
b44869059b | |
|
401088913b | |
|
35e5138b5d | |
|
dfe93dc7d2 | |
|
da175b44a4 | |
|
4f2ec6de71 | |
|
50f229d9fe | |
|
8614070e36 | |
|
d41921f03d | |
|
781166764c | |
|
25e819f6bf | |
|
b3b1ef146b | |
|
34a96a8b87 | |
|
cb8f980bae | |
|
d859fcad1c | |
|
2a852f23b5 | |
|
71d1a445c9 | |
|
b585dbcdd7 | |
|
d34fad1451 | |
|
a4952a6dd6 | |
|
8ac235677e | |
|
f1eebe9e34 | |
|
38eef984c2 | |
|
f4a8e0f3a9 | |
|
8396b2014e | |
|
3a9f9e6b61 | |
|
32ef9cc3cf | |
|
7a66a84b64 | |
|
cd3d173b18 | |
|
747b2a14dc | |
|
43c50eee82 | |
|
1e1da7b395 | |
|
0f0f68baf1 | |
|
00f821315e | |
|
bbbefaeb31 | |
|
22d90301b8 | |
|
a379d086f1 | |
|
dd29f09593 | |
|
afd9f9893a | |
|
7582fdea41 | |
|
3aae157aec | |
|
f173cd3543 | |
|
4c9d14a6e7 | |
|
4fd048aa24 | |
|
206c4ccc65 | |
|
58c59e80dd | |
|
fe43259a05 | |
|
4246b1eaee | |
|
ee48713a89 | |
|
c78be99f6b | |
|
7e7a526f37 | |
|
f47f3eff03 | |
|
05e31e09bc | |
|
e2577741dd | |
|
4706cc7eca | |
|
2f4679e908 | |
|
be30df5d50 | |
|
f820a50752 | |
|
a2a3b677d8 | |
|
84a61ba816 | |
|
a05e3f19c5 | |
|
e8f57d6540 | |
|
5dc791adbb | |
|
d2808e6b84 | |
|
7b6778c5d8 | |
|
0b5d2df310 | |
|
6cd38a038f | |
|
ac4eb34843 | |
|
79186b61ec | |
|
178030bad6 | |
|
b28d0d99d6 | |
|
f29853d9cd | |
|
6478499073 | |
|
df2f7b69db | |
|
9acc3680cb | |
|
7b09da485b | |
|
0f041d4250 | |
|
f6b72b244b | |
|
6fe4ee82f2 | |
|
98d9d39505 | |
|
cb85f857aa | |
|
6a260514e8 | |
|
2c23098b29 | |
|
26da038d49 | |
|
9c3d741712 | |
|
f0c6d44ce8 | |
|
98bc90bd1c | |
|
c920edfb3a | |
|
c47c5711c1 | |
|
f7cea9d40f | |
|
6f3aed95a6 | |
|
b5c5ddd3e0 | |
|
536dfdd4bd | |
|
593d7529a3 | |
|
d98648c06f | |
|
41c1671e7b | |
|
23458f8a47 | |
|
3f24e34a5d | |
|
13bd2837ae | |
|
a90b9600ce | |
|
b5fc589320 | |
|
324b7e4e80 | |
|
69e6c80558 | |
|
73222b270c | |
|
107e9d2f48 | |
|
1755dad193 | |
|
4c1790b3c6 | |
|
5657be1e1a | |
|
1bd26f24ba | |
|
688d72fdc4 | |
|
c4a2fa5eee | |
|
7800e13fc2 | |
|
64fb6fffc0 | |
|
9bdc247c0c | |
|
fe64040581 | |
|
72a79f9350 | |
|
1f888c9e97 | |
|
d669ce2e82 | |
|
4f36bcd1aa | |
|
b78f5b3b26 | |
|
d068370b85 | |
|
b1ab4c7c33 | |
|
7c5c784a94 | |
|
d3f65aaacf | |
|
12558969ee | |
|
e1e26d3f74 | |
|
9d7818b564 | |
|
376fe05e6c | |
|
0c060099e1 | |
|
f89ab858f4 | |
|
426995747b | |
|
86879c6834 | |
|
13986f497d | |
|
d9bb3d6359 | |
|
3059c97dc5 | |
|
08eecb9256 | |
|
9cfa29f862 | |
|
0329ff9328 |
.circleci
.github
configs
_base_
schedules
backbone/oclip
kie/_base_
|
@ -26,7 +26,7 @@ workflows:
|
|||
tools/.* lint_only false
|
||||
configs/.* lint_only false
|
||||
.circleci/.* lint_only false
|
||||
base-revision: main
|
||||
base-revision: dev-1.x
|
||||
# this is the path of the configuration we should trigger once
|
||||
# path filtering and pipeline parameter value updates are
|
||||
# complete. In this case, we are using the parent dynamic
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
TORCH=$1
|
||||
CUDA=$2
|
||||
|
||||
# 10.2 -> cu102
|
||||
MMCV_CUDA="cu`echo ${CUDA} | tr -d '.'`"
|
||||
|
||||
# MMCV only provides pre-compiled packages for torch 1.x.0
|
||||
# which works for any subversions of torch 1.x.
|
||||
# We force the torch version to be 1.x.0 to ease package searching
|
||||
# and avoid unnecessary rebuild during MMCV's installation.
|
||||
TORCH_VER_ARR=(${TORCH//./ })
|
||||
TORCH_VER_ARR[2]=0
|
||||
printf -v MMCV_TORCH "%s." "${TORCH_VER_ARR[@]}"
|
||||
MMCV_TORCH=${MMCV_TORCH%?} # Remove the last dot
|
||||
|
||||
echo "export MMCV_CUDA=${MMCV_CUDA}" >> $BASH_ENV
|
||||
echo "export MMCV_TORCH=${MMCV_TORCH}" >> $BASH_ENV
|
|
@ -16,9 +16,6 @@ jobs:
|
|||
- run:
|
||||
name: Install pre-commit hook
|
||||
command: |
|
||||
sudo apt-add-repository ppa:brightbox/ruby-ng -y
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ruby2.7
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
- run:
|
||||
|
@ -28,7 +25,7 @@ jobs:
|
|||
name: Check docstring coverage
|
||||
command: |
|
||||
pip install interrogate
|
||||
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 50 mmocr
|
||||
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 90 mmocr
|
||||
build_cpu:
|
||||
parameters:
|
||||
# The python version must match available image tags in
|
||||
|
@ -44,75 +41,74 @@ jobs:
|
|||
resource_class: large
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Get MMCV_TORCH as environment variables
|
||||
command: |
|
||||
. .circleci/scripts/get_mmcv_var.sh << parameters.torch >>
|
||||
source $BASH_ENV
|
||||
- run:
|
||||
name: Install Libraries
|
||||
command: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev libncurses5
|
||||
sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev libncurses5 libgeos-dev
|
||||
- run:
|
||||
name: Configure Python & pip
|
||||
command: |
|
||||
python -m pip install --upgrade pip
|
||||
python -m pip install wheel
|
||||
pip install --upgrade pip
|
||||
pip install wheel
|
||||
- run:
|
||||
name: Install PyTorch
|
||||
command: |
|
||||
python -V
|
||||
python -m pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
- run:
|
||||
name: Install mmocr dependencies
|
||||
command: |
|
||||
python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch${MMCV_TORCH}/index.html
|
||||
python -m pip install mmdet
|
||||
python -m pip install -r requirements.txt
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- run:
|
||||
name: Build and install
|
||||
command: |
|
||||
python -m pip install -e .
|
||||
pip install -e .
|
||||
- run:
|
||||
name: Run unittests
|
||||
command: |
|
||||
python -m coverage run --branch --source mmocr -m pytest tests/
|
||||
python -m coverage xml
|
||||
python -m coverage report -m
|
||||
|
||||
coverage run --branch --source mmocr -m pytest tests/
|
||||
coverage xml
|
||||
coverage report -m
|
||||
build_cuda:
|
||||
parameters:
|
||||
torch:
|
||||
type: string
|
||||
cuda:
|
||||
type: enum
|
||||
enum: ["10.1", "10.2", "11.1"]
|
||||
enum: ["10.1", "10.2", "11.1", "11.7"]
|
||||
cudnn:
|
||||
type: integer
|
||||
default: 7
|
||||
machine:
|
||||
image: ubuntu-2004-cuda-11.4:202110-01
|
||||
docker_layer_caching: true
|
||||
# docker_layer_caching: true
|
||||
resource_class: gpu.nvidia.small
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
name: Get MMCV_TORCH and MMCV_CUDA as environment variables
|
||||
# Cloning repos in VM since Docker doesn't have access to the private key
|
||||
name: Clone Repos
|
||||
command: |
|
||||
. .circleci/scripts/get_mmcv_var.sh << parameters.torch >> << parameters.cuda >>
|
||||
source $BASH_ENV
|
||||
git clone -b main --depth 1 https://github.com/open-mmlab/mmengine.git /home/circleci/mmengine
|
||||
git clone -b dev-3.x --depth 1 https://github.com/open-mmlab/mmdetection.git /home/circleci/mmdetection
|
||||
- run:
|
||||
name: Build Docker image
|
||||
command: |
|
||||
docker build .circleci/docker -t mmocr:gpu --build-arg PYTORCH=<< parameters.torch >> --build-arg CUDA=<< parameters.cuda >> --build-arg CUDNN=<< parameters.cudnn >>
|
||||
docker run --gpus all -t -d -v /home/circleci/project:/mmocr -w /mmocr --name mmocr mmocr:gpu
|
||||
docker run --gpus all -t -d -v /home/circleci/project:/mmocr -v /home/circleci/mmengine:/mmengine -v /home/circleci/mmdetection:/mmdetection -w /mmocr --name mmocr mmocr:gpu
|
||||
- run:
|
||||
name: Install mmocr dependencies
|
||||
command: |
|
||||
docker exec mmocr pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/${MMCV_CUDA}/torch${MMCV_TORCH}/index.html
|
||||
docker exec mmocr pip install mmdet
|
||||
docker exec mmocr pip install -r requirements.txt
|
||||
docker exec mmocr pip install -e /mmengine
|
||||
docker exec mmocr pip install -U openmim
|
||||
docker exec mmocr mim install 'mmcv >= 2.0.0rc1'
|
||||
docker exec mmocr pip install -e /mmdetection
|
||||
docker exec mmocr pip install -r requirements/tests.txt
|
||||
- run:
|
||||
name: Build and install
|
||||
command: |
|
||||
|
@ -120,7 +116,7 @@ jobs:
|
|||
- run:
|
||||
name: Run unittests
|
||||
command: |
|
||||
docker exec mmocr python -m pytest tests/
|
||||
docker exec mmocr pytest tests/
|
||||
|
||||
workflows:
|
||||
pr_stage_lint:
|
||||
|
@ -131,6 +127,8 @@ workflows:
|
|||
filters:
|
||||
branches:
|
||||
ignore:
|
||||
- dev-1.x
|
||||
- 1.x
|
||||
- main
|
||||
pr_stage_test:
|
||||
when:
|
||||
|
@ -142,18 +140,20 @@ workflows:
|
|||
filters:
|
||||
branches:
|
||||
ignore:
|
||||
- dev-1.x
|
||||
- test-1.x
|
||||
- main
|
||||
- build_cpu:
|
||||
name: minimum_version_cpu
|
||||
torch: 1.6.0
|
||||
torchvision: 0.7.0
|
||||
python: 3.6.9 # The lowest python 3.6.x version available on CircleCI images
|
||||
python: "3.7"
|
||||
requires:
|
||||
- lint
|
||||
- build_cpu:
|
||||
name: maximum_version_cpu
|
||||
torch: 1.9.0
|
||||
torchvision: 0.10.0
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
python: 3.9.0
|
||||
requires:
|
||||
- minimum_version_cpu
|
||||
|
@ -169,6 +169,15 @@ workflows:
|
|||
cuda: "10.2"
|
||||
requires:
|
||||
- hold
|
||||
- build_cuda:
|
||||
name: mainstream_version_gpu
|
||||
torch: 2.0.0
|
||||
# Use double quotation mark to explicitly specify its type
|
||||
# as string instead of number
|
||||
cuda: "11.7"
|
||||
cudnn: 8
|
||||
requires:
|
||||
- hold
|
||||
merge_stage_test:
|
||||
when:
|
||||
not:
|
||||
|
@ -183,4 +192,5 @@ workflows:
|
|||
filters:
|
||||
branches:
|
||||
only:
|
||||
- dev-1.x
|
||||
- main
|
||||
|
|
|
@ -2,4 +2,4 @@
|
|||
skip = *.ipynb
|
||||
count =
|
||||
quiet-level = 3
|
||||
ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist
|
||||
ignore-words-list = convertor,convertors,formating,nin,wan,datas,hist,ned
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
textdet/dbnet/dbnet_resnet18_fpnc_1200e_icdar2015.py
|
||||
textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
textdet/drrg/drrg_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
textdet/panet/panet_resnet18_fpem-ffm_600e_icdar2015.py
|
||||
textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
textdet/textsnake/textsnake_resnet50_fpn-unet_1200e_ctw1500.py
|
||||
textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
textrecog/master/master_resnet31_12e_st_mj_sa.py
|
||||
textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
textrecog/robust_scanner/robustscanner_resnet31_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
textrecog/satrn/satrn_shallow-small_5e_st_mj.py
|
||||
textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
textrecog/svtr/svtr-small_20e_st_mj.py
|
|
@ -0,0 +1,7 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
third_part_libs = [
|
||||
'pip install -r ../requirements/albu.txt',
|
||||
]
|
||||
|
||||
default_floating_range = 0.5
|
|
@ -0,0 +1,9 @@
|
|||
textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py
|
||||
textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
textrecog/abinet/abinet-vision_20e_st-an_mj.py
|
||||
textrecog/crnn/crnn_mini-vgg_5e_mj.py
|
||||
textrecog/aster/aster_resnet45_6e_st_mj.py
|
||||
textrecog/nrtr/nrtr_resnet31-1by16-1by8_6e_st_mj.py
|
||||
textrecog/sar/sar_resnet31_parallel-decoder_5e_st-sub_mj-sub_sa_real.py
|
||||
textrecog/svtr/svtr-small_20e_st_mj.py
|
|
@ -0,0 +1,18 @@
|
|||
# Each line should be the relative path to the root directory
|
||||
# of this repo. Support regular expression as well.
|
||||
# For example:
|
||||
# mmocr/models/textdet/postprocess/utils.py
|
||||
# .*/utils.py
|
||||
.*/__init__.py
|
||||
|
||||
# It will be removed after all models have been refactored
|
||||
mmocr/utils/bbox_utils.py
|
||||
|
||||
# Major part is covered, however, it's hard to cover model's output.
|
||||
mmocr/models/textdet/detectors/mmdet_wrapper.py
|
||||
|
||||
# It will be removed after KieVisualizer and TextSpotterVisualizer
|
||||
mmocr/visualization/visualize.py
|
||||
|
||||
# Add tests for data preparers later
|
||||
mmocr/datasets/preparers
|
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
readarray -t IGNORED_FILES < $( dirname "$0" )/covignore.cfg
|
||||
|
||||
REUSE_COVERAGE_REPORT=${REUSE_COVERAGE_REPORT:-0}
|
||||
REPO=${1:-"origin"}
|
||||
BRANCH=${2:-"refactor_dev"}
|
||||
|
||||
git fetch $REPO $BRANCH
|
||||
|
||||
PY_FILES=""
|
||||
for FILE_NAME in $(git diff --name-only ${REPO}/${BRANCH}); do
|
||||
# Only test python files in mmocr/ existing in current branch, and not ignored in covignore.cfg
|
||||
if [ ${FILE_NAME: -3} == ".py" ] && [ ${FILE_NAME:0:6} == "mmocr/" ] && [ -f "$FILE_NAME" ]; then
|
||||
IGNORED=false
|
||||
for IGNORED_FILE_NAME in "${IGNORED_FILES[@]}"; do
|
||||
# Skip blank lines
|
||||
if [ -z "$IGNORED_FILE_NAME" ]; then
|
||||
continue
|
||||
fi
|
||||
if [ "${IGNORED_FILE_NAME::1}" != "#" ] && [[ "$FILE_NAME" =~ $IGNORED_FILE_NAME ]]; then
|
||||
echo "Ignoring $FILE_NAME"
|
||||
IGNORED=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [ "$IGNORED" = false ]; then
|
||||
PY_FILES="$PY_FILES $FILE_NAME"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
|
||||
# Only test the coverage when PY_FILES are not empty, otherwise they will test the entire project
|
||||
if [ ! -z "${PY_FILES}" ]
|
||||
then
|
||||
if [ "$REUSE_COVERAGE_REPORT" == "0" ]; then
|
||||
coverage run --branch --source mmocr -m pytest tests/
|
||||
fi
|
||||
coverage report --fail-under 90 -m $PY_FILES
|
||||
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-magic --ignore-regex "__repr__" --fail-under 95 $PY_FILES
|
||||
fi
|
|
@ -14,22 +14,22 @@ appearance, race, religion, or sexual identity and orientation.
|
|||
Examples of behavior that contributes to creating a positive environment
|
||||
include:
|
||||
|
||||
* Using welcoming and inclusive language
|
||||
* Being respectful of differing viewpoints and experiences
|
||||
* Gracefully accepting constructive criticism
|
||||
* Focusing on what is best for the community
|
||||
* Showing empathy towards other community members
|
||||
- Using welcoming and inclusive language
|
||||
- Being respectful of differing viewpoints and experiences
|
||||
- Gracefully accepting constructive criticism
|
||||
- Focusing on what is best for the community
|
||||
- Showing empathy towards other community members
|
||||
|
||||
Examples of unacceptable behavior by participants include:
|
||||
|
||||
* The use of sexualized language or imagery and unwelcome sexual attention or
|
||||
advances
|
||||
* Trolling, insulting/derogatory comments, and personal or political attacks
|
||||
* Public or private harassment
|
||||
* Publishing others' private information, such as a physical or electronic
|
||||
address, without explicit permission
|
||||
* Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
- The use of sexualized language or imagery and unwelcome sexual attention or
|
||||
advances
|
||||
- Trolling, insulting/derogatory comments, and personal or political attacks
|
||||
- Public or private harassment
|
||||
- Publishing others' private information, such as a physical or electronic
|
||||
address, without explicit permission
|
||||
- Other conduct which could reasonably be considered inappropriate in a
|
||||
professional setting
|
||||
|
||||
## Our Responsibilities
|
||||
|
||||
|
@ -70,7 +70,7 @@ members of the project's leadership.
|
|||
This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
|
||||
available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
||||
For answers to common questions about this code of conduct, see
|
||||
https://www.contributor-covenant.org/faq
|
||||
|
||||
[homepage]: https://www.contributor-covenant.org
|
||||
|
|
|
@ -1,224 +1 @@
|
|||
# Contributing to MMOCR
|
||||
|
||||
All kinds of contributions are welcome, including but not limited to the following.
|
||||
|
||||
- Fixes (typo, bugs)
|
||||
- New features and components
|
||||
|
||||
Contents
|
||||
|
||||
- [Contributing to MMOCR](#contributing-to-mmocr)
|
||||
- [Workflow](#workflow)
|
||||
- [Main Steps](#main-steps)
|
||||
- [Detailed Steps](#detailed-steps)
|
||||
- [Step 1: Create a Fork](#step-1-create-a-fork)
|
||||
- [Step 2: Develop a new feature](#step-2-develop-a-new-feature)
|
||||
- [Step 2.1: Keep your fork up to date](#step-21-keep-your-fork-up-to-date)
|
||||
- [Step 2.2: Create a feature branch](#step-22-create-a-feature-branch)
|
||||
- [Step 3: Commit your changes](#step-3-commit-your-changes)
|
||||
- [Step 4: Prepare to Pull Request](#step-4-prepare-to-pull-request)
|
||||
- [Step 4.1: Merge official repo updates to your fork](#step-41-merge-official-repo-updates-to-your-fork)
|
||||
- [Step 4.2: Push <your_feature_branch> branch to your remote forked repo,](#step-42-push-your_feature_branch-branch-to-your-remote-forked-repo)
|
||||
- [Step 4.3: Create a Pull Request](#step-43-create-a-pull-request)
|
||||
- [Step 4.4: Review code](#step-44-review-code)
|
||||
- [Step 4.5: Revise <your_feature_branch> (optional)](#step-45-revise-your_feature_branch--optional)
|
||||
- [Step 4.6: Delete <your_feature_branch> branch if your PR is accepted.](#step-46-delete-your_feature_branch-branch-if-your-pr-is-accepted)
|
||||
- [Code style](#code-style)
|
||||
- [Python](#python)
|
||||
- [Installing pre-commit hooks](#installing-pre-commit-hooks)
|
||||
- [Prerequisite](#prerequisite)
|
||||
- [Installation](#installation)
|
||||
- [C++ and CUDA](#c-and-cuda)
|
||||
|
||||
## Workflow
|
||||
### Main Steps
|
||||
|
||||
1. Fork and pull the latest MMOCR
|
||||
2. Checkout a new branch (do not use main branch for PRs)
|
||||
3. Commit your changes
|
||||
4. Create a PR
|
||||
|
||||
**Note**
|
||||
|
||||
- If you plan to add some new features that involve large changes, it is encouraged to open an issue for discussion first.
|
||||
- If you are the author of some papers and would like to include your method to MMOCR, please let us know (open an issue or contact the maintainers). We will much appreciate your contribution.
|
||||
- For new features and new modules, unit tests are required to improve the code's robustness.
|
||||
|
||||
### Detailed Steps
|
||||
|
||||
The official public [repository](https://github.com/open-mmlab/mmocr) holds only one branch with an infinite lifetime: *main*
|
||||
|
||||
The *main* branch is the main branch where the source code of **HEAD** always reflects a state with the latest development changes for the next release.
|
||||
|
||||
Feature branches are used to develop new features for the upcoming or a distant future release.
|
||||
|
||||
All new developers to **MMOCR** need to follow the following steps:
|
||||
|
||||
#### Step 1: Create a Fork
|
||||
|
||||
1. Fork the repo on GitHub or GitLab to your personal account. Click the `Fork` button on the [project page](https://github.com/open-mmlab/mmocr).
|
||||
|
||||
2. Clone your new forked repo to your computer.
|
||||
```
|
||||
git clone https://github.com/<your name>/mmocr.git
|
||||
```
|
||||
3. Add the official repo as an upstream:
|
||||
```
|
||||
git remote add upstream https://github.com/open-mmlab/mmocr.git
|
||||
```
|
||||
|
||||
#### Step 2: Develop a new feature
|
||||
|
||||
##### Step 2.1: Keep your fork up to date
|
||||
|
||||
Whenever you want to update your fork with the latest upstream changes, you need to fetch the upstream repo's branches and latest commits to bring them into your repository:
|
||||
|
||||
```
|
||||
# Fetch from upstream remote
|
||||
git fetch upstream
|
||||
|
||||
# Update your main branch
|
||||
git checkout main
|
||||
git rebase upstream/main
|
||||
git push origin main
|
||||
```
|
||||
|
||||
##### Step 2.2: Create a feature branch
|
||||
- Create an issue on [github](https://github.com/open-mmlab/mmocr)
|
||||
|
||||
- Create a feature branch
|
||||
-
|
||||
```bash
|
||||
git checkout -b feature/iss_<index> main
|
||||
# index is the issue index on github above
|
||||
```
|
||||
|
||||
#### Step 3: Commit your changes
|
||||
|
||||
Develop your new feature and test it to make sure it works well, then commit.
|
||||
|
||||
If you have not configured pre-commit hooks for MMOCR, please [install pre-commit hooks](#installing-pre-commit-hooks) before your first commit.
|
||||
|
||||
The commit message is suggested to be clear. Here is an example:
|
||||
|
||||
```bash
|
||||
git commit -m "fix #<issue_index>: <commit_message>"
|
||||
```
|
||||
|
||||
#### Step 4: Prepare to Pull Request
|
||||
|
||||
- Before creating an PR, please run
|
||||
|
||||
```bash
|
||||
pre-commit run --all-files
|
||||
pytest tests
|
||||
```
|
||||
|
||||
and fix all failures.
|
||||
|
||||
- Make sure to link your pull request to the related issue. Please refer to the [instructon](https://docs.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue)
|
||||
|
||||
|
||||
##### Step 4.1: Merge official repo updates to your fork
|
||||
|
||||
```
|
||||
# fetch from upstream remote. i.e., the official repo
|
||||
git fetch upstream
|
||||
|
||||
# update the main branch of your fork
|
||||
git checkout main
|
||||
git rebase upstream/main
|
||||
git push origin main
|
||||
|
||||
# update the <your_feature_branch> branch
|
||||
git checkout <your_feature_branch>
|
||||
git rebase main
|
||||
# solve conflicts if any and Test
|
||||
```
|
||||
|
||||
##### Step 4.2: Push <your_feature_branch> branch to your remote forked repo,
|
||||
```
|
||||
git checkout <your_feature_branch>
|
||||
git push origin <your_feature_branch>
|
||||
```
|
||||
##### Step 4.3: Create a Pull Request
|
||||
|
||||
Go to the page for your fork on GitHub, select your new feature branch, and click the pull request button to integrate your feature branch into the upstream remote’s develop branch.
|
||||
|
||||
##### Step 4.4: Review code
|
||||
|
||||
|
||||
##### Step 4.5: Revise <your_feature_branch> (optional)
|
||||
If PR is not accepted, pls follow steps above till your PR is accepted.
|
||||
|
||||
##### Step 4.6: Delete <your_feature_branch> branch if your PR is accepted.
|
||||
```
|
||||
git branch -d <your_feature_branch>
|
||||
git push origin :<your_feature_branch>
|
||||
```
|
||||
|
||||
|
||||
## Code style
|
||||
### Python
|
||||
We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style.
|
||||
|
||||
We use the following tools for linting and formatting:
|
||||
|
||||
- [flake8](http://flake8.pycqa.org/en/latest/): linter
|
||||
- [yapf](https://github.com/google/yapf): formatter
|
||||
- [isort](https://github.com/timothycrosley/isort): sort imports
|
||||
|
||||
Style configurations of yapf and isort can be found in [setup.cfg](../setup.cfg).
|
||||
|
||||
We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`,
|
||||
fixes `end-of-files`, sorts `requirments.txt` automatically on every commit.
|
||||
The config for a pre-commit hook is stored in [.pre-commit-config](../.pre-commit-config.yaml).
|
||||
|
||||
#### Installing pre-commit hooks
|
||||
|
||||
##### Prerequisite
|
||||
|
||||
Make sure Ruby runs on your system.
|
||||
|
||||
On Windows: Install Ruby from [the official website](https://rubyinstaller.org/).
|
||||
|
||||
On Debian/Ubuntu:
|
||||
|
||||
```shell
|
||||
sudo apt-add-repository ppa:brightbox/ruby-ng -y
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ruby2.7
|
||||
```
|
||||
|
||||
On other Linux distributions:
|
||||
|
||||
```shell
|
||||
# install rvm
|
||||
curl -L https://get.rvm.io | bash -s -- --autolibs=read-fail
|
||||
[[ -s "$HOME/.rvm/scripts/rvm" ]] && source "$HOME/.rvm/scripts/rvm"
|
||||
rvm autolibs disable
|
||||
# install ruby
|
||||
rvm install 2.7.1
|
||||
```
|
||||
|
||||
##### Installation
|
||||
|
||||
After you clone the repository, you will need to install and initialize pre-commit hook.
|
||||
|
||||
```shell
|
||||
pip install -U pre-commit
|
||||
```
|
||||
|
||||
|
||||
From the repository folder
|
||||
|
||||
```shell
|
||||
pre-commit install
|
||||
```
|
||||
|
||||
After this on every commit check code linters and formatter will be enforced.
|
||||
|
||||
>Before you create a PR, make sure that your code lints and is formatted by yapf.
|
||||
|
||||
### C++ and CUDA
|
||||
We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html).
|
||||
We appreciate all contributions to improve MMOCR. Please read [Contribution Guide](/docs/en/notes/contribution_guide.md) for step-by-step instructions to make a contribution to MMOCR, and [CONTRIBUTING.md](https://github.com/open-mmlab/mmcv/blob/master/CONTRIBUTING.md) in MMCV for more details about the contributing guideline.
|
||||
|
|
|
@ -0,0 +1,121 @@
|
|||
name: "🐞 Bug report"
|
||||
description: "Create a report to help us reproduce and fix the bug"
|
||||
labels: kind/bug
|
||||
title: "[Bug] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
If this issue is about installing MMCV, please file an issue at [MMCV](https://github.com/open-mmlab/mmcv/issues/new/choose).
|
||||
If it's anything about model deployment, please raise it to [MMDeploy](https://github.com/open-mmlab/mmdeploy)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Prerequisite
|
||||
description: Please check the following items before creating a new issue.
|
||||
options:
|
||||
- label: I have searched [Issues](https://github.com/open-mmlab/mmocr/issues) and [Discussions](https://github.com/open-mmlab/mmocr/discussions) but cannot get the expected help.
|
||||
required: true
|
||||
# - label: I have read the [FAQ documentation](https://mmocr.readthedocs.io/en/1.x/notes/4_faq.html) but cannot get the expected help.
|
||||
# required: true
|
||||
- label: The bug has not been fixed in the [latest version (0.x)](https://github.com/open-mmlab/mmocr) or [latest version (1.x)](https://github.com/open-mmlab/mmocr/tree/dev-1.x).
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: task
|
||||
attributes:
|
||||
label: Task
|
||||
description: The problem arises when
|
||||
options:
|
||||
- I'm using the official example scripts/configs for the officially supported tasks/models/datasets.
|
||||
- I have modified the scripts/configs, or I'm working on my own tasks/models/datasets.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: dropdown
|
||||
id: branch
|
||||
attributes:
|
||||
label: Branch
|
||||
description: The problem arises when I'm working on
|
||||
options:
|
||||
- main branch https://github.com/open-mmlab/mmocr
|
||||
- 1.x branch https://github.com/open-mmlab/mmocr/tree/dev-1.x
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Environment
|
||||
description: |
|
||||
Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and copy-paste it here.
|
||||
You may add additional information that may be helpful for locating the problem, such as
|
||||
- How you installed PyTorch \[e.g., pip, conda, source\]
|
||||
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - code sample
|
||||
description: |
|
||||
Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
|
||||
placeholder: |
|
||||
```python
|
||||
# Sample code to reproduce the problem
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - command or script
|
||||
description: |
|
||||
What command or script did you run?
|
||||
placeholder: |
|
||||
```shell
|
||||
The command or script you run.
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Reproduces the problem - error message
|
||||
description: |
|
||||
Please provide the error message or logs you got, with the full traceback.
|
||||
|
||||
        Tip: You can attach images or log files by dragging them into the text area.
|
||||
placeholder: |
|
||||
```
|
||||
The error message or logs you got, with the full traceback.
|
||||
```
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Additional information
|
||||
description: |
|
||||
Tell us anything else you think we should know.
|
||||
|
||||
Tip: You can attach images or log files by dragging them into the text area.
|
||||
placeholder: |
|
||||
1. What's your expected result?
|
||||
2. What dataset did you use?
|
||||
3. What do you think might be the reason?
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
If you have already identified the reason, we strongly appreciate you creating a new PR to fix it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,39 @@
|
|||
name: 🚀 Feature request
|
||||
description: Suggest an idea for this project
|
||||
labels: [feature-request]
|
||||
title: "[Feature] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: What is the feature?
|
||||
description: Tell us more about the feature and how this feature can help.
|
||||
placeholder: |
|
||||
E.g., It is inconvenient when \[....\].
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Any other context?
|
||||
description: |
|
||||
Have you considered any alternative solutions or features? If so, what are they? Also, feel free to add any other context or screenshots about the feature request here.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
We strongly appreciate you creating a new PR to implement it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,51 @@
|
|||
name: "\U0001F31F New model/dataset/scheduler addition"
|
||||
description: Submit a proposal/request to implement a new model / dataset / scheduler
|
||||
labels: [ "feature-request" ]
|
||||
title: "[New Models] "
|
||||
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: textarea
|
||||
id: description-request
|
||||
validations:
|
||||
required: true
|
||||
attributes:
|
||||
label: Model/Dataset/Scheduler description
|
||||
description: |
|
||||
Put any and all important information relative to the model/dataset/scheduler
|
||||
|
||||
- type: checkboxes
|
||||
attributes:
|
||||
label: Open source status
|
||||
description: |
|
||||
Please provide the open-source status, which would be very helpful
|
||||
options:
|
||||
- label: "The model implementation is available"
|
||||
- label: "The model weights are available."
|
||||
|
||||
- type: textarea
|
||||
id: additional-info
|
||||
attributes:
|
||||
label: Provide useful links for the implementation
|
||||
description: |
|
||||
Please provide information regarding the implementation, the weights, and the authors.
|
||||
Please mention the authors by @gh-username if you're aware of their usernames.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
We strongly appreciate you creating a new PR to implement it [**Here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -0,0 +1,48 @@
|
|||
name: 📚 Documentation
|
||||
description: Report an issue related to the documentation.
|
||||
labels: "docs"
|
||||
title: "[Docs] "
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Note
|
||||
For general usage questions or idea discussions, please post it to our [**Forum**](https://github.com/open-mmlab/mmocr/discussions)
|
||||
Please fill in as **much** of the following form as you're able to. **The clearer the description, the shorter it will take to solve it.**
|
||||
|
||||
- type: dropdown
|
||||
id: branch
|
||||
attributes:
|
||||
label: Branch
|
||||
description: This issue is related to the
|
||||
options:
|
||||
- master branch https://mmocr.readthedocs.io/en/latest/
|
||||
- 1.x branch https://mmocr.readthedocs.io/en/dev-1.x/
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: 📚 The doc issue
|
||||
description: >
|
||||
      A clear and concise description of the issue.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Suggest a potential alternative/fix
|
||||
description: >
|
||||
Tell us how we could improve the documentation in this regard.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
## Acknowledgement
|
||||
Thanks for taking the time to fill out this report.
|
||||
|
||||
If you have already identified the reason, we strongly appreciate you creating a new PR to fix it [**here**](https://github.com/open-mmlab/mmocr/pulls)!
|
||||
Please refer to [**Contribution Guide**](https://mmocr.readthedocs.io/en/dev-1.x/notes/contribution_guide.html) for contributing.
|
||||
|
||||
Welcome to join our [**Community**](https://mmocr.readthedocs.io/en/latest/contact.html) to discuss together. 👬
|
|
@ -1,6 +1,12 @@
|
|||
blank_issues_enabled: false
|
||||
|
||||
contact_links:
|
||||
- name: MMOCR Documentation
|
||||
url: https://mmocr.readthedocs.io/en/latest/
|
||||
about: Check if your question is answered in docs
|
||||
- name: ❔ FAQ
|
||||
url: https://mmocr.readthedocs.io/en/dev-1.x/get_started/faq.html
|
||||
about: Is your question frequently asked?
|
||||
- name: 💬 Forum
|
||||
url: https://github.com/open-mmlab/mmocr/discussions
|
||||
about: Ask general usage questions and discuss with other MMOCR community members
|
||||
- name: 🌐 Explore OpenMMLab
|
||||
url: https://openmmlab.com/
|
||||
    about: Get to know more about OpenMMLab
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
---
|
||||
name: Error report
|
||||
about: Create a report to help us improve
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
Thanks for your error report and we appreciate it a lot.
|
||||
|
||||
**Checklist**
|
||||
|
||||
1. I have searched related issues but cannot get the expected help.
|
||||
2. The bug has not been fixed in the latest version.
|
||||
|
||||
**Describe the bug**
|
||||
A clear and concise description of what the bug is.
|
||||
|
||||
**Reproduction**
|
||||
|
||||
1. What command or script did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the command.
|
||||
```
|
||||
|
||||
2. Did you make any modifications on the code or config? Did you understand what you have modified?
|
||||
3. What dataset did you use?
|
||||
|
||||
**Environment**
|
||||
|
||||
1. Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and paste it here.
|
||||
2. You may add additional information that may be helpful for locating the problem, such as
|
||||
- How you installed PyTorch [e.g., pip, conda, source]
|
||||
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
|
||||
**Error traceback**
|
||||
If applicable, paste the error traceback here.
|
||||
|
||||
```none
|
||||
A placeholder for traceback.
|
||||
```
|
||||
|
||||
**Bug fix**
|
||||
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
|
|
@ -1,22 +0,0 @@
|
|||
---
|
||||
name: Feature request
|
||||
about: Suggest an idea for this project
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Describe the feature**
|
||||
|
||||
**Motivation**
|
||||
A clear and concise description of the motivation of the feature.
|
||||
Ex1. It is inconvenient when [....].
|
||||
Ex2. There is a recent paper [....], which is very helpful for [....].
|
||||
|
||||
**Related resources**
|
||||
If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
|
||||
|
||||
**Additional context**
|
||||
Add any other context or screenshots about the feature request here.
|
||||
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
|
|
@ -1,8 +0,0 @@
|
|||
---
|
||||
name: General questions
|
||||
about: Ask general questions to get help
|
||||
title: ''
|
||||
labels: ''
|
||||
assignees: ''
|
||||
|
||||
---
|
|
@ -1,68 +0,0 @@
|
|||
---
|
||||
name: Reimplementation Questions
|
||||
about: Ask about questions during model reimplementation
|
||||
title: ''
|
||||
labels: 'reimplementation'
|
||||
assignees: ''
|
||||
|
||||
---
|
||||
|
||||
**Notice**
|
||||
|
||||
There are several common situations in the reimplementation issues as below
|
||||
|
||||
1. Reimplement a model in the model zoo using the provided configs
|
||||
2. Reimplement a model in the model zoo on other dataset (e.g., custom datasets)
|
||||
3. Reimplement a custom model but all the components are implemented in MMOCR
|
||||
4. Reimplement a custom model with new modules implemented by yourself
|
||||
|
||||
There are several things to do for different cases as below.
|
||||
|
||||
- For case 1 & 3, please follow the steps in the following sections thus we could help to quickly identify the issue.
|
||||
- For case 2 & 4, please understand that we are not able to do much help here because we usually do not know the full code and the users should be responsible for the code they write.
|
||||
- One suggestion for case 2 & 4 is that the users should first check whether the bug lies in the self-implemented code or the original code. For example, users can first make sure that the same model runs well on supported datasets. If you still need help, please describe what you have done and what you obtain in the issue, and follow the steps in the following sections and try as clear as possible so that we can better help you.
|
||||
|
||||
**Checklist**
|
||||
|
||||
1. I have searched related issues but cannot get the expected help.
|
||||
2. The issue has not been fixed in the latest version.
|
||||
|
||||
**Describe the issue**
|
||||
|
||||
A clear and concise description of the problem you met and what you have done.
|
||||
|
||||
**Reproduction**
|
||||
|
||||
1. What command or script did you run?
|
||||
|
||||
```none
|
||||
A placeholder for the command.
|
||||
```
|
||||
|
||||
2. What config dir you run?
|
||||
|
||||
```none
|
||||
A placeholder for the config.
|
||||
```
|
||||
|
||||
3. Did you make any modifications on the code or config? Did you understand what you have modified?
|
||||
4. What dataset did you use?
|
||||
|
||||
**Environment**
|
||||
|
||||
1. Please run `python mmocr/utils/collect_env.py` to collect necessary environment information and paste it here.
|
||||
2. You may add additional information that may be helpful for locating the problem, such as
|
||||
1. How you installed PyTorch [e.g., pip, conda, source]
|
||||
2. Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
|
||||
|
||||
**Results**
|
||||
|
||||
If applicable, paste the related results here, e.g., what you expect and what you get.
|
||||
|
||||
```none
|
||||
A placeholder for results comparison
|
||||
```
|
||||
|
||||
**Issue fix**
|
||||
|
||||
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
|
|
@ -17,9 +17,6 @@ jobs:
|
|||
python-version: 3.7
|
||||
- name: Install pre-commit hook
|
||||
run: |
|
||||
sudo apt-add-repository ppa:brightbox/ruby-ng -y
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ruby2.7
|
||||
pip install pre-commit
|
||||
pre-commit install
|
||||
- name: Linting
|
||||
|
@ -27,4 +24,4 @@ jobs:
|
|||
- name: Check docstring coverage
|
||||
run: |
|
||||
pip install interrogate
|
||||
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 50 mmocr
|
||||
interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex "__repr__" --fail-under 90 mmocr
|
||||
|
|
|
@ -9,8 +9,9 @@ on:
|
|||
- 'demo/**'
|
||||
- '.dev_scripts/**'
|
||||
- '.circleci/**'
|
||||
- 'projects/**'
|
||||
branches:
|
||||
- main
|
||||
- dev-1.x
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
@ -18,33 +19,34 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build_cpu_py:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.6, 3.8, 3.9]
|
||||
python-version: [3.8, 3.9]
|
||||
torch: [1.8.1]
|
||||
include:
|
||||
- torch: 1.8.1
|
||||
torchvision: 0.9.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Get MMCV_TORCH as the environment variable
|
||||
run: . .github/workflows/scripts/get_mmcv_var.sh ${{matrix.torch}}
|
||||
shell: bash
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
run: pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch${MMCV_TORCH}/index.html
|
||||
run: |
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
- name: Install MMDet
|
||||
run: pip install mmdet
|
||||
run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install other dependencies
|
||||
run: pip install -r requirements.txt
|
||||
run: pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: rm -rf .eggs && pip install -e .
|
||||
- name: Run unittests and generate coverage report
|
||||
|
@ -54,14 +56,12 @@ jobs:
|
|||
coverage report -m
|
||||
|
||||
build_cpu_pt:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
torch: [1.5.1, 1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0]
|
||||
torch: [1.6.0, 1.7.1, 1.8.1, 1.9.1, 1.10.1, 1.11.0, 1.12.1, 1.13.0]
|
||||
include:
|
||||
- torch: 1.5.1
|
||||
torchvision: 0.6.1
|
||||
- torch: 1.6.0
|
||||
torchvision: 0.7.0
|
||||
- torch: 1.7.1
|
||||
|
@ -74,25 +74,33 @@ jobs:
|
|||
torchvision: 0.11.2
|
||||
- torch: 1.11.0
|
||||
torchvision: 0.12.0
|
||||
- torch: 1.12.1
|
||||
torchvision: 0.13.1
|
||||
- torch: 1.13.0
|
||||
torchvision: 0.14.0
|
||||
- torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
python-version: 3.8
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Get MMCV_TORCH as the environment variable
|
||||
run: . .github/workflows/scripts/get_mmcv_var.sh ${{matrix.torch}}
|
||||
shell: bash
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
run: pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch${MMCV_TORCH}/index.html
|
||||
run: |
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
- name: Install MMDet
|
||||
run: pip install mmdet
|
||||
run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install other dependencies
|
||||
run: pip install -r requirements.txt
|
||||
run: pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: rm -rf .eggs && pip install -e .
|
||||
- name: Run unittests and generate coverage report
|
||||
|
@ -111,76 +119,42 @@ jobs:
|
|||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
|
||||
build_cu102:
|
||||
runs-on: ubuntu-18.04
|
||||
container:
|
||||
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
include:
|
||||
- torch: 1.8.1
|
||||
cuda: 10.2
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: python -m pip install pip --upgrade
|
||||
- name: Fetch GPG keys
|
||||
run: |
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
|
||||
- name: Get MMCV_TORCH and MMCV_CUDA as environment variables
|
||||
run: . .github/workflows/scripts/get_mmcv_var.sh ${{matrix.torch}} ${{matrix.cuda}}
|
||||
shell: bash
|
||||
- name: Install Python-dev
|
||||
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
|
||||
if: ${{matrix.python-version != 3.9}}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update && apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/${MMCV_CUDA}/torch${MMCV_TORCH}/index.html
|
||||
python -m pip install mmdet
|
||||
python -m pip install -r requirements.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python setup.py check -m -s
|
||||
TORCH_CUDA_ARCH_LIST=7.0 python -m pip install -e .
|
||||
|
||||
build_windows:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: windows-2022
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-2022]
|
||||
python: [3.7]
|
||||
platform: [cpu, cu102]
|
||||
platform: [cpu, cu111]
|
||||
torch: [1.8.1]
|
||||
torchvision: [0.9.1]
|
||||
include:
|
||||
- python-version: 3.8
|
||||
platform: cu117
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Upgrade pip
|
||||
run: python -m pip install pip --upgrade
|
||||
- name: Install Pillow
|
||||
run: python -m pip install Pillow
|
||||
run: python -m pip install --upgrade pip
|
||||
- name: Install lmdb
|
||||
run: python -m pip install lmdb
|
||||
run: pip install lmdb
|
||||
- name: Install PyTorch
|
||||
run: python -m pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
|
||||
run: pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8/index.html --only-binary mmcv-full
|
||||
python -m pip install mmdet
|
||||
python -m pip install -r requirements.txt
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python -m pip install -e .
|
||||
pip install -e .
|
||||
- name: Run unittests and generate coverage report
|
||||
run: |
|
||||
pytest tests/
|
||||
|
|
|
@ -9,6 +9,7 @@ on:
|
|||
- 'demo/**'
|
||||
- '.dev_scripts/**'
|
||||
- '.circleci/**'
|
||||
- 'projects/**'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
|
@ -16,7 +17,7 @@ concurrency:
|
|||
|
||||
jobs:
|
||||
build_cpu:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-22.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
|
@ -24,24 +25,25 @@ jobs:
|
|||
- torch: 1.8.1
|
||||
torchvision: 0.9.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Get MMCV_TORCH as the environment variable
|
||||
run: . .github/workflows/scripts/get_mmcv_var.sh ${{matrix.torch}}
|
||||
shell: bash
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/cpu/torch_stable.html
|
||||
- name: Install MMEngine
|
||||
run: pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
- name: Install MMCV
|
||||
run: pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch${MMCV_TORCH}/index.html
|
||||
run: |
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
- name: Install MMDet
|
||||
run: pip install mmdet
|
||||
run: pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
- name: Install other dependencies
|
||||
run: pip install -r requirements.txt
|
||||
run: pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: rm -rf .eggs && pip install -e .
|
||||
- name: Run unittests and generate coverage report
|
||||
|
@ -59,74 +61,42 @@ jobs:
|
|||
name: codecov-umbrella
|
||||
fail_ci_if_error: false
|
||||
|
||||
build_cu102:
|
||||
runs-on: ubuntu-18.04
|
||||
container:
|
||||
image: pytorch/pytorch:1.8.1-cuda10.2-cudnn7-devel
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: python -m pip install pip --upgrade
|
||||
- name: Fetch GPG keys
|
||||
run: |
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
|
||||
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
|
||||
- name: Get MMCV_TORCH and MMCV_CUDA as environment variables
|
||||
run: . .github/workflows/scripts/get_mmcv_var.sh ${{matrix.torch}} ${{matrix.cuda}}
|
||||
shell: bash
|
||||
- name: Install Python-dev
|
||||
run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
|
||||
if: ${{matrix.python-version != 3.9}}
|
||||
- name: Install system dependencies
|
||||
run: |
|
||||
apt-get update
|
||||
apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/${MMCV_CUDA}/torch${MMCV_TORCH}/index.html
|
||||
python -m pip install mmdet
|
||||
python -m pip install -r requirements.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python setup.py check -m -s
|
||||
TORCH_CUDA_ARCH_LIST=7.0 python -m pip install -e .
|
||||
|
||||
build_windows:
|
||||
runs-on: ${{ matrix.os }}
|
||||
runs-on: windows-2022
|
||||
strategy:
|
||||
matrix:
|
||||
os: [windows-2022]
|
||||
python: [3.7]
|
||||
platform: [cpu, cu102]
|
||||
platform: [cpu, cu111]
|
||||
torch: [1.8.1]
|
||||
torchvision: [0.9.1]
|
||||
include:
|
||||
- python-version: 3.8
|
||||
platform: cu117
|
||||
torch: 2.0.0
|
||||
torchvision: 0.15.1
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python }}
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
python-version: ${{ matrix.python }}
|
||||
- name: Upgrade pip
|
||||
run: python -m pip install pip --upgrade
|
||||
- name: Install Pillow
|
||||
run: python -m pip install Pillow
|
||||
run: python -m pip install --upgrade pip
|
||||
- name: Install lmdb
|
||||
run: python -m pip install lmdb
|
||||
run: pip install lmdb
|
||||
- name: Install PyTorch
|
||||
run: python -m pip install torch==1.8.1+${{matrix.platform}} torchvision==0.9.1+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
|
||||
run: pip install torch==${{matrix.torch}}+${{matrix.platform}} torchvision==${{matrix.torchvision}}+${{matrix.platform}} -f https://download.pytorch.org/whl/${{matrix.platform}}/torch_stable.html
|
||||
- name: Install mmocr dependencies
|
||||
run: |
|
||||
python -m pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8/index.html --only-binary mmcv-full
|
||||
python -m pip install mmdet
|
||||
python -m pip install -r requirements.txt
|
||||
pip install git+https://github.com/open-mmlab/mmengine.git@main
|
||||
pip install -U openmim
|
||||
mim install 'mmcv >= 2.0.0rc1'
|
||||
pip install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x
|
||||
pip install -r requirements/tests.txt
|
||||
- name: Build and install
|
||||
run: |
|
||||
python -m pip install -e .
|
||||
pip install -e .
|
||||
- name: Run unittests and generate coverage report
|
||||
run: |
|
||||
pytest tests/
|
||||
|
|
|
@ -1,19 +0,0 @@
|
|||
#!/bin/bash
|
||||
|
||||
TORCH=$1
|
||||
CUDA=$2
|
||||
|
||||
# 10.2 -> cu102
|
||||
MMCV_CUDA="cu`echo ${CUDA} | tr -d '.'`"
|
||||
|
||||
# MMCV only provides pre-compiled packages for torch 1.x.0
|
||||
# which works for any subversions of torch 1.x.
|
||||
# We force the torch version to be 1.x.0 to ease package searching
|
||||
# and avoid unnecessary rebuild during MMCV's installation.
|
||||
TORCH_VER_ARR=(${TORCH//./ })
|
||||
TORCH_VER_ARR[2]=0
|
||||
printf -v MMCV_TORCH "%s." "${TORCH_VER_ARR[@]}"
|
||||
MMCV_TORCH=${MMCV_TORCH%?} # Remove the last dot
|
||||
|
||||
echo "MMCV_CUDA=${MMCV_CUDA}" >> $GITHUB_ENV
|
||||
echo "MMCV_TORCH=${MMCV_TORCH}" >> $GITHUB_ENV
|
|
@ -0,0 +1,44 @@
|
|||
name: test-mim
|
||||
|
||||
on:
|
||||
push:
|
||||
paths:
|
||||
- 'model-index.yml'
|
||||
- 'configs/**'
|
||||
|
||||
pull_request:
|
||||
paths:
|
||||
- 'model-index.yml'
|
||||
- 'configs/**'
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
build_cpu:
|
||||
runs-on: ubuntu-18.04
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: [3.7]
|
||||
torch: [1.8.0]
|
||||
include:
|
||||
- torch: 1.8.0
|
||||
torch_version: torch1.8
|
||||
torchvision: 0.9.0
|
||||
steps:
|
||||
- uses: actions/checkout@v2
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v2
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Upgrade pip
|
||||
run: pip install pip --upgrade
|
||||
- name: Install PyTorch
|
||||
run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
|
||||
- name: Install openmim
|
||||
run: pip install openmim
|
||||
- name: Build and install
|
||||
run: rm -rf .eggs && mim install -e .
|
||||
- name: test commands of mim
|
||||
run: mim search mmocr
|
|
@ -67,6 +67,7 @@ instance/
|
|||
# Sphinx documentation
|
||||
docs/en/_build/
|
||||
docs/zh_cn/_build/
|
||||
docs/*/api/generated/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
@ -107,7 +108,7 @@ venv.bak/
|
|||
|
||||
# cython generated cpp
|
||||
!data/dict
|
||||
data/*
|
||||
/data
|
||||
.vscode
|
||||
.idea
|
||||
|
||||
|
@ -142,3 +143,4 @@ mmocr/.mim
|
|||
workdirs/
|
||||
.history/
|
||||
.dev/
|
||||
data/
|
||||
|
|
|
@ -6,6 +6,4 @@ assign:
|
|||
'*/1 * * * *'
|
||||
assignees:
|
||||
- gaotongxiao
|
||||
- xinke-wang
|
||||
- Mountchicken
|
||||
- Harold-lkk
|
||||
|
|
|
@ -1,27 +1,37 @@
|
|||
exclude: ^tests/data/
|
||||
repos:
|
||||
- repo: https://github.com/PyCQA/flake8
|
||||
rev: 4.0.1
|
||||
rev: 5.0.4
|
||||
hooks:
|
||||
- id: flake8
|
||||
- repo: https://github.com/PyCQA/isort
|
||||
rev: 5.10.1
|
||||
- repo: https://github.com/zhouzaida/isort
|
||||
rev: 5.12.1
|
||||
hooks:
|
||||
- id: isort
|
||||
- repo: https://github.com/pre-commit/mirrors-yapf
|
||||
rev: v0.30.0
|
||||
rev: v0.32.0
|
||||
hooks:
|
||||
- id: yapf
|
||||
- repo: https://github.com/codespell-project/codespell
|
||||
rev: v2.1.0
|
||||
rev: v2.2.1
|
||||
hooks:
|
||||
- id: codespell
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v3.1.0
|
||||
rev: v4.3.0
|
||||
hooks:
|
||||
- id: trailing-whitespace
|
||||
exclude: |
|
||||
(?x)^(
|
||||
dicts/|
|
||||
projects/.*?/dicts/
|
||||
)
|
||||
- id: check-yaml
|
||||
- id: end-of-file-fixer
|
||||
exclude: |
|
||||
(?x)^(
|
||||
dicts/|
|
||||
projects/.*?/dicts/
|
||||
)
|
||||
- id: requirements-txt-fixer
|
||||
- id: double-quote-string-fixer
|
||||
- id: check-merge-conflict
|
||||
|
@ -29,12 +39,17 @@ repos:
|
|||
args: ["--remove"]
|
||||
- id: mixed-line-ending
|
||||
args: ["--fix=lf"]
|
||||
- repo: https://github.com/markdownlint/markdownlint
|
||||
rev: v0.11.0
|
||||
- id: mixed-line-ending
|
||||
args: ["--fix=lf"]
|
||||
- repo: https://github.com/executablebooks/mdformat
|
||||
rev: 0.7.9
|
||||
hooks:
|
||||
- id: markdownlint
|
||||
args: ["-r", "~MD002,~MD013,~MD029,~MD033,~MD034",
|
||||
"-t", "allow_different_nesting"]
|
||||
- id: mdformat
|
||||
args: ["--number", "--table-width", "200"]
|
||||
additional_dependencies:
|
||||
- mdformat-openmmlab
|
||||
- mdformat_frontmatter
|
||||
- linkify-it-py
|
||||
- repo: https://github.com/myint/docformatter
|
||||
rev: v1.3.1
|
||||
hooks:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
include requirements/*.txt
|
||||
include mmocr/.mim/model-index.yml
|
||||
include mmocr/.mim/dicts/*.txt
|
||||
recursive-include mmocr/.mim/configs *.py *.yml
|
||||
recursive-include mmocr/.mim/tools *.sh *.py
|
||||
|
|
187
README.md
187
README.md
|
@ -17,35 +17,78 @@
|
|||
</sup>
|
||||
</div>
|
||||
<div> </div>
|
||||
</div>
|
||||
|
||||
## Introduction
|
||||
|
||||
English | [简体中文](README_zh-CN.md)
|
||||
|
||||
[](https://github.com/open-mmlab/mmocr/actions)
|
||||
[](https://mmocr.readthedocs.io/en/latest/?badge=latest)
|
||||
[](https://mmocr.readthedocs.io/en/dev-1.x/?badge=dev-1.x)
|
||||
[](https://codecov.io/gh/open-mmlab/mmocr)
|
||||
[](https://github.com/open-mmlab/mmocr/blob/main/LICENSE)
|
||||
[](https://pypi.org/project/mmocr/)
|
||||
[](https://github.com/open-mmlab/mmocr/issues)
|
||||
[](https://github.com/open-mmlab/mmocr/issues)
|
||||
<a href="https://console.tiyaro.ai/explore?q=mmocr&pub=mmocr"> <img src="https://tiyaro-public-docs.s3.us-west-2.amazonaws.com/assets/try_on_tiyaro_badge.svg"></a>
|
||||
|
||||
[📘Documentation](https://mmocr.readthedocs.io/en/dev-1.x/) |
|
||||
[🛠️Installation](https://mmocr.readthedocs.io/en/dev-1.x/get_started/install.html) |
|
||||
[👀Model Zoo](https://mmocr.readthedocs.io/en/dev-1.x/modelzoo.html) |
|
||||
[🆕Update News](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) |
|
||||
[🤔Reporting Issues](https://github.com/open-mmlab/mmocr/issues/new/choose)
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
|
||||
English | [简体中文](README_zh-CN.md)
|
||||
|
||||
</div>
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## Latest Updates
|
||||
|
||||
**The default branch is now `main` and the code on the branch has been upgraded to v1.0.0. The old `main` branch (v0.6.3) code now exists on the `0.x` branch.** If you have been using the `main` branch and encounter upgrade issues, please read the [Migration Guide](https://mmocr.readthedocs.io/en/dev-1.x/migration/overview.html) and notes on [Branches](https://mmocr.readthedocs.io/en/dev-1.x/migration/branches.html) .
|
||||
|
||||
v1.0.0 was released in 2023-04-06. Major updates from 1.0.0rc6 include:
|
||||
|
||||
1. Support for SCUT-CTW1500, SynthText, and MJSynth datasets in Dataset Preparer
|
||||
2. Updated FAQ and documentation
|
||||
3. Deprecation of file_client_args in favor of backend_args
|
||||
4. Added a new MMOCR tutorial notebook
|
||||
|
||||
To know more about the updates in MMOCR 1.0, please refer to [What's New in MMOCR 1.x](https://mmocr.readthedocs.io/en/dev-1.x/migration/news.html), or
|
||||
Read [Changelog](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) for more details!
|
||||
|
||||
## Introduction
|
||||
|
||||
MMOCR is an open-source toolbox based on PyTorch and mmdetection for text detection, text recognition, and the corresponding downstream tasks including key information extraction. It is part of the [OpenMMLab](https://openmmlab.com/) project.
|
||||
|
||||
The main branch works with **PyTorch 1.6+**.
|
||||
|
||||
Documentation: https://mmocr.readthedocs.io/en/latest/.
|
||||
|
||||
<div align="left">
|
||||
<img src="resources/illustration.jpg"/>
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/24622904/187838618-1fdc61c0-2d46-49f9-8502-976ffdf01f28.png"/>
|
||||
</div>
|
||||
|
||||
### Major Features
|
||||
|
||||
- **Comprehensive Pipeline**
|
||||
|
||||
The toolbox supports not only text detection and text recognition, but also their downstream tasks such as key information extraction.
|
||||
The toolbox supports not only text detection and text recognition, but also their downstream tasks such as key information extraction.
|
||||
|
||||
- **Multiple Models**
|
||||
|
||||
|
@ -53,16 +96,42 @@ Documentation: https://mmocr.readthedocs.io/en/latest/.
|
|||
|
||||
- **Modular Design**
|
||||
|
||||
The modular design of MMOCR enables users to define their own optimizers, data preprocessors, and model components such as backbones, necks and heads as well as losses. Please refer to [Getting Started](https://mmocr.readthedocs.io/en/latest/getting_started.html) for how to construct a customized model.
|
||||
The modular design of MMOCR enables users to define their own optimizers, data preprocessors, and model components such as backbones, necks and heads as well as losses. Please refer to [Overview](https://mmocr.readthedocs.io/en/dev-1.x/get_started/overview.html) for how to construct a customized model.
|
||||
|
||||
- **Numerous Utilities**
|
||||
|
||||
The toolbox provides a comprehensive set of utilities which can help users assess the performance of models. It includes visualizers which allow visualization of images, ground truths as well as predicted bounding boxes, and a validation tool for evaluating checkpoints during training. It also includes data converters to demonstrate how to convert your own data to the annotation files which the toolbox supports.
|
||||
|
||||
## [Model Zoo](https://mmocr.readthedocs.io/en/latest/modelzoo.html)
|
||||
## Installation
|
||||
|
||||
MMOCR depends on [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) and [MMDetection](https://github.com/open-mmlab/mmdetection).
|
||||
Below are quick steps for installation.
|
||||
Please refer to [Install Guide](https://mmocr.readthedocs.io/en/dev-1.x/get_started/install.html) for more detailed instruction.
|
||||
|
||||
```shell
|
||||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
mim install -e .
|
||||
```
|
||||
|
||||
## Get Started
|
||||
|
||||
Please see [Quick Run](https://mmocr.readthedocs.io/en/dev-1.x/get_started/quick_run.html) for the basic usage of MMOCR.
|
||||
|
||||
## [Model Zoo](https://mmocr.readthedocs.io/en/dev-1.x/modelzoo.html)
|
||||
|
||||
Supported algorithms:
|
||||
|
||||
<details open>
|
||||
<summary>BackBone</summary>
|
||||
|
||||
- [x] [oCLIP](configs/backbone/oclip/README.md) (ECCV'2022)
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>Text Detection</summary>
|
||||
|
||||
|
@ -80,13 +149,14 @@ Supported algorithms:
|
|||
<summary>Text Recognition</summary>
|
||||
|
||||
- [x] [ABINet](configs/textrecog/abinet/README.md) (CVPR'2021)
|
||||
- [x] [ASTER](configs/textrecog/aster/README.md) (TPAMI'2018)
|
||||
- [x] [CRNN](configs/textrecog/crnn/README.md) (TPAMI'2016)
|
||||
- [x] [MASTER](configs/textrecog/master/README.md) (PR'2021)
|
||||
- [x] [NRTR](configs/textrecog/nrtr/README.md) (ICDAR'2019)
|
||||
- [x] [RobustScanner](configs/textrecog/robust_scanner/README.md) (ECCV'2020)
|
||||
- [x] [SAR](configs/textrecog/sar/README.md) (AAAI'2019)
|
||||
- [x] [SATRN](configs/textrecog/satrn/README.md) (CVPR'2020 Workshop on Text and Documents in the Deep Learning Era)
|
||||
- [x] [SegOCR](configs/textrecog/seg/README.md) (Manuscript'2021)
|
||||
- [x] [SVTR](configs/textrecog/svtr/README.md) (IJCAI'2022)
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -98,55 +168,19 @@ Supported algorithms:
|
|||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>Named Entity Recognition</summary>
|
||||
<summary>Text Spotting</summary>
|
||||
|
||||
- [x] [Bert-Softmax](configs/ner/bert_softmax/README.md) (NAACL'2019)
|
||||
- [x] [ABCNet](projects/ABCNet/README.md) (CVPR'2020)
|
||||
- [x] [ABCNetV2](projects/ABCNet/README_V2.md) (TPAMI'2021)
|
||||
- [x] [SPTS](projects/SPTS/README.md) (ACM MM'2022)
|
||||
|
||||
</details>
|
||||
|
||||
Please refer to [model_zoo](https://mmocr.readthedocs.io/en/latest/modelzoo.html) for more details.
|
||||
Please refer to [model_zoo](https://mmocr.readthedocs.io/en/dev-1.x/modelzoo.html) for more details.
|
||||
|
||||
## License
|
||||
## Projects
|
||||
|
||||
This project is released under the [Apache 2.0 license](LICENSE).
|
||||
|
||||
## Citation
|
||||
|
||||
If you find this project useful in your research, please consider cite:
|
||||
|
||||
```bibtex
|
||||
@article{mmocr2021,
|
||||
title={MMOCR: A Comprehensive Toolbox for Text Detection, Recognition and Understanding},
|
||||
author={Kuang, Zhanghui and Sun, Hongbin and Li, Zhizhong and Yue, Xiaoyu and Lin, Tsui Hin and Chen, Jianyong and Wei, Huaqiang and Zhu, Yiqin and Gao, Tong and Zhang, Wenwei and Chen, Kai and Zhang, Wayne and Lin, Dahua},
|
||||
journal= {arXiv preprint arXiv:2108.06543},
|
||||
year={2021}
|
||||
}
|
||||
```
|
||||
|
||||
## Changelog
|
||||
|
||||
v0.6.0 was released in 2022-05-05.
|
||||
|
||||
## Installation
|
||||
|
||||
MMOCR depends on [PyTorch](https://pytorch.org/), [MMCV](https://github.com/open-mmlab/mmcv) and [MMDetection](https://github.com/open-mmlab/mmdetection).
|
||||
Below are quick steps for installation.
|
||||
Please refer to [Install Guide](https://mmocr.readthedocs.io/en/latest/install.html) for more detailed instruction.
|
||||
|
||||
```shell
|
||||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
mim install mmcv-full
|
||||
mim install mmdet
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
## Get Started
|
||||
|
||||
Please see [Getting Started](https://mmocr.readthedocs.io/en/latest/getting_started.html) for the basic usage of MMOCR.
|
||||
[Here](projects/README.md) are some implementations of SOTA models and solutions built on MMOCR, which are supported and maintained by community users. These projects demonstrate the best practices based on MMOCR for research and product development. We welcome and appreciate all the contributions to OpenMMLab ecosystem.
|
||||
|
||||
## Contributing
|
||||
|
||||
|
@ -157,8 +191,26 @@ We appreciate all contributions to improve MMOCR. Please refer to [CONTRIBUTING.
|
|||
MMOCR is an open-source project that is contributed by researchers and engineers from various colleges and companies. We appreciate all the contributors who implement their methods or add new features, as well as users who give valuable feedbacks.
|
||||
We hope the toolbox and benchmark could serve the growing research community by providing a flexible toolkit to reimplement existing methods and develop their own new OCR methods.
|
||||
|
||||
## Projects in OpenMMLab
|
||||
## Citation
|
||||
|
||||
If you find this project useful in your research, please consider cite:
|
||||
|
||||
```bibtex
|
||||
@article{mmocr2022,
|
||||
title={MMOCR: A Comprehensive Toolbox for Text Detection, Recognition and Understanding},
|
||||
author={MMOCR Developer Team},
|
||||
howpublished = {\url{https://github.com/open-mmlab/mmocr}},
|
||||
year={2022}
|
||||
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This project is released under the [Apache 2.0 license](LICENSE).
|
||||
|
||||
## OpenMMLab Family
|
||||
|
||||
- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab foundational library for training deep learning models
|
||||
- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab foundational library for computer vision.
|
||||
- [MIM](https://github.com/open-mmlab/mim): MIM installs OpenMMLab packages.
|
||||
- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
|
||||
|
@ -178,3 +230,22 @@ We hope the toolbox and benchmark could serve the growing research community by
|
|||
- [MMEditing](https://github.com/open-mmlab/mmediting): OpenMMLab image and video editing toolbox.
|
||||
- [MMGeneration](https://github.com/open-mmlab/mmgeneration): OpenMMLab image and video generative models toolbox.
|
||||
- [MMDeploy](https://github.com/open-mmlab/mmdeploy): OpenMMLab model deployment framework.
|
||||
|
||||
## Welcome to the OpenMMLab community
|
||||
|
||||
Scan the QR code below to follow the OpenMMLab team's [**Zhihu Official Account**](https://www.zhihu.com/people/openmmlab) and join the OpenMMLab team's [**QQ Group**](https://jq.qq.com/?_wv=1027&k=aCvMxdr3), or join the official communication WeChat group by adding the WeChat, or join our [**Slack**](https://join.slack.com/t/mmocrworkspace/shared_invite/zt-1ifqhfla8-yKnLO_aKhVA2h71OrK8GZw)
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/qq_group_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/wechat_qrcode.jpg" height="400" />
|
||||
</div>
|
||||
|
||||
We will provide you with the OpenMMLab community
|
||||
|
||||
- 📢 share the latest core technologies of AI frameworks
|
||||
- 💻 Explaining PyTorch common module source Code
|
||||
- 📰 News related to the release of OpenMMLab
|
||||
- 🚀 Introduction of cutting-edge algorithms developed by OpenMMLab
|
||||
🏃 Get the more efficient answer and feedback
|
||||
- 🔥 Provide a platform for communication with developers from all walks of life
|
||||
|
||||
The OpenMMLab community looks forward to your participation! 👬
|
||||
|
|
161
README_zh-CN.md
161
README_zh-CN.md
|
@ -17,52 +17,120 @@
|
|||
</sup>
|
||||
</div>
|
||||
<div> </div>
|
||||
</div>
|
||||
|
||||
## 简介
|
||||
|
||||
[English](/README.md) | 简体中文
|
||||
|
||||
[](https://github.com/open-mmlab/mmocr/actions)
|
||||
[](https://mmocr.readthedocs.io/en/latest/?badge=latest)
|
||||
[](https://mmocr.readthedocs.io/en/dev-1.x/?badge=dev-1.x)
|
||||
[](https://codecov.io/gh/open-mmlab/mmocr)
|
||||
[](https://github.com/open-mmlab/mmocr/blob/main/LICENSE)
|
||||
[](https://pypi.org/project/mmocr/)
|
||||
[](https://github.com/open-mmlab/mmocr/issues)
|
||||
[](https://github.com/open-mmlab/mmocr/issues)
|
||||
<a href="https://console.tiyaro.ai/explore?q=mmocr&pub=mmocr"> <img src="https://tiyaro-public-docs.s3.us-west-2.amazonaws.com/assets/try_on_tiyaro_badge.svg"></a>
|
||||
|
||||
[📘文档](https://mmocr.readthedocs.io/zh_CN/dev-1.x/) |
|
||||
[🛠️安装](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/install.html) |
|
||||
[👀模型库](https://mmocr.readthedocs.io/zh_CN/dev-1.x/modelzoo.html) |
|
||||
[🆕更新日志](https://mmocr.readthedocs.io/en/dev-1.x/notes/changelog.html) |
|
||||
[🤔报告问题](https://github.com/open-mmlab/mmocr/issues/new/choose)
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
|
||||
[English](/README.md) | 简体中文
|
||||
|
||||
</div>
|
||||
|
||||
<div align="center">
|
||||
<a href="https://openmmlab.medium.com/" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219255827-67c1a27f-f8c5-46a9-811d-5e57448c61d1.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://discord.gg/raweFPmdzG" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218347213-c080267f-cbb6-443e-8532-8e1ed9a58ea9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://twitter.com/OpenMMLab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346637-d30c8a0f-3eba-4699-8131-512fb06d46db.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.youtube.com/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346691-ceb2116a-465a-40af-8424-9f30d2348ca9.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://space.bilibili.com/1293512903" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026751-d7d14cce-a7c9-4e82-9942-8375fca65b99.png" width="3%" alt="" /></a>
|
||||
<img src="https://user-images.githubusercontent.com/25839884/218346358-56cc8e2f-a2b8-487f-9088-32480cceabcf.png" width="3%" alt="" />
|
||||
<a href="https://www.zhihu.com/people/openmmlab" style="text-decoration:none;">
|
||||
<img src="https://user-images.githubusercontent.com/25839884/219026120-ba71e48b-6e94-4bd4-b4e9-b7d175b5e362.png" width="3%" alt="" /></a>
|
||||
</div>
|
||||
|
||||
## 近期更新
|
||||
|
||||
**默认分支目前为 `main`,且分支上的代码已经切换到 v1.0.0 版本。旧版 `main` 分支(v0.6.3)的代码现存在 `0.x` 分支上。** 如果您一直在使用 `main` 分支,并遇到升级问题,请阅读 [迁移指南](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/overview.html) 和 [分支说明](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/branches.html) 。
|
||||
|
||||
最新的版本 v1.0.0 于 2023-04-06 发布。其相对于 1.0.0rc6 的主要更新如下:
|
||||
|
||||
1. Dataset Preparer 中支持了 SCUT-CTW1500, SynthText 和 MJSynth 数据集;
|
||||
2. 更新了文档和 FAQ;
|
||||
3. 升级文件后端;使用了 `backend_args` 替换 `file_client_args`;
|
||||
4. 增加了 MMOCR 教程 notebook。
|
||||
|
||||
如果需要了解 MMOCR 1.0 相对于 0.x 的升级内容,请阅读 [MMOCR 1.x 更新汇总](https://mmocr.readthedocs.io/zh_CN/dev-1.x/migration/news.html);或者阅读[更新日志](https://mmocr.readthedocs.io/zh_CN/dev-1.x/notes/changelog.html)以获取更多信息。
|
||||
|
||||
## 简介
|
||||
|
||||
MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检测,文本识别以及相应的下游任务,如关键信息提取。 它是 OpenMMLab 项目的一部分。
|
||||
|
||||
主分支目前支持 **PyTorch 1.6 以上**的版本。
|
||||
|
||||
文档:https://mmocr.readthedocs.io/zh_CN/latest/
|
||||
|
||||
<div align="left">
|
||||
<img src="resources/illustration.jpg"/>
|
||||
<div align="center">
|
||||
<img src="https://user-images.githubusercontent.com/24622904/187838618-1fdc61c0-2d46-49f9-8502-976ffdf01f28.png"/>
|
||||
</div>
|
||||
|
||||
### 主要特性
|
||||
|
||||
-**全流程**
|
||||
|
||||
该工具箱不仅支持文本检测和文本识别,还支持其下游任务,例如关键信息提取。
|
||||
该工具箱不仅支持文本检测和文本识别,还支持其下游任务,例如关键信息提取。
|
||||
|
||||
-**多种模型**
|
||||
|
||||
该工具箱支持用于文本检测,文本识别和关键信息提取的各种最新模型。
|
||||
该工具箱支持用于文本检测,文本识别和关键信息提取的各种最新模型。
|
||||
|
||||
-**模块化设计**
|
||||
|
||||
MMOCR 的模块化设计使用户可以定义自己的优化器,数据预处理器,模型组件如主干模块,颈部模块和头部模块,以及损失函数。有关如何构建自定义模型的信
|
||||
息,请参考[快速入门](https://mmocr.readthedocs.io/zh_CN/latest/getting_started.html)。
|
||||
MMOCR 的模块化设计使用户可以定义自己的优化器,数据预处理器,模型组件如主干模块,颈部模块和头部模块,以及损失函数。有关如何构建自定义模型的信息,请参考[概览](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/overview.html)。
|
||||
|
||||
-**众多实用工具**
|
||||
|
||||
该工具箱提供了一套全面的实用程序,可以帮助用户评估模型的性能。它包括可对图像,标注的真值以及预测结果进行可视化的可视化工具,以及用于在训练过程中评估模型的验证工具。它还包括数据转换器,演示了如何将用户自建的标注数据转换为 MMOCR 支持的标注文件。
|
||||
## [模型库](https://mmocr.readthedocs.io/en/latest/modelzoo.html)
|
||||
该工具箱提供了一套全面的实用程序,可以帮助用户评估模型的性能。它包括可对图像,标注的真值以及预测结果进行可视化的可视化工具,以及用于在训练过程中评估模型的验证工具。它还包括数据转换器,演示了如何将用户自建的标注数据转换为 MMOCR 支持的标注文件。
|
||||
|
||||
## 安装
|
||||
|
||||
MMOCR 依赖 [PyTorch](https://pytorch.org/), [MMEngine](https://github.com/open-mmlab/mmengine), [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection),以下是安装的简要步骤。
|
||||
更详细的安装指南请参考 [安装文档](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/install.html)。
|
||||
|
||||
```shell
|
||||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
mim install -e .
|
||||
```
|
||||
|
||||
## 快速入门
|
||||
|
||||
请参考[快速入门](https://mmocr.readthedocs.io/zh_CN/dev-1.x/get_started/quick_run.html)文档学习 MMOCR 的基本使用。
|
||||
|
||||
## [模型库](https://mmocr.readthedocs.io/zh_CN/dev-1.x/modelzoo.html)
|
||||
|
||||
支持的算法:
|
||||
|
||||
<details open>
|
||||
<summary>骨干网络</summary>
|
||||
|
||||
- [x] [oCLIP](configs/backbone/oclip/README.md) (ECCV'2022)
|
||||
|
||||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>文字检测</summary>
|
||||
|
||||
|
@ -80,13 +148,14 @@ MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检
|
|||
<summary>文字识别</summary>
|
||||
|
||||
- [x] [ABINet](configs/textrecog/abinet/README.md) (CVPR'2021)
|
||||
- [x] [ASTER](configs/textrecog/aster/README.md) (TPAMI'2018)
|
||||
- [x] [CRNN](configs/textrecog/crnn/README.md) (TPAMI'2016)
|
||||
- [x] [MASTER](configs/textrecog/master/README.md) (PR'2021)
|
||||
- [x] [NRTR](configs/textrecog/nrtr/README.md) (ICDAR'2019)
|
||||
- [x] [RobustScanner](configs/textrecog/robust_scanner/README.md) (ECCV'2020)
|
||||
- [x] [SAR](configs/textrecog/sar/README.md) (AAAI'2019)
|
||||
- [x] [SATRN](configs/textrecog/satrn/README.md) (CVPR'2020 Workshop on Text and Documents in the Deep Learning Era)
|
||||
- [x] [SegOCR](configs/textrecog/seg/README.md) (Manuscript'2021)
|
||||
- [x] [SVTR](configs/textrecog/svtr/README.md) (IJCAI'2022)
|
||||
|
||||
</details>
|
||||
|
||||
|
@ -98,17 +167,28 @@ MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检
|
|||
</details>
|
||||
|
||||
<details open>
|
||||
<summary>命名实体识别</summary>
|
||||
<summary>端对端 OCR</summary>
|
||||
|
||||
- [x] [Bert-Softmax](configs/ner/bert_softmax/README.md) (NAACL'2019)
|
||||
- [x] [ABCNet](projects/ABCNet/README.md) (CVPR'2020)
|
||||
- [x] [ABCNetV2](projects/ABCNet/README_V2.md) (TPAMI'2021)
|
||||
- [x] [SPTS](projects/SPTS/README.md) (ACM MM'2022)
|
||||
|
||||
</details>
|
||||
|
||||
请点击[模型库](https://mmocr.readthedocs.io/en/latest/modelzoo.html)查看更多关于上述算法的详细信息。
|
||||
请点击[模型库](https://mmocr.readthedocs.io/zh_CN/dev-1.x/modelzoo.html)查看更多关于上述算法的详细信息。
|
||||
|
||||
## 开源许可证
|
||||
## 社区项目
|
||||
|
||||
该项目采用 [Apache 2.0 license](LICENSE) 开源许可证。
|
||||
[这里](projects/README.md)有一些由社区用户支持和维护的基于 MMOCR 的 SOTA 模型和解决方案的实现。这些项目展示了基于 MMOCR 的研究和产品开发的最佳实践。
|
||||
我们欢迎并感谢对 OpenMMLab 生态系统的所有贡献。
|
||||
|
||||
## 贡献指南
|
||||
|
||||
我们感谢所有的贡献者为改进和提升 MMOCR 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
|
||||
|
||||
## 致谢
|
||||
|
||||
MMOCR 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望此工具箱可以帮助大家来复现已有的方法和开发新的方法,从而为研究社区贡献力量。
|
||||
|
||||
## 引用
|
||||
|
||||
|
@ -123,40 +203,13 @@ MMOCR 是基于 PyTorch 和 mmdetection 的开源工具箱,专注于文本检
|
|||
}
|
||||
```
|
||||
|
||||
## 更新日志
|
||||
## 开源许可证
|
||||
|
||||
最新的月度版本 v0.6.0 在 2022.05.05 发布。
|
||||
|
||||
## 安装
|
||||
|
||||
MMOCR 依赖 [PyTorch](https://pytorch.org/), [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection),以下是安装的简要步骤。
|
||||
更详细的安装指南请参考 [安装文档](https://mmocr.readthedocs.io/zh_CN/latest/install.html)。
|
||||
|
||||
```shell
|
||||
conda create -n open-mmlab python=3.8 pytorch=1.10 cudatoolkit=11.3 torchvision -c pytorch -y
|
||||
conda activate open-mmlab
|
||||
pip3 install openmim
|
||||
mim install mmcv-full
|
||||
mim install mmdet
|
||||
git clone https://github.com/open-mmlab/mmocr.git
|
||||
cd mmocr
|
||||
pip3 install -e .
|
||||
```
|
||||
|
||||
## 快速入门
|
||||
|
||||
请参考[快速入门](https://mmocr.readthedocs.io/zh_CN/latest/getting_started.html)文档学习 MMOCR 的基本使用。
|
||||
|
||||
## 贡献指南
|
||||
|
||||
我们感谢所有的贡献者为改进和提升 MMOCR 所作出的努力。请参考[贡献指南](.github/CONTRIBUTING.md)来了解参与项目贡献的相关指引。
|
||||
|
||||
## 致谢
|
||||
MMOCR 是一款由来自不同高校和企业的研发人员共同参与贡献的开源项目。我们感谢所有为项目提供算法复现和新功能支持的贡献者,以及提供宝贵反馈的用户。 我们希望此工具箱可以帮助大家来复现已有的方法和开发新的方法,从而为研究社区贡献力量。
|
||||
该项目采用 [Apache 2.0 license](LICENSE) 开源许可证。
|
||||
|
||||
## OpenMMLab 的其他项目
|
||||
|
||||
|
||||
- [MMEngine](https://github.com/open-mmlab/mmengine): OpenMMLab 深度学习模型训练基础库
|
||||
- [MMCV](https://github.com/open-mmlab/mmcv): OpenMMLab 计算机视觉基础库
|
||||
- [MIM](https://github.com/open-mmlab/mim): MIM 是 OpenMMlab 项目、算法、模型的统一入口
|
||||
- [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱
|
||||
|
@ -179,10 +232,10 @@ MMOCR 是一款由来自不同高校和企业的研发人员共同参与贡献
|
|||
|
||||
## 欢迎加入 OpenMMLab 社区
|
||||
|
||||
扫描下方的二维码可关注 OpenMMLab 团队的 [知乎官方账号](https://www.zhihu.com/people/openmmlab),加入 OpenMMLab 团队的 [官方交流 QQ 群](https://jq.qq.com/?_wv=1027&k=aCvMxdr3),或通过添加微信“Open小喵Lab”加入官方交流微信群。
|
||||
扫描下方的二维码可关注 OpenMMLab 团队的 知乎官方账号,扫描下方微信二维码添加喵喵好友,进入 MMOCR 微信交流社群。【加好友申请格式:研究方向+地区+学校/公司+姓名】
|
||||
|
||||
<div align="center">
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/qq_group_qrcode.jpg" height="400" /> <img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/wechat_qrcode.jpg" height="400" />
|
||||
<img src="https://raw.githubusercontent.com/open-mmlab/mmcv/master/docs/en/_static/zhihu_qrcode.jpg" height="400" /> <img src="https://github.com/open-mmlab/mmocr/assets/62195058/bf1e53fe-df4f-4296-9e1b-61db8971985e" height="400" />
|
||||
</div>
|
||||
|
||||
我们会在 OpenMMLab 社区为大家
|
||||
|
|
|
@ -1,17 +0,0 @@
|
|||
# yapf:disable
|
||||
log_config = dict(
|
||||
interval=5,
|
||||
hooks=[
|
||||
dict(type='TextLoggerHook')
|
||||
])
|
||||
# yapf:enable
|
||||
dist_params = dict(backend='nccl')
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume_from = None
|
||||
workflow = [('train', 1)]
|
||||
|
||||
# disable opencv multithreading to avoid system being overloaded
|
||||
opencv_num_threads = 0
|
||||
# set multi-process start method as `fork` to speed up the training
|
||||
mp_start_method = 'fork'
|
|
@ -1,18 +0,0 @@
|
|||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500'
|
||||
|
||||
train = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_training.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
test = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_test.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
train_list = [train]
|
||||
|
||||
test_list = [test]
|
|
@ -1,18 +0,0 @@
|
|||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015'
|
||||
|
||||
train = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_training.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
test = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_test.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
train_list = [train]
|
||||
|
||||
test_list = [test]
|
|
@ -1,18 +0,0 @@
|
|||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2017'
|
||||
|
||||
train = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_training.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
test = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_val.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
train_list = [train]
|
||||
|
||||
test_list = [test]
|
|
@ -1,18 +0,0 @@
|
|||
dataset_type = 'TextDetDataset'
|
||||
data_root = 'data/synthtext'
|
||||
|
||||
train = dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_training.lmdb',
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='lmdb',
|
||||
parser=dict(
|
||||
type='LineJsonParser',
|
||||
keys=['file_name', 'height', 'width', 'annotations'])),
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
train_list = [train]
|
||||
test_list = [train]
|
|
@ -1,41 +0,0 @@
|
|||
root = 'tests/data/toy_dataset'
|
||||
|
||||
# dataset with type='TextDetDataset'
|
||||
train1 = dict(
|
||||
type='TextDetDataset',
|
||||
img_prefix=f'{root}/imgs',
|
||||
ann_file=f'{root}/instances_test.txt',
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=4,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineJsonParser',
|
||||
keys=['file_name', 'height', 'width', 'annotations'])),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
# dataset with type='IcdarDataset'
|
||||
train2 = dict(
|
||||
type='IcdarDataset',
|
||||
ann_file=f'{root}/instances_test.json',
|
||||
img_prefix=f'{root}/imgs',
|
||||
pipeline=None)
|
||||
|
||||
test = dict(
|
||||
type='TextDetDataset',
|
||||
img_prefix=f'{root}/imgs',
|
||||
ann_file=f'{root}/instances_test.txt',
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineJsonParser',
|
||||
keys=['file_name', 'height', 'width', 'annotations'])),
|
||||
pipeline=None,
|
||||
test_mode=True)
|
||||
|
||||
train_list = [train1, train2]
|
||||
|
||||
test_list = [test]
|
|
@ -1,21 +0,0 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=False,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
|
||||
bbox_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True),
|
||||
postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,23 +0,0 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
|
||||
bbox_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True),
|
||||
postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,28 +0,0 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
lateral_channels=256,
|
||||
asf_cfg=dict(attention_type='ScaleChannelSpatial')),
|
||||
bbox_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True),
|
||||
postprocessor=dict(
|
||||
type='DBPostprocessor', text_repr_type='quad',
|
||||
epsilon_ratio=0.002)),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,21 +0,0 @@
|
|||
model = dict(
|
||||
type='DRRG',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
|
||||
bbox_head=dict(
|
||||
type='DRRGHead',
|
||||
in_channels=32,
|
||||
text_region_thr=0.3,
|
||||
center_region_thr=0.4,
|
||||
loss=dict(type='DRRGLoss'),
|
||||
postprocessor=dict(type='DRRGPostprocessor', link_thr=0.80)))
|
|
@ -1,33 +0,0 @@
|
|||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=False,
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
bbox_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
scales=(8, 16, 32),
|
||||
fourier_degree=5,
|
||||
loss=dict(type='FCELoss', num_sample=50),
|
||||
postprocessor=dict(
|
||||
type='FCEPostprocessor',
|
||||
text_repr_type='quad',
|
||||
num_reconstr_points=50,
|
||||
alpha=1.2,
|
||||
beta=1.0,
|
||||
score_thr=0.3)))
|
|
@ -1,35 +0,0 @@
|
|||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
bbox_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
scales=(8, 16, 32),
|
||||
fourier_degree=5,
|
||||
loss=dict(type='FCELoss', num_sample=50),
|
||||
postprocessor=dict(
|
||||
type='FCEPostprocessor',
|
||||
text_repr_type='poly',
|
||||
num_reconstr_points=50,
|
||||
alpha=1.0,
|
||||
beta=2.0,
|
||||
score_thr=0.3)))
|
|
@ -1,126 +0,0 @@
|
|||
# model settings
|
||||
model = dict(
|
||||
type='OCRMaskRCNN',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
num_outs=5),
|
||||
rpn_head=dict(
|
||||
type='RPNHead',
|
||||
in_channels=256,
|
||||
feat_channels=256,
|
||||
anchor_generator=dict(
|
||||
type='AnchorGenerator',
|
||||
scales=[4],
|
||||
ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
|
||||
strides=[4, 8, 16, 32, 64]),
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[.0, .0, .0, .0],
|
||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
roi_head=dict(
|
||||
type='StandardRoIHead',
|
||||
bbox_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
bbox_head=dict(
|
||||
type='Shared2FCBBoxHead',
|
||||
in_channels=256,
|
||||
fc_out_channels=1024,
|
||||
roi_feat_size=7,
|
||||
num_classes=1,
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[0., 0., 0., 0.],
|
||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||
reg_class_agnostic=False,
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
mask_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
mask_head=dict(
|
||||
type='FCNMaskHead',
|
||||
num_convs=4,
|
||||
in_channels=256,
|
||||
conv_out_channels=256,
|
||||
num_classes=1,
|
||||
loss_mask=dict(
|
||||
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
|
||||
|
||||
# model training and testing settings
|
||||
train_cfg=dict(
|
||||
rpn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.7,
|
||||
neg_iou_thr=0.3,
|
||||
min_pos_iou=0.3,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1,
|
||||
gpu_assign_thr=50),
|
||||
sampler=dict(
|
||||
type='RandomSampler',
|
||||
num=256,
|
||||
pos_fraction=0.5,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=False),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
rpn_proposal=dict(
|
||||
nms_across_levels=False,
|
||||
nms_pre=2000,
|
||||
nms_post=1000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.5,
|
||||
neg_iou_thr=0.5,
|
||||
min_pos_iou=0.5,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1),
|
||||
sampler=dict(
|
||||
type='OHEMSampler',
|
||||
num=512,
|
||||
pos_fraction=0.25,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=True),
|
||||
mask_size=28,
|
||||
pos_weight=-1,
|
||||
debug=False)),
|
||||
test_cfg=dict(
|
||||
rpn=dict(
|
||||
nms_across_levels=False,
|
||||
nms_pre=1000,
|
||||
nms_post=1000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100,
|
||||
mask_thr_binary=0.5)))
|
|
@ -1,126 +0,0 @@
|
|||
# model settings
|
||||
model = dict(
|
||||
type='OCRMaskRCNN',
|
||||
text_repr_type='poly',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
num_outs=5),
|
||||
rpn_head=dict(
|
||||
type='RPNHead',
|
||||
in_channels=256,
|
||||
feat_channels=256,
|
||||
anchor_generator=dict(
|
||||
type='AnchorGenerator',
|
||||
scales=[4],
|
||||
ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
|
||||
strides=[4, 8, 16, 32, 64]),
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[.0, .0, .0, .0],
|
||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
roi_head=dict(
|
||||
type='StandardRoIHead',
|
||||
bbox_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(type='RoIAlign', output_size=7, sample_num=0),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
bbox_head=dict(
|
||||
type='Shared2FCBBoxHead',
|
||||
in_channels=256,
|
||||
fc_out_channels=1024,
|
||||
roi_feat_size=7,
|
||||
num_classes=80,
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[0., 0., 0., 0.],
|
||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||
reg_class_agnostic=False,
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
mask_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(type='RoIAlign', output_size=14, sample_num=0),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
mask_head=dict(
|
||||
type='FCNMaskHead',
|
||||
num_convs=4,
|
||||
in_channels=256,
|
||||
conv_out_channels=256,
|
||||
num_classes=80,
|
||||
loss_mask=dict(
|
||||
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(
|
||||
rpn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.7,
|
||||
neg_iou_thr=0.3,
|
||||
min_pos_iou=0.3,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1),
|
||||
sampler=dict(
|
||||
type='RandomSampler',
|
||||
num=256,
|
||||
pos_fraction=0.5,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=False),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
rpn_proposal=dict(
|
||||
nms_across_levels=False,
|
||||
nms_pre=2000,
|
||||
nms_post=1000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.5,
|
||||
neg_iou_thr=0.5,
|
||||
min_pos_iou=0.5,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1,
|
||||
gpu_assign_thr=50),
|
||||
sampler=dict(
|
||||
type='OHEMSampler',
|
||||
num=512,
|
||||
pos_fraction=0.25,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=True),
|
||||
mask_size=28,
|
||||
pos_weight=-1,
|
||||
debug=False)),
|
||||
test_cfg=dict(
|
||||
rpn=dict(
|
||||
nms_across_levels=False,
|
||||
nms_pre=1000,
|
||||
nms_post=1000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100,
|
||||
mask_thr_binary=0.5)))
|
|
@ -1,43 +0,0 @@
|
|||
model_poly = dict(
|
||||
type='PANet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
|
||||
bbox_head=dict(
|
||||
type='PANHead',
|
||||
in_channels=[128, 128, 128, 128],
|
||||
out_channels=6,
|
||||
loss=dict(type='PANLoss'),
|
||||
postprocessor=dict(type='PANPostprocessor', text_repr_type='poly')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
model_quad = dict(
|
||||
type='PANet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
|
||||
bbox_head=dict(
|
||||
type='PANHead',
|
||||
in_channels=[128, 128, 128, 128],
|
||||
out_channels=6,
|
||||
loss=dict(type='PANLoss'),
|
||||
postprocessor=dict(type='PANPostprocessor', text_repr_type='quad')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,21 +0,0 @@
|
|||
model = dict(
|
||||
type='PANet',
|
||||
pretrained='torchvision://resnet50',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
|
||||
bbox_head=dict(
|
||||
type='PANHead',
|
||||
in_channels=[128, 128, 128, 128],
|
||||
out_channels=6,
|
||||
loss=dict(type='PANLoss', speedup_bbox_thr=32),
|
||||
postprocessor=dict(type='PANPostprocessor', text_repr_type='poly')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,51 +0,0 @@
|
|||
model_poly = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
bbox_head=dict(
|
||||
type='PSEHead',
|
||||
in_channels=[256],
|
||||
out_channels=7,
|
||||
loss=dict(type='PSELoss'),
|
||||
postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
model_quad = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
bbox_head=dict(
|
||||
type='PSEHead',
|
||||
in_channels=[256],
|
||||
out_channels=7,
|
||||
loss=dict(type='PSELoss'),
|
||||
postprocessor=dict(type='PSEPostprocessor', text_repr_type='quad')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,22 +0,0 @@
|
|||
model = dict(
|
||||
type='TextSnake',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
|
||||
bbox_head=dict(
|
||||
type='TextSnakeHead',
|
||||
in_channels=32,
|
||||
loss=dict(type='TextSnakeLoss'),
|
||||
postprocessor=dict(
|
||||
type='TextSnakePostprocessor', text_repr_type='poly')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
|
@ -1,88 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline_r18 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
|
||||
test_pipeline_1333_736 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1333, 736),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
|
||||
# for dbnet_r50dcnv2_fpnc
|
||||
img_norm_cfg_r50dcnv2 = dict(
|
||||
mean=[122.67891434, 116.66876762, 104.00698793],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
to_rgb=True)
|
||||
|
||||
train_pipeline_r50dcnv2 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
|
||||
test_pipeline_4068_1024 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(4068, 1024),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,60 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
|
||||
dict(
|
||||
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.8,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=60,
|
||||
pad_with_fixed_color=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='DRRGTargets'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=[
|
||||
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
|
||||
'gt_cos_map', 'gt_comp_attribs'
|
||||
],
|
||||
visualize=dict(flag=False, boundary_key='gt_text_mask')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=[
|
||||
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
|
||||
'gt_cos_map', 'gt_comp_attribs'
|
||||
])
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1024, 640),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1024, 640), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,118 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
# for icdar2015
|
||||
leval_prop_range_icdar2015 = ((0, 0.4), (0.3, 0.7), (0.6, 1.0))
|
||||
train_pipeline_icdar2015 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(
|
||||
type='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
|
||||
dict(
|
||||
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.8,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='FCENetTargets',
|
||||
fourier_degree=5,
|
||||
level_proportion_range=leval_prop_range_icdar2015),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['p3_maps', 'p4_maps', 'p5_maps'],
|
||||
visualize=dict(flag=False, boundary_key=None)),
|
||||
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
|
||||
]
|
||||
|
||||
img_scale_icdar2015 = (2260, 2260)
|
||||
test_pipeline_icdar2015 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_icdar2015,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
|
||||
# for ctw1500
|
||||
leval_prop_range_ctw1500 = ((0, 0.25), (0.2, 0.65), (0.55, 1.0))
|
||||
train_pipeline_ctw1500 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(
|
||||
type='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
|
||||
dict(
|
||||
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.8,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='FCENetTargets',
|
||||
fourier_degree=5,
|
||||
level_proportion_range=leval_prop_range_ctw1500),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['p3_maps', 'p4_maps', 'p5_maps'],
|
||||
visualize=dict(flag=False, boundary_key=None)),
|
||||
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
|
||||
]
|
||||
|
||||
img_scale_ctw1500 = (1080, 736)
|
||||
test_pipeline_ctw1500 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_ctw1500,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,57 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=None,
|
||||
keep_ratio=False,
|
||||
resize_type='indep_sample_in_range',
|
||||
scale_range=(640, 2560)),
|
||||
dict(type='RandomFlip', flip_ratio=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
mask_type='union_all',
|
||||
instance_key='gt_masks'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
|
||||
]
|
||||
|
||||
# for ctw1500
|
||||
img_scale_ctw1500 = (1600, 1600)
|
||||
test_pipeline_ctw1500 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_ctw1500,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', keep_ratio=True),
|
||||
dict(type='RandomFlip'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
|
||||
# for icdar2015
|
||||
img_scale_icdar2015 = (1920, 1920)
|
||||
test_pipeline_icdar2015 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_icdar2015,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', keep_ratio=True),
|
||||
dict(type='RandomFlip'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,156 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
# for ctw1500
|
||||
img_scale_train_ctw1500 = [(3000, 640)]
|
||||
shrink_ratio_train_ctw1500 = (1.0, 0.7)
|
||||
target_size_train_ctw1500 = (640, 640)
|
||||
train_pipeline_ctw1500 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=img_scale_train_ctw1500,
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
keep_ratio=False),
|
||||
# shrink_ratio is from big to small. The 1st must be 1.0
|
||||
dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_ctw1500),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=target_size_train_ctw1500,
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
|
||||
img_scale_test_ctw1500 = (3000, 640)
|
||||
test_pipeline_ctw1500 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_test_ctw1500,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
|
||||
# for icdar2015
|
||||
img_scale_train_icdar2015 = [(3000, 736)]
|
||||
shrink_ratio_train_icdar2015 = (1.0, 0.5)
|
||||
target_size_train_icdar2015 = (736, 736)
|
||||
train_pipeline_icdar2015 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=img_scale_train_icdar2015,
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
keep_ratio=False),
|
||||
dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2015),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=target_size_train_icdar2015,
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
|
||||
img_scale_test_icdar2015 = (1333, 736)
|
||||
test_pipeline_icdar2015 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_test_icdar2015,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
|
||||
# for icdar2017
|
||||
img_scale_train_icdar2017 = [(3000, 800)]
|
||||
shrink_ratio_train_icdar2017 = (1.0, 0.5)
|
||||
target_size_train_icdar2017 = (800, 800)
|
||||
train_pipeline_icdar2017 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=img_scale_train_icdar2017,
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
keep_ratio=False),
|
||||
dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2017),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=target_size_train_icdar2017,
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
|
||||
img_scale_test_icdar2017 = (1333, 800)
|
||||
test_pipeline_icdar2017 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_test_icdar2017,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,70 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)],
|
||||
ratio_range=(0.5, 3),
|
||||
aspect_ratio_range=(1, 1),
|
||||
multiscale_mode='value',
|
||||
long_size_bound=1280,
|
||||
short_size_bound=640,
|
||||
resize_type='long_short_bound',
|
||||
keep_ratio=False),
|
||||
dict(type='PSENetTargets'),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
|
||||
# for ctw1500
|
||||
img_scale_test_ctw1500 = (1280, 1280)
|
||||
test_pipeline_ctw1500 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_test_ctw1500,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
|
||||
# for icdar2015
|
||||
img_scale_test_icdar2015 = (2240, 2240)
|
||||
test_pipeline_icdar2015 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=img_scale_test_icdar2015,
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,65 +0,0 @@
|
|||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.65,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=20,
|
||||
pad_with_fixed_color=False),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)], # unused
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
long_size_bound=800,
|
||||
short_size_bound=480,
|
||||
resize_type='long_short_bound',
|
||||
keep_ratio=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='TextSnakeTargets'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=[
|
||||
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
|
||||
],
|
||||
visualize=dict(flag=False, boundary_key='gt_text_mask')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=[
|
||||
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
|
||||
])
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1333, 736),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
|
@ -1,25 +0,0 @@
|
|||
# Text Recognition Training set, including:
|
||||
# Synthetic Datasets: Syn90k
|
||||
|
||||
train_root = 'data/mixture/Syn90k'
|
||||
|
||||
train_img_prefix = f'{train_root}/mnt/ramdisk/max/90kDICT32px'
|
||||
train_ann_file = f'{train_root}/label.lmdb'
|
||||
|
||||
train = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=train_img_prefix,
|
||||
ann_file=train_ann_file,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='lmdb',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train_list = [train]
|
|
@ -1,35 +0,0 @@
|
|||
# Text Recognition Training set, including:
|
||||
# Synthetic Datasets: SynthText, Syn90k
|
||||
# Both annotations are filtered so that
|
||||
# only alphanumeric terms are left
|
||||
|
||||
train_root = 'data/mixture'
|
||||
|
||||
train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px'
|
||||
train_ann_file1 = f'{train_root}/Syn90k/label.lmdb'
|
||||
|
||||
train1 = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=train_img_prefix1,
|
||||
ann_file=train_ann_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='lmdb',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train_img_prefix2 = f'{train_root}/SynthText/' + \
|
||||
'synthtext/SynthText_patch_horizontal'
|
||||
train_ann_file2 = f'{train_root}/SynthText/alphanumeric_label.lmdb'
|
||||
|
||||
train2 = {key: value for key, value in train1.items()}
|
||||
train2['img_prefix'] = train_img_prefix2
|
||||
train2['ann_file'] = train_ann_file2
|
||||
|
||||
train_list = [train1, train2]
|
|
@ -1,33 +0,0 @@
|
|||
# Text Recognition Training set, including:
|
||||
# Synthetic Datasets: SynthText, Syn90k
|
||||
|
||||
train_root = 'data/mixture'
|
||||
|
||||
train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px'
|
||||
train_ann_file1 = f'{train_root}/Syn90k/label.lmdb'
|
||||
|
||||
train1 = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=train_img_prefix1,
|
||||
ann_file=train_ann_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='lmdb',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train_img_prefix2 = f'{train_root}/SynthText/' + \
|
||||
'synthtext/SynthText_patch_horizontal'
|
||||
train_ann_file2 = f'{train_root}/SynthText/label.lmdb'
|
||||
|
||||
train2 = {key: value for key, value in train1.items()}
|
||||
train2['img_prefix'] = train_img_prefix2
|
||||
train2['ann_file'] = train_ann_file2
|
||||
|
||||
train_list = [train1, train2]
|
|
@ -1,81 +0,0 @@
|
|||
# Text Recognition Training set, including:
|
||||
# Synthetic Datasets: SynthText, SynthAdd, Syn90k
|
||||
# Real Dataset: IC11, IC13, IC15, COCO-Test, IIIT5k
|
||||
|
||||
train_prefix = 'data/mixture'
|
||||
|
||||
train_img_prefix1 = f'{train_prefix}/icdar_2011'
|
||||
train_img_prefix2 = f'{train_prefix}/icdar_2013'
|
||||
train_img_prefix3 = f'{train_prefix}/icdar_2015'
|
||||
train_img_prefix4 = f'{train_prefix}/coco_text'
|
||||
train_img_prefix5 = f'{train_prefix}/IIIT5K'
|
||||
train_img_prefix6 = f'{train_prefix}/SynthText_Add'
|
||||
train_img_prefix7 = f'{train_prefix}/SynthText'
|
||||
train_img_prefix8 = f'{train_prefix}/Syn90k'
|
||||
|
||||
train_ann_file1 = f'{train_prefix}/icdar_2011/train_label.txt',
|
||||
train_ann_file2 = f'{train_prefix}/icdar_2013/train_label.txt',
|
||||
train_ann_file3 = f'{train_prefix}/icdar_2015/train_label.txt',
|
||||
train_ann_file4 = f'{train_prefix}/coco_text/train_label.txt',
|
||||
train_ann_file5 = f'{train_prefix}/IIIT5K/train_label.txt',
|
||||
train_ann_file6 = f'{train_prefix}/SynthText_Add/label.txt',
|
||||
train_ann_file7 = f'{train_prefix}/SynthText/shuffle_labels.txt',
|
||||
train_ann_file8 = f'{train_prefix}/Syn90k/shuffle_labels.txt'
|
||||
|
||||
train1 = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=train_img_prefix1,
|
||||
ann_file=train_ann_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=20,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train2 = {key: value for key, value in train1.items()}
|
||||
train2['img_prefix'] = train_img_prefix2
|
||||
train2['ann_file'] = train_ann_file2
|
||||
|
||||
train3 = {key: value for key, value in train1.items()}
|
||||
train3['img_prefix'] = train_img_prefix3
|
||||
train3['ann_file'] = train_ann_file3
|
||||
|
||||
train4 = {key: value for key, value in train1.items()}
|
||||
train4['img_prefix'] = train_img_prefix4
|
||||
train4['ann_file'] = train_ann_file4
|
||||
|
||||
train5 = {key: value for key, value in train1.items()}
|
||||
train5['img_prefix'] = train_img_prefix5
|
||||
train5['ann_file'] = train_ann_file5
|
||||
|
||||
train6 = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=train_img_prefix6,
|
||||
ann_file=train_ann_file6,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train7 = {key: value for key, value in train6.items()}
|
||||
train7['img_prefix'] = train_img_prefix7
|
||||
train7['ann_file'] = train_ann_file7
|
||||
|
||||
train8 = {key: value for key, value in train6.items()}
|
||||
train8['img_prefix'] = train_img_prefix8
|
||||
train8['ann_file'] = train_ann_file8
|
||||
|
||||
train_list = [train1, train2, train3, train4, train5, train6, train7, train8]
|
|
@ -1,41 +0,0 @@
|
|||
# Text Recognition Training set, including:
|
||||
# Synthetic Datasets: SynthText, Syn90k
|
||||
|
||||
train_root = 'data/mixture'
|
||||
|
||||
train_img_prefix1 = f'{train_root}/Syn90k/mnt/ramdisk/max/90kDICT32px'
|
||||
train_ann_file1 = f'{train_root}/Syn90k/label.lmdb'
|
||||
|
||||
train1 = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=train_img_prefix1,
|
||||
ann_file=train_ann_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='lmdb',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train_img_prefix2 = f'{train_root}/SynthText/' + \
|
||||
'synthtext/SynthText_patch_horizontal'
|
||||
train_ann_file2 = f'{train_root}/SynthText/label.lmdb'
|
||||
|
||||
train_img_prefix3 = f'{train_root}/SynthText_Add'
|
||||
train_ann_file3 = f'{train_root}/SynthText_Add/label.txt'
|
||||
|
||||
train2 = {key: value for key, value in train1.items()}
|
||||
train2['img_prefix'] = train_img_prefix2
|
||||
train2['ann_file'] = train_ann_file2
|
||||
|
||||
train3 = {key: value for key, value in train1.items()}
|
||||
train3['img_prefix'] = train_img_prefix3
|
||||
train3['ann_file'] = train_ann_file3
|
||||
train3['loader']['file_format'] = 'txt'
|
||||
|
||||
train_list = [train1, train2, train3]
|
|
@ -1,23 +0,0 @@
|
|||
# Text Recognition Training set, including:
|
||||
# Synthetic Datasets: SynthText (with character level boxes)
|
||||
|
||||
train_img_root = 'data/mixture'
|
||||
|
||||
train_img_prefix = f'{train_img_root}/SynthText'
|
||||
|
||||
train_ann_file = f'{train_img_root}/SynthText/instances_train.txt'
|
||||
|
||||
train = dict(
|
||||
type='OCRSegDataset',
|
||||
img_prefix=train_img_prefix,
|
||||
ann_file=train_ann_file,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train_list = [train]
|
|
@ -1,57 +0,0 @@
|
|||
# Text Recognition Testing set, including:
|
||||
# Regular Datasets: IIIT5K, SVT, IC13
|
||||
# Irregular Datasets: IC15, SVTP, CT80
|
||||
|
||||
test_root = 'data/mixture'
|
||||
|
||||
test_img_prefix1 = f'{test_root}/IIIT5K/'
|
||||
test_img_prefix2 = f'{test_root}/svt/'
|
||||
test_img_prefix3 = f'{test_root}/icdar_2013/'
|
||||
test_img_prefix4 = f'{test_root}/icdar_2015/'
|
||||
test_img_prefix5 = f'{test_root}/svtp/'
|
||||
test_img_prefix6 = f'{test_root}/ct80/'
|
||||
|
||||
test_ann_file1 = f'{test_root}/IIIT5K/test_label.txt'
|
||||
test_ann_file2 = f'{test_root}/svt/test_label.txt'
|
||||
test_ann_file3 = f'{test_root}/icdar_2013/test_label_1015.txt'
|
||||
test_ann_file4 = f'{test_root}/icdar_2015/test_label.txt'
|
||||
test_ann_file5 = f'{test_root}/svtp/test_label.txt'
|
||||
test_ann_file6 = f'{test_root}/ct80/test_label.txt'
|
||||
|
||||
test1 = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=test_img_prefix1,
|
||||
ann_file=test_ann_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=True)
|
||||
|
||||
test2 = {key: value for key, value in test1.items()}
|
||||
test2['img_prefix'] = test_img_prefix2
|
||||
test2['ann_file'] = test_ann_file2
|
||||
|
||||
test3 = {key: value for key, value in test1.items()}
|
||||
test3['img_prefix'] = test_img_prefix3
|
||||
test3['ann_file'] = test_ann_file3
|
||||
|
||||
test4 = {key: value for key, value in test1.items()}
|
||||
test4['img_prefix'] = test_img_prefix4
|
||||
test4['ann_file'] = test_ann_file4
|
||||
|
||||
test5 = {key: value for key, value in test1.items()}
|
||||
test5['img_prefix'] = test_img_prefix5
|
||||
test5['ann_file'] = test_ann_file5
|
||||
|
||||
test6 = {key: value for key, value in test1.items()}
|
||||
test6['img_prefix'] = test_img_prefix6
|
||||
test6['ann_file'] = test_ann_file6
|
||||
|
||||
test_list = [test1, test2, test3, test4, test5, test6]
|
|
@ -1,34 +0,0 @@
|
|||
prefix = 'tests/data/ocr_char_ann_toy_dataset/'
|
||||
|
||||
train = dict(
|
||||
type='OCRSegDataset',
|
||||
img_prefix=f'{prefix}/imgs',
|
||||
ann_file=f'{prefix}/instances_train.txt',
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=100,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineJsonParser', keys=['file_name', 'annotations', 'text'])),
|
||||
pipeline=None,
|
||||
test_mode=True)
|
||||
|
||||
test = dict(
|
||||
type='OCRDataset',
|
||||
img_prefix=f'{prefix}/imgs',
|
||||
ann_file=f'{prefix}/instances_test.txt',
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='txt',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=True)
|
||||
|
||||
train_list = [train]
|
||||
|
||||
test_list = [test]
|
|
@ -1,54 +0,0 @@
|
|||
dataset_type = 'OCRDataset'
|
||||
|
||||
root = 'tests/data/ocr_toy_dataset'
|
||||
img_prefix = f'{root}/imgs'
|
||||
train_anno_file1 = f'{root}/label.txt'
|
||||
|
||||
train1 = dict(
|
||||
type=dataset_type,
|
||||
img_prefix=img_prefix,
|
||||
ann_file=train_anno_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=100,
|
||||
file_format='txt',
|
||||
file_storage_backend='disk',
|
||||
parser=dict(
|
||||
type='LineStrParser',
|
||||
keys=['filename', 'text'],
|
||||
keys_idx=[0, 1],
|
||||
separator=' ')),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
train_anno_file2 = f'{root}/label.lmdb'
|
||||
train2 = dict(
|
||||
type=dataset_type,
|
||||
img_prefix=img_prefix,
|
||||
ann_file=train_anno_file2,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=100,
|
||||
file_format='lmdb',
|
||||
file_storage_backend='disk',
|
||||
parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
|
||||
pipeline=None,
|
||||
test_mode=False)
|
||||
|
||||
test_anno_file1 = f'{root}/label.lmdb'
|
||||
test = dict(
|
||||
type=dataset_type,
|
||||
img_prefix=img_prefix,
|
||||
ann_file=test_anno_file1,
|
||||
loader=dict(
|
||||
type='AnnFileLoader',
|
||||
repeat=1,
|
||||
file_format='lmdb',
|
||||
file_storage_backend='disk',
|
||||
parser=dict(type='LineJsonParser', keys=['filename', 'text'])),
|
||||
pipeline=None,
|
||||
test_mode=True)
|
||||
|
||||
train_list = [train1, train2]
|
||||
|
||||
test_list = [test]
|
|
@ -1,70 +0,0 @@
|
|||
# num_chars depends on the configuration of label_convertor. The actual
|
||||
# dictionary size is 36 + 1 (<BOS/EOS>).
|
||||
# TODO: Automatically update num_chars based on the configuration of
|
||||
# label_convertor
|
||||
num_chars = 37
|
||||
max_seq_len = 26
|
||||
|
||||
label_convertor = dict(
|
||||
type='ABIConvertor',
|
||||
dict_type='DICT36',
|
||||
with_unknown=False,
|
||||
with_padding=False,
|
||||
lower=True,
|
||||
)
|
||||
|
||||
model = dict(
|
||||
type='ABINet',
|
||||
backbone=dict(type='ResNetABI'),
|
||||
encoder=dict(
|
||||
type='ABIVisionModel',
|
||||
encoder=dict(
|
||||
type='TransformerEncoder',
|
||||
n_layers=3,
|
||||
n_head=8,
|
||||
d_model=512,
|
||||
d_inner=2048,
|
||||
dropout=0.1,
|
||||
max_len=8 * 32,
|
||||
),
|
||||
decoder=dict(
|
||||
type='ABIVisionDecoder',
|
||||
in_channels=512,
|
||||
num_channels=64,
|
||||
attn_height=8,
|
||||
attn_width=32,
|
||||
attn_mode='nearest',
|
||||
use_result='feature',
|
||||
num_chars=num_chars,
|
||||
max_seq_len=max_seq_len,
|
||||
init_cfg=dict(type='Xavier', layer='Conv2d')),
|
||||
),
|
||||
decoder=dict(
|
||||
type='ABILanguageDecoder',
|
||||
d_model=512,
|
||||
n_head=8,
|
||||
d_inner=2048,
|
||||
n_layers=4,
|
||||
dropout=0.1,
|
||||
detach_tokens=True,
|
||||
use_self_attn=False,
|
||||
pad_idx=num_chars - 1,
|
||||
num_chars=num_chars,
|
||||
max_seq_len=max_seq_len,
|
||||
init_cfg=None),
|
||||
fuser=dict(
|
||||
type='ABIFuser',
|
||||
d_model=512,
|
||||
num_chars=num_chars,
|
||||
init_cfg=None,
|
||||
max_seq_len=max_seq_len,
|
||||
),
|
||||
loss=dict(
|
||||
type='ABILoss',
|
||||
enc_weight=1.0,
|
||||
dec_weight=1.0,
|
||||
fusion_weight=1.0,
|
||||
num_classes=num_chars),
|
||||
label_convertor=label_convertor,
|
||||
max_seq_len=max_seq_len,
|
||||
iter_size=3)
|
|
@ -1,12 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True)
|
||||
|
||||
model = dict(
|
||||
type='CRNNNet',
|
||||
preprocessor=None,
|
||||
backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1),
|
||||
encoder=None,
|
||||
decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
|
||||
loss=dict(type='CTCLoss'),
|
||||
label_convertor=label_convertor,
|
||||
pretrained=None)
|
|
@ -1,18 +0,0 @@
|
|||
# model
|
||||
label_convertor = dict(
|
||||
type='CTCConvertor', dict_type='DICT36', with_unknown=False, lower=True)
|
||||
|
||||
model = dict(
|
||||
type='CRNNNet',
|
||||
preprocessor=dict(
|
||||
type='TPSPreprocessor',
|
||||
num_fiducial=20,
|
||||
img_size=(32, 100),
|
||||
rectified_img_size=(32, 100),
|
||||
num_img_channel=1),
|
||||
backbone=dict(type='VeryDeepVgg', leaky_relu=False, input_channels=1),
|
||||
encoder=None,
|
||||
decoder=dict(type='CRNNDecoder', in_channels=512, rnn_flag=True),
|
||||
loss=dict(type='CTCLoss'),
|
||||
label_convertor=label_convertor,
|
||||
pretrained=None)
|
|
@ -1,61 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
|
||||
|
||||
model = dict(
|
||||
type='MASTER',
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
in_channels=3,
|
||||
stem_channels=[64, 128],
|
||||
block_cfgs=dict(
|
||||
type='BasicBlock',
|
||||
plugins=dict(
|
||||
cfg=dict(
|
||||
type='GCAModule',
|
||||
ratio=0.0625,
|
||||
headers=1,
|
||||
pooling_type='att',
|
||||
is_att_scale=False,
|
||||
fusion_type='channel_add'),
|
||||
position='after_conv2')),
|
||||
arch_layers=[1, 2, 5, 3],
|
||||
arch_channels=[256, 256, 512, 512],
|
||||
strides=[1, 1, 1, 1],
|
||||
plugins=[
|
||||
dict(
|
||||
cfg=dict(type='Maxpool2d', kernel_size=2, stride=(2, 2)),
|
||||
stages=(True, True, False, False),
|
||||
position='before_stage'),
|
||||
dict(
|
||||
cfg=dict(type='Maxpool2d', kernel_size=(2, 1), stride=(2, 1)),
|
||||
stages=(False, False, True, False),
|
||||
position='before_stage'),
|
||||
dict(
|
||||
cfg=dict(
|
||||
type='ConvModule',
|
||||
kernel_size=3,
|
||||
stride=1,
|
||||
padding=1,
|
||||
norm_cfg=dict(type='BN'),
|
||||
act_cfg=dict(type='ReLU')),
|
||||
stages=(True, True, True, True),
|
||||
position='after_stage')
|
||||
],
|
||||
init_cfg=[
|
||||
dict(type='Kaiming', layer='Conv2d'),
|
||||
dict(type='Constant', val=1, layer='BatchNorm2d'),
|
||||
]),
|
||||
encoder=None,
|
||||
decoder=dict(
|
||||
type='MasterDecoder',
|
||||
d_model=512,
|
||||
n_head=8,
|
||||
attn_drop=0.,
|
||||
ffn_drop=0.,
|
||||
d_inner=2048,
|
||||
n_layers=3,
|
||||
feat_pe_drop=0.2,
|
||||
feat_size=6 * 40),
|
||||
loss=dict(type='TFLoss', reduction='mean'),
|
||||
label_convertor=label_convertor,
|
||||
max_seq_len=30)
|
|
@ -1,11 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True)
|
||||
|
||||
model = dict(
|
||||
type='NRTR',
|
||||
backbone=dict(type='NRTRModalityTransform'),
|
||||
encoder=dict(type='NRTREncoder', n_layers=12),
|
||||
decoder=dict(type='NRTRDecoder'),
|
||||
loss=dict(type='TFLoss'),
|
||||
label_convertor=label_convertor,
|
||||
max_seq_len=40)
|
|
@ -1,24 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
|
||||
|
||||
hybrid_decoder = dict(type='SequenceAttentionDecoder')
|
||||
|
||||
position_decoder = dict(type='PositionAttentionDecoder')
|
||||
|
||||
model = dict(
|
||||
type='RobustScanner',
|
||||
backbone=dict(type='ResNet31OCR'),
|
||||
encoder=dict(
|
||||
type='ChannelReductionEncoder',
|
||||
in_channels=512,
|
||||
out_channels=128,
|
||||
),
|
||||
decoder=dict(
|
||||
type='RobustScannerDecoder',
|
||||
dim_input=512,
|
||||
dim_model=128,
|
||||
hybrid_decoder=hybrid_decoder,
|
||||
position_decoder=position_decoder),
|
||||
loss=dict(type='SARLoss'),
|
||||
label_convertor=label_convertor,
|
||||
max_seq_len=30)
|
|
@ -1,24 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='AttnConvertor', dict_type='DICT90', with_unknown=True)
|
||||
|
||||
model = dict(
|
||||
type='SARNet',
|
||||
backbone=dict(type='ResNet31OCR'),
|
||||
encoder=dict(
|
||||
type='SAREncoder',
|
||||
enc_bi_rnn=False,
|
||||
enc_do_rnn=0.1,
|
||||
enc_gru=False,
|
||||
),
|
||||
decoder=dict(
|
||||
type='ParallelSARDecoder',
|
||||
enc_bi_rnn=False,
|
||||
dec_bi_rnn=False,
|
||||
dec_do_rnn=0,
|
||||
dec_gru=False,
|
||||
pred_dropout=0.1,
|
||||
d_k=512,
|
||||
pred_concat=True),
|
||||
loss=dict(type='SARLoss'),
|
||||
label_convertor=label_convertor,
|
||||
max_seq_len=30)
|
|
@ -1,11 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='AttnConvertor', dict_type='DICT36', with_unknown=True, lower=True)
|
||||
|
||||
model = dict(
|
||||
type='SATRN',
|
||||
backbone=dict(type='ShallowCNN'),
|
||||
encoder=dict(type='SatrnEncoder'),
|
||||
decoder=dict(type='TFDecoder'),
|
||||
loss=dict(type='TFLoss'),
|
||||
label_convertor=label_convertor,
|
||||
max_seq_len=40)
|
|
@ -1,21 +0,0 @@
|
|||
label_convertor = dict(
|
||||
type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
|
||||
|
||||
model = dict(
|
||||
type='SegRecognizer',
|
||||
backbone=dict(
|
||||
type='ResNet31OCR',
|
||||
layers=[1, 2, 5, 3],
|
||||
channels=[32, 64, 128, 256, 512, 512],
|
||||
out_indices=[0, 1, 2, 3],
|
||||
stage4_pool_cfg=dict(kernel_size=2, stride=2),
|
||||
last_stage_pool=True),
|
||||
neck=dict(
|
||||
type='FPNOCR', in_channels=[128, 256, 512, 512], out_channels=256),
|
||||
head=dict(
|
||||
type='SegHead',
|
||||
in_channels=256,
|
||||
upsample_param=dict(scale_factor=2.0, mode='nearest')),
|
||||
loss=dict(
|
||||
type='SegLoss', seg_downsample_ratio=1.0, seg_with_loss_weight=True),
|
||||
label_convertor=label_convertor)
|
|
@ -1,96 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=128,
|
||||
max_width=128,
|
||||
keep_aspect_ratio=False,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(
|
||||
type='RandomWrapper',
|
||||
p=0.5,
|
||||
transforms=[
|
||||
dict(
|
||||
type='OneOfWrapper',
|
||||
transforms=[
|
||||
dict(
|
||||
type='RandomRotateTextDet',
|
||||
max_angle=15,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='RandomAffine',
|
||||
degrees=15,
|
||||
translate=(0.3, 0.3),
|
||||
scale=(0.5, 2.),
|
||||
shear=(-45, 45),
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='RandomPerspective',
|
||||
distortion_scale=0.5,
|
||||
p=1,
|
||||
),
|
||||
])
|
||||
],
|
||||
),
|
||||
dict(
|
||||
type='RandomWrapper',
|
||||
p=0.25,
|
||||
transforms=[
|
||||
dict(type='PyramidRescale'),
|
||||
dict(
|
||||
type='Albu',
|
||||
transforms=[
|
||||
dict(type='GaussNoise', var_limit=(20, 20), p=0.5),
|
||||
dict(type='MotionBlur', blur_limit=6, p=0.5),
|
||||
]),
|
||||
]),
|
||||
dict(
|
||||
type='RandomWrapper',
|
||||
p=0.25,
|
||||
transforms=[
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=0.5,
|
||||
saturation=0.5,
|
||||
contrast=0.5,
|
||||
hue=0.1),
|
||||
]),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio',
|
||||
'resize_shape'
|
||||
]),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='MultiRotateAugOCR',
|
||||
rotate_degrees=[0, 90, 270],
|
||||
transforms=[
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=128,
|
||||
max_width=128,
|
||||
keep_aspect_ratio=False,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'img_shape', 'valid_ratio',
|
||||
'resize_shape', 'img_norm_cfg', 'ori_filename'
|
||||
]),
|
||||
])
|
||||
]
|
|
@ -1,35 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[127], std=[127])
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='grayscale'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=100,
|
||||
max_width=100,
|
||||
keep_aspect_ratio=False),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=['filename', 'resize_shape', 'text', 'valid_ratio']),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='grayscale'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=None,
|
||||
keep_aspect_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'resize_shape', 'valid_ratio', 'img_norm_cfg',
|
||||
'ori_filename', 'img_shape', 'ori_shape'
|
||||
]),
|
||||
]
|
|
@ -1,37 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.5], std=[0.5])
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='grayscale'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=100,
|
||||
max_width=100,
|
||||
keep_aspect_ratio=False),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio'
|
||||
]),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='grayscale'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=100,
|
||||
keep_aspect_ratio=False),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'resize_shape', 'valid_ratio',
|
||||
'img_norm_cfg', 'ori_filename', 'img_shape'
|
||||
]),
|
||||
]
|
|
@ -1,42 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
keep_aspect_ratio=True),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio',
|
||||
'resize_shape'
|
||||
]),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='MultiRotateAugOCR',
|
||||
rotate_degrees=[0, 90, 270],
|
||||
transforms=[
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
keep_aspect_ratio=True),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'img_shape', 'valid_ratio',
|
||||
'img_norm_cfg', 'ori_filename', 'resize_shape'
|
||||
]),
|
||||
])
|
||||
]
|
|
@ -1,38 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=160,
|
||||
keep_aspect_ratio=True,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio'
|
||||
]),
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=32,
|
||||
max_width=160,
|
||||
keep_aspect_ratio=True),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'resize_shape', 'valid_ratio',
|
||||
'img_norm_cfg', 'ori_filename', 'img_shape'
|
||||
])
|
||||
]
|
|
@ -1,43 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
keep_aspect_ratio=True,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'resize_shape', 'text', 'valid_ratio'
|
||||
]),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='MultiRotateAugOCR',
|
||||
rotate_degrees=[0, 90, 270],
|
||||
transforms=[
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=48,
|
||||
min_width=48,
|
||||
max_width=160,
|
||||
keep_aspect_ratio=True,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'resize_shape', 'valid_ratio',
|
||||
'img_norm_cfg', 'ori_filename', 'img_shape'
|
||||
]),
|
||||
])
|
||||
]
|
|
@ -1,44 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=100,
|
||||
max_width=100,
|
||||
keep_aspect_ratio=False,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'img_shape', 'text', 'valid_ratio',
|
||||
'resize_shape'
|
||||
]),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='MultiRotateAugOCR',
|
||||
rotate_degrees=[0, 90, 270],
|
||||
transforms=[
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=32,
|
||||
min_width=100,
|
||||
max_width=100,
|
||||
keep_aspect_ratio=False,
|
||||
width_downsample_ratio=0.25),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'ori_shape', 'img_shape', 'valid_ratio',
|
||||
'resize_shape', 'img_norm_cfg', 'ori_filename'
|
||||
]),
|
||||
])
|
||||
]
|
|
@ -1,66 +0,0 @@
|
|||
img_norm_cfg = dict(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
|
||||
|
||||
gt_label_convertor = dict(
|
||||
type='SegConvertor', dict_type='DICT36', with_unknown=True, lower=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='RandomPaddingOCR',
|
||||
max_ratio=[0.15, 0.2, 0.15, 0.2],
|
||||
box_type='char_quads'),
|
||||
dict(type='OpencvToPil'),
|
||||
dict(
|
||||
type='RandomRotateImageBox',
|
||||
min_angle=-17,
|
||||
max_angle=17,
|
||||
box_type='char_quads'),
|
||||
dict(type='PilToOpencv'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=64,
|
||||
min_width=64,
|
||||
max_width=512,
|
||||
keep_aspect_ratio=True),
|
||||
dict(
|
||||
type='OCRSegTargets',
|
||||
label_convertor=gt_label_convertor,
|
||||
box_type='char_quads'),
|
||||
dict(type='RandomRotateTextDet', rotate_ratio=0.5, max_angle=15),
|
||||
dict(type='ColorJitter', brightness=0.4, contrast=0.4, saturation=0.4),
|
||||
dict(type='ToTensorOCR'),
|
||||
dict(type='FancyPCA'),
|
||||
dict(type='NormalizeOCR', **img_norm_cfg),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels'],
|
||||
visualize=dict(flag=False, boundary_key=None),
|
||||
call_super=False),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_kernels'],
|
||||
meta_keys=['filename', 'ori_shape', 'resize_shape'])
|
||||
]
|
||||
|
||||
test_img_norm_cfg = dict(
|
||||
mean=[x * 255 for x in img_norm_cfg['mean']],
|
||||
std=[x * 255 for x in img_norm_cfg['std']])
|
||||
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile'),
|
||||
dict(
|
||||
type='ResizeOCR',
|
||||
height=64,
|
||||
min_width=64,
|
||||
max_width=None,
|
||||
keep_aspect_ratio=True),
|
||||
dict(type='Normalize', **test_img_norm_cfg),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img'],
|
||||
meta_keys=[
|
||||
'filename', 'resize_shape', 'img_norm_cfg', 'ori_filename',
|
||||
'img_shape', 'ori_shape'
|
||||
])
|
||||
]
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adadelta', lr=0.5)
|
||||
optimizer_config = dict(grad_clip=dict(max_norm=0.5))
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[8, 14, 16])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=18)
|
||||
checkpoint_config = dict(interval=1)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adadelta', lr=1.0)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=5)
|
||||
checkpoint_config = dict(interval=1)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-3)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='poly', power=0.9)
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=600)
|
||||
checkpoint_config = dict(interval=100)
|
|
@ -1,12 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=4e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(
|
||||
policy='step',
|
||||
warmup='linear',
|
||||
warmup_iters=100,
|
||||
warmup_ratio=1.0 / 3,
|
||||
step=[11])
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=12)
|
||||
checkpoint_config = dict(interval=1)
|
|
@ -1,14 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(
|
||||
policy='step',
|
||||
step=[16, 18],
|
||||
warmup='linear',
|
||||
warmup_iters=1,
|
||||
warmup_ratio=0.001,
|
||||
warmup_by_epoch=True)
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=20)
|
||||
checkpoint_config = dict(interval=1)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-3)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[3, 4])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=5)
|
||||
checkpoint_config = dict(interval=1)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[200, 400])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=600)
|
||||
checkpoint_config = dict(interval=100)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-3)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[3, 4])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=6)
|
||||
checkpoint_config = dict(interval=1)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=False)
|
||||
# running settings
|
||||
runner = dict(type='IterBasedRunner', max_iters=100000)
|
||||
checkpoint_config = dict(interval=10000)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=1200)
|
||||
checkpoint_config = dict(interval=100)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=1500)
|
||||
checkpoint_config = dict(interval=100)
|
|
@ -1,13 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(
|
||||
policy='step',
|
||||
warmup='linear',
|
||||
warmup_iters=500,
|
||||
warmup_ratio=0.001,
|
||||
step=[80, 128])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=160)
|
||||
checkpoint_config = dict(interval=10)
|
|
@ -1,8 +0,0 @@
|
|||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=1e-3, momentum=0.99, weight_decay=5e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[200, 400])
|
||||
# running settings
|
||||
runner = dict(type='EpochBasedRunner', max_epochs=600)
|
||||
checkpoint_config = dict(interval=100)
|
|
@ -0,0 +1,41 @@
|
|||
# oCLIP
|
||||
|
||||
> [Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting](https://www.ecva.net/papers/eccv_2022/papers_ECCV/papers/136880282.pdf)
|
||||
|
||||
<!-- [ALGORITHM] -->
|
||||
|
||||
## Abstract
|
||||
|
||||
Recently, Vision-Language Pre-training (VLP) techniques have greatly benefited various vision-language tasks by jointly learning visual and textual representations, which intuitively helps in Optical Character Recognition (OCR) tasks due to the rich visual and textual information in scene text images. However, these methods cannot well cope with OCR tasks because of the difficulty in both instance-level text encoding and image-text pair acquisition (i.e. images and captured texts in them). This paper presents a weakly supervised pre-training method, oCLIP, which can acquire effective scene text representations by jointly learning and aligning visual and textual information. Our network consists of an image encoder and a character-aware text encoder that extract visual and textual features, respectively, as well as a visual-textual decoder that models the interaction among textual and visual features for learning effective scene text representations. With the learning of textual features, the pre-trained model can attend texts in images well with character awareness. Besides, these designs enable the learning from weakly annotated texts (i.e. partial texts in images without text bounding boxes) which mitigates the data annotation constraint greatly. Experiments over the weakly annotated images in ICDAR2019-LSVT show that our pre-trained model improves F-score by +2.5% and +4.8% while transferring its weights to other text detection and spotting networks, respectively. In addition, the proposed method outperforms existing pre-training techniques consistently across multiple public datasets (e.g., +3.2% and +1.3% for Total-Text and CTW1500).
|
||||
|
||||
<div align=center>
|
||||
<img src="https://user-images.githubusercontent.com/24622904/199475057-aa688422-518d-4d7a-86fc-1be0cc1b5dc6.png"/>
|
||||
</div>
|
||||
|
||||
## Models
|
||||
|
||||
| Backbone | Pre-train Data | Model |
|
||||
| :-------: | :------------: | :-------------------------------------------------------------------------------: |
|
||||
| ResNet-50 | SynthText | [Link](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) |
|
||||
|
||||
```{note}
|
||||
The model is converted from the official [oCLIP](https://github.com/bytedance/oclip.git).
|
||||
```
|
||||
|
||||
## Supported Text Detection Models
|
||||
|
||||
| | [DBNet](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnet) | [DBNet++](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#dbnetpp) | [FCENet](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#fcenet) | [TextSnake](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#fcenet) | [PSENet](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#psenet) | [DRRG](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#drrg) | [Mask R-CNN](https://mmocr.readthedocs.io/en/dev-1.x/textdet_models.html#mask-r-cnn) |
|
||||
| :-------: | :------------------------------------------------------------------------: | :----------------------------------------------------------------------------: | :--------------------------------------------------------------------------: | :-----------------------------------------------------------------------------: | :--------------------------------------------------------------------------: | :----------------------------------------------------------------------: | :----------------------------------------------------------------------------------: |
|
||||
| ICDAR2015 | ✓ | ✓ | ✓ | | ✓ | | ✓ |
|
||||
| CTW1500 | | | ✓ | ✓ | ✓ | ✓ | ✓ |
|
||||
|
||||
## Citation
|
||||
|
||||
```bibtex
|
||||
@article{xue2022language,
|
||||
title={Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting},
|
||||
author={Xue, Chuhui and Zhang, Wenqing and Hao, Yu and Lu, Shijian and Torr, Philip and Bai, Song},
|
||||
journal={Proceedings of the European Conference on Computer Vision (ECCV)},
|
||||
year={2022}
|
||||
}
|
||||
```
|
|
@ -0,0 +1,13 @@
|
|||
Collections:
|
||||
- Name: oCLIP
|
||||
Metadata:
|
||||
Training Data: SynthText
|
||||
Architecture:
|
||||
- CLIPResNet
|
||||
Paper:
|
||||
URL: https://arxiv.org/abs/2203.03911
|
||||
Title: 'Language Matters: A Weakly Supervised Vision-Language Pre-training Approach for Scene Text Detection and Spotting'
|
||||
README: configs/backbone/oclip/README.md
|
||||
|
||||
Models:
|
||||
Weights: https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth
|
|
@ -0,0 +1,26 @@
|
|||
wildreceipt_openset_data_root = 'data/wildreceipt/'
|
||||
|
||||
wildreceipt_openset_train = dict(
|
||||
type='WildReceiptDataset',
|
||||
data_root=wildreceipt_openset_data_root,
|
||||
metainfo=dict(category=[
|
||||
dict(id=0, name='bg'),
|
||||
dict(id=1, name='key'),
|
||||
dict(id=2, name='value'),
|
||||
dict(id=3, name='other')
|
||||
]),
|
||||
ann_file='openset_train.txt',
|
||||
pipeline=None)
|
||||
|
||||
wildreceipt_openset_test = dict(
|
||||
type='WildReceiptDataset',
|
||||
data_root=wildreceipt_openset_data_root,
|
||||
metainfo=dict(category=[
|
||||
dict(id=0, name='bg'),
|
||||
dict(id=1, name='key'),
|
||||
dict(id=2, name='value'),
|
||||
dict(id=3, name='other')
|
||||
]),
|
||||
ann_file='openset_test.txt',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
|
@ -0,0 +1,16 @@
|
|||
wildreceipt_data_root = 'data/wildreceipt/'
|
||||
|
||||
wildreceipt_train = dict(
|
||||
type='WildReceiptDataset',
|
||||
data_root=wildreceipt_data_root,
|
||||
metainfo=wildreceipt_data_root + 'class_list.txt',
|
||||
ann_file='train.txt',
|
||||
pipeline=None)
|
||||
|
||||
wildreceipt_test = dict(
|
||||
type='WildReceiptDataset',
|
||||
data_root=wildreceipt_data_root,
|
||||
metainfo=wildreceipt_data_root + 'class_list.txt',
|
||||
ann_file='test.txt',
|
||||
test_mode=True,
|
||||
pipeline=None)
|
|
@ -0,0 +1,33 @@
|
|||
default_scope = 'mmocr'
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=False,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
randomness = dict(seed=None)
|
||||
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=100),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', interval=1),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
sync_buffer=dict(type='SyncBuffersHook'),
|
||||
visualization=dict(
|
||||
type='VisualizationHook',
|
||||
interval=1,
|
||||
enable=False,
|
||||
show=False,
|
||||
draw_gt=False,
|
||||
draw_pred=False),
|
||||
)
|
||||
|
||||
# Logging
|
||||
log_level = 'INFO'
|
||||
log_processor = dict(type='LogProcessor', window_size=10, by_epoch=True)
|
||||
|
||||
load_from = None
|
||||
resume = False
|
||||
|
||||
visualizer = dict(
|
||||
type='KIELocalVisualizer', name='visualizer', is_openset=False)
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue