mirror of https://github.com/open-mmlab/mmocr.git
[Checkpoints] Add ST-pretrained DB-series models and logs (#1635)
* [Fix] Auto scale lr * update pull/1708/head
parent
c870046a4a
commit
2a2cab3c8c
|
@ -0,0 +1,14 @@
|
|||
_base_ = 'default_runtime.py'
|
||||
|
||||
default_hooks = dict(
|
||||
logger=dict(type='LoggerHook', interval=1000),
|
||||
checkpoint=dict(
|
||||
type='CheckpointHook',
|
||||
interval=10000,
|
||||
by_epoch=False,
|
||||
max_keep_ckpts=1),
|
||||
)
|
||||
|
||||
# Evaluation
|
||||
val_evaluator = None
|
||||
test_evaluator = None
|
|
@ -4,7 +4,7 @@ optim_wrapper = dict(
|
|||
optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
|
||||
|
||||
train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000)
|
||||
test_cfg = dict(type='TestLoop')
|
||||
test_cfg = None
|
||||
val_cfg = None
|
||||
# learning policy
|
||||
param_scheduler = [
|
||||
|
|
|
@ -14,6 +14,12 @@ Recently, segmentation-based methods are quite popular in scene text detection,
|
|||
|
||||
## Results and models
|
||||
|
||||
### SynthText
|
||||
|
||||
| Method | Backbone | Training set | #iters | Download |
|
||||
| :-----------------------------------------------------------------------: | :------: | :----------: | :-----: | :--------------------------------------------------------------------------------------------------: |
|
||||
| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py) | ResNet18 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/dbnet_resnet18_fpnc_100k_synthtext-2e9bf392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/20221214_150351.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
|
|
|
@ -1,15 +1,44 @@
|
|||
_base_ = [
|
||||
'_base_dbnet_resnet18_fpnc.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/pretrain_runtime.py',
|
||||
'../_base_/schedules/schedule_sgd_100k.py',
|
||||
]
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = _base_.train_pipeline
|
||||
synthtext_textdet_test = _base_.synthtext_textdet_test
|
||||
synthtext_textdet_test.pipeline = _base_.test_pipeline
|
||||
synthtext_textdet_train.pipeline = train_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
|
@ -18,13 +47,4 @@ train_dataloader = dict(
|
|||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=synthtext_textdet_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
|
|
@ -14,12 +14,18 @@ Recently, segmentation-based scene text detection methods have drawn extensive a
|
|||
|
||||
## Results and models
|
||||
|
||||
### SynthText
|
||||
|
||||
| Method | Backbone | Training set | #iters | Download |
|
||||
| :--------------------------------------------------------------------------------: | :------------: | :----------: | :-----: | :-----------------------------------------------------------------------------------: |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) | ResNet50-dcnv2 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext-00f0a80b.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/20221215_013531.log) |
|
||||
|
||||
### ICDAR2015
|
||||
|
||||
| Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download |
|
||||
| :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: |
|
||||
| [DBNetpp_r50](/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9079 | 0.8209 | 0.8622 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/20221025_185550.log) |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50 | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
|
||||
| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50-dcnv2 | [SynthText](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
|
||||
| [DBNetpp_r50-oclip](/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9174 | 0.8609 | 0.8882 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/20221101_124139.log) |
|
||||
|
||||
## Citation
|
||||
|
|
|
@ -1,34 +1,44 @@
|
|||
_base_ = [
|
||||
'_base_dbnetpp_resnet50-dcnv2_fpnc.py',
|
||||
'../_base_/default_runtime.py',
|
||||
'../_base_/pretrain_runtime.py',
|
||||
'../_base_/datasets/synthtext.py',
|
||||
'../_base_/schedules/schedule_sgd_100k.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = [_base_.synthtext_textdet_train]
|
||||
test_list = [_base_.synthtext_textdet_test]
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
with_polygon=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(type='FixInvalidPolygon'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
synthtext_textdet_train = _base_.synthtext_textdet_train
|
||||
synthtext_textdet_train.pipeline = train_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=_base_.train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=_base_.test_pipeline))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
dataset=synthtext_textdet_train)
|
||||
|
||||
auto_scale_lr = dict(base_batch_size=16)
|
||||
|
|
Loading…
Reference in New Issue