From 2a2cab3c8c17cac31e6d00f2b68f9eb467a15727 Mon Sep 17 00:00:00 2001 From: Tong Gao Date: Mon, 6 Feb 2023 15:16:08 +0800 Subject: [PATCH] [Checkpoints] Add ST-pretrained DB-series models and logs (#1635) * [Fix] Auto scale lr * update --- configs/textdet/_base_/pretrain_runtime.py | 14 ++++++ .../_base_/schedules/schedule_sgd_100k.py | 2 +- configs/textdet/dbnet/README.md | 6 +++ .../dbnet_resnet18_fpnc_100k_synthtext.py | 46 ++++++++++++----- configs/textdet/dbnetpp/README.md | 8 ++- ...etpp_resnet50-dcnv2_fpnc_100k_synthtext.py | 50 +++++++++++-------- 6 files changed, 91 insertions(+), 35 deletions(-) create mode 100644 configs/textdet/_base_/pretrain_runtime.py diff --git a/configs/textdet/_base_/pretrain_runtime.py b/configs/textdet/_base_/pretrain_runtime.py new file mode 100644 index 00000000..cb2800d5 --- /dev/null +++ b/configs/textdet/_base_/pretrain_runtime.py @@ -0,0 +1,14 @@ +_base_ = 'default_runtime.py' + +default_hooks = dict( + logger=dict(type='LoggerHook', interval=1000), + checkpoint=dict( + type='CheckpointHook', + interval=10000, + by_epoch=False, + max_keep_ckpts=1), +) + +# Evaluation +val_evaluator = None +test_evaluator = None diff --git a/configs/textdet/_base_/schedules/schedule_sgd_100k.py b/configs/textdet/_base_/schedules/schedule_sgd_100k.py index 61286916..f760774b 100644 --- a/configs/textdet/_base_/schedules/schedule_sgd_100k.py +++ b/configs/textdet/_base_/schedules/schedule_sgd_100k.py @@ -4,7 +4,7 @@ optim_wrapper = dict( optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001)) train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000) -test_cfg = dict(type='TestLoop') +test_cfg = None val_cfg = None # learning policy param_scheduler = [ diff --git a/configs/textdet/dbnet/README.md b/configs/textdet/dbnet/README.md index 442e0f65..07c91edb 100644 --- a/configs/textdet/dbnet/README.md +++ b/configs/textdet/dbnet/README.md @@ -14,6 +14,12 @@ Recently, segmentation-based methods are quite popular in scene text detection, ## Results and models +### SynthText + +| Method | Backbone | Training set | #iters | Download | +| :-----------------------------------------------------------------------: | :------: | :----------: | :-----: | :--------------------------------------------------------------------------------------------------: | +| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py) | ResNet18 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/dbnet_resnet18_fpnc_100k_synthtext-2e9bf392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/20221214_150351.log) | + ### ICDAR2015 | Method | Backbone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download | diff --git a/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py b/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py index 8ea33b14..f02528b5 100644 --- a/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py +++ b/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py @@ -1,15 +1,44 @@ _base_ = [ '_base_dbnet_resnet18_fpnc.py', '../_base_/datasets/synthtext.py', - '../_base_/default_runtime.py', + '../_base_/pretrain_runtime.py', '../_base_/schedules/schedule_sgd_100k.py', ] +file_client_args = dict(backend='disk') + +train_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args=file_client_args, + color_type='color_ignore_orientation'), + dict( + type='LoadOCRAnnotations', + with_polygon=True, + with_bbox=True, + with_label=True, + ), + dict(type='FixInvalidPolygon'), + dict( + type='TorchVisionWrapper', + op='ColorJitter', + brightness=32.0 / 255, + saturation=0.5), + dict( + type='ImgAugWrapper', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]), + dict(type='RandomCrop', min_side_ratio=0.1), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict(type='Pad', size=(640, 640)), + dict( + type='PackTextDetInputs', + meta_keys=('img_path', 'ori_shape', 'img_shape')) +] + # dataset settings synthtext_textdet_train = _base_.synthtext_textdet_train -synthtext_textdet_train.pipeline = _base_.train_pipeline -synthtext_textdet_test = _base_.synthtext_textdet_test -synthtext_textdet_test.pipeline = _base_.test_pipeline +synthtext_textdet_train.pipeline = train_pipeline train_dataloader = dict( batch_size=16, @@ -18,13 +47,4 @@ train_dataloader = dict( sampler=dict(type='DefaultSampler', shuffle=True), dataset=synthtext_textdet_train) -val_dataloader = dict( - batch_size=1, - num_workers=4, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=synthtext_textdet_test) - -test_dataloader = val_dataloader - auto_scale_lr = dict(base_batch_size=16) diff --git a/configs/textdet/dbnetpp/README.md b/configs/textdet/dbnetpp/README.md index e32cb172..7f9b668a 100644 --- a/configs/textdet/dbnetpp/README.md +++ b/configs/textdet/dbnetpp/README.md @@ -14,12 +14,18 @@ Recently, segmentation-based scene text detection methods have drawn extensive a ## Results and models +### SynthText + +| Method | BackBone | Training set | #iters | Download | +| :--------------------------------------------------------------------------------: | :------------: | :----------: | :-----: | :-----------------------------------------------------------------------------------: | +| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) | ResNet50-dcnv2 | SynthText | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext-00f0a80b.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/20221215_013531.log) | + ### ICDAR2015 | Method | BackBone | Pretrained Model | Training set | Test set | #epochs | Test size | Precision | Recall | Hmean | Download | | :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: | | [DBNetpp_r50](/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py) | ResNet50 | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9079 | 0.8209 | 0.8622 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/20221025_185550.log) | -| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50 | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) | +| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) | ResNet50-dcnv2 | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9116 | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) | | [DBNetpp_r50-oclip](/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) | - | ICDAR2015 Train | ICDAR2015 Test | 1200 | 1024 | 0.9174 | 0.8609 | 0.8882 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/20221101_124139.log) | ## Citation diff --git a/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py b/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py index c3db2f84..7174055d 100644 --- a/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py +++ b/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py @@ -1,34 +1,44 @@ _base_ = [ '_base_dbnetpp_resnet50-dcnv2_fpnc.py', - '../_base_/default_runtime.py', + '../_base_/pretrain_runtime.py', '../_base_/datasets/synthtext.py', '../_base_/schedules/schedule_sgd_100k.py', ] -# dataset settings -train_list = [_base_.synthtext_textdet_train] -test_list = [_base_.synthtext_textdet_test] +train_pipeline = [ + dict(type='LoadImageFromFile', color_type='color_ignore_orientation'), + dict( + type='LoadOCRAnnotations', + with_bbox=True, + with_polygon=True, + with_label=True, + ), + dict(type='FixInvalidPolygon'), + dict( + type='TorchVisionWrapper', + op='ColorJitter', + brightness=32.0 / 255, + saturation=0.5), + dict( + type='ImgAugWrapper', + args=[['Fliplr', 0.5], + dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]), + dict(type='RandomCrop', min_side_ratio=0.1), + dict(type='Resize', scale=(640, 640), keep_ratio=True), + dict(type='Pad', size=(640, 640)), + dict( + type='PackTextDetInputs', + meta_keys=('img_path', 'ori_shape', 'img_shape')) +] + +synthtext_textdet_train = _base_.synthtext_textdet_train +synthtext_textdet_train.pipeline = train_pipeline train_dataloader = dict( batch_size=16, num_workers=8, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), - dataset=dict( - type='ConcatDataset', - datasets=train_list, - pipeline=_base_.train_pipeline)) - -val_dataloader = dict( - batch_size=16, - num_workers=8, - persistent_workers=True, - sampler=dict(type='DefaultSampler', shuffle=False), - dataset=dict( - type='ConcatDataset', - datasets=test_list, - pipeline=_base_.test_pipeline)) - -test_dataloader = val_dataloader + dataset=synthtext_textdet_train) auto_scale_lr = dict(base_batch_size=16)