From 2a2cab3c8c17cac31e6d00f2b68f9eb467a15727 Mon Sep 17 00:00:00 2001
From: Tong Gao <gaotongxiao@gmail.com>
Date: Mon, 6 Feb 2023 15:16:08 +0800
Subject: [PATCH] [Checkpoints] Add ST-pretrained DB-series models and logs
 (#1635)

* [Fix] Auto scale lr

* Update SynthText pretraining configs and READMEs
---
 configs/textdet/_base_/pretrain_runtime.py    | 14 ++++++
 .../_base_/schedules/schedule_sgd_100k.py     |  2 +-
 configs/textdet/dbnet/README.md               |  6 +++
 .../dbnet_resnet18_fpnc_100k_synthtext.py     | 46 ++++++++++++-----
 configs/textdet/dbnetpp/README.md             |  8 ++-
 ...etpp_resnet50-dcnv2_fpnc_100k_synthtext.py | 50 +++++++++++--------
 6 files changed, 91 insertions(+), 35 deletions(-)
 create mode 100644 configs/textdet/_base_/pretrain_runtime.py

diff --git a/configs/textdet/_base_/pretrain_runtime.py b/configs/textdet/_base_/pretrain_runtime.py
new file mode 100644
index 00000000..cb2800d5
--- /dev/null
+++ b/configs/textdet/_base_/pretrain_runtime.py
@@ -0,0 +1,14 @@
+_base_ = 'default_runtime.py'
+
+default_hooks = dict(
+    logger=dict(type='LoggerHook', interval=1000),
+    checkpoint=dict(
+        type='CheckpointHook',
+        interval=10000,
+        by_epoch=False,
+        max_keep_ckpts=1),
+)
+
+# Evaluation is disabled: pretraining runs configure no val/test evaluator
+val_evaluator = None
+test_evaluator = None
diff --git a/configs/textdet/_base_/schedules/schedule_sgd_100k.py b/configs/textdet/_base_/schedules/schedule_sgd_100k.py
index 61286916..f760774b 100644
--- a/configs/textdet/_base_/schedules/schedule_sgd_100k.py
+++ b/configs/textdet/_base_/schedules/schedule_sgd_100k.py
@@ -4,7 +4,7 @@ optim_wrapper = dict(
     optimizer=dict(type='SGD', lr=0.007, momentum=0.9, weight_decay=0.0001))
 
 train_cfg = dict(type='IterBasedTrainLoop', max_iters=100000)
-test_cfg = dict(type='TestLoop')
+test_cfg = None
 val_cfg = None
 # learning policy
 param_scheduler = [
diff --git a/configs/textdet/dbnet/README.md b/configs/textdet/dbnet/README.md
index 442e0f65..07c91edb 100644
--- a/configs/textdet/dbnet/README.md
+++ b/configs/textdet/dbnet/README.md
@@ -14,6 +14,12 @@ Recently, segmentation-based methods are quite popular in scene text detection,
 
 ## Results and models
 
+### SynthText
+
+|                                  Method                                   | Backbone | Training set | #iters  |                                               Download                                               |
+| :-----------------------------------------------------------------------: | :------: | :----------: | :-----: | :--------------------------------------------------------------------------------------------------: |
+| [DBNet_r18](/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py) | ResNet18 |  SynthText   | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/dbnet_resnet18_fpnc_100k_synthtext-2e9bf392.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext/20221214_150351.log) |
+
 ### ICDAR2015
 
 |             Method             |             Backbone             |             Pretrained Model             |  Training set   |    Test set    | #epochs | Test size | Precision | Recall | Hmean  |             Download             |
diff --git a/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py b/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py
index 8ea33b14..f02528b5 100644
--- a/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py
+++ b/configs/textdet/dbnet/dbnet_resnet18_fpnc_100k_synthtext.py
@@ -1,15 +1,44 @@
 _base_ = [
     '_base_dbnet_resnet18_fpnc.py',
     '../_base_/datasets/synthtext.py',
-    '../_base_/default_runtime.py',
+    '../_base_/pretrain_runtime.py',
     '../_base_/schedules/schedule_sgd_100k.py',
 ]
 
+file_client_args = dict(backend='disk')
+
+train_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        file_client_args=file_client_args,
+        color_type='color_ignore_orientation'),
+    dict(
+        type='LoadOCRAnnotations',
+        with_polygon=True,
+        with_bbox=True,
+        with_label=True,
+    ),
+    dict(type='FixInvalidPolygon'),
+    dict(
+        type='TorchVisionWrapper',
+        op='ColorJitter',
+        brightness=32.0 / 255,
+        saturation=0.5),
+    dict(
+        type='ImgAugWrapper',
+        args=[['Fliplr', 0.5],
+              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
+    dict(type='RandomCrop', min_side_ratio=0.1),
+    dict(type='Resize', scale=(640, 640), keep_ratio=True),
+    dict(type='Pad', size=(640, 640)),
+    dict(
+        type='PackTextDetInputs',
+        meta_keys=('img_path', 'ori_shape', 'img_shape'))
+]
+
 # dataset settings
 synthtext_textdet_train = _base_.synthtext_textdet_train
-synthtext_textdet_train.pipeline = _base_.train_pipeline
-synthtext_textdet_test = _base_.synthtext_textdet_test
-synthtext_textdet_test.pipeline = _base_.test_pipeline
+synthtext_textdet_train.pipeline = train_pipeline
 
 train_dataloader = dict(
     batch_size=16,
@@ -18,13 +47,4 @@ train_dataloader = dict(
     sampler=dict(type='DefaultSampler', shuffle=True),
     dataset=synthtext_textdet_train)
 
-val_dataloader = dict(
-    batch_size=1,
-    num_workers=4,
-    persistent_workers=True,
-    sampler=dict(type='DefaultSampler', shuffle=False),
-    dataset=synthtext_textdet_test)
-
-test_dataloader = val_dataloader
-
 auto_scale_lr = dict(base_batch_size=16)
diff --git a/configs/textdet/dbnetpp/README.md b/configs/textdet/dbnetpp/README.md
index e32cb172..7f9b668a 100644
--- a/configs/textdet/dbnetpp/README.md
+++ b/configs/textdet/dbnetpp/README.md
@@ -14,12 +14,18 @@ Recently, segmentation-based scene text detection methods have drawn extensive a
 
 ## Results and models
 
+### SynthText
+
+|                                       Method                                       |    BackBone    | Training set | #iters  |                                       Download                                        |
+| :--------------------------------------------------------------------------------: | :------------: | :----------: | :-----: | :-----------------------------------------------------------------------------------: |
+| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) | ResNet50-dcnv2 |  SynthText   | 100,000 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext-00f0a80b.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext/20221215_013531.log) |
+
 ### ICDAR2015
 
 |             Method             |             BackBone             |             Pretrained Model             |  Training set   |    Test set    | #epochs | Test size | Precision | Recall | Hmean  |             Download             |
 | :----------------------------: | :------------------------------: | :--------------------------------------: | :-------------: | :------------: | :-----: | :-------: | :-------: | :----: | :----: | :------------------------------: |
 | [DBNetpp_r50](/configs/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015.py) |             ResNet50             |                    -                     | ICDAR2015 Train | ICDAR2015 Test |  1200   |   1024    |  0.9079   | 0.8209 | 0.8622 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/dbnetpp_resnet50_fpnc_1200e_icdar2015_20221025_185550-013730aa.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50_fpnc_1200e_icdar2015/20221025_185550.log) |
-| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) |             ResNet50             | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test |  1200   |   1024    |  0.9116   | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
+| [DBNetpp_r50dcn](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015.py) |          ResNet50-dcnv2          | [Synthtext](/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py) ([model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/tmp_1.0_pretrain/dbnetpp_r50dcnv2_fpnc_100k_iter_synthtext-20220502-352fec8a.pth)) | ICDAR2015 Train | ICDAR2015 Test |  1200   |   1024    |  0.9116   | 0.8291 | 0.8684 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015_20220829_230108-f289bd20.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_1200e_icdar2015/20220829_230108.log) |
 | [DBNetpp_r50-oclip](/configs/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015.py) | [ResNet50-oCLIP](https://download.openmmlab.com/mmocr/backbone/resnet50-oclip-7ba0c533.pth) |                    -                     | ICDAR2015 Train | ICDAR2015 Test |  1200   |   1024    |  0.9174   | 0.8609 | 0.8882 | [model](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015_20221101_124139-4ecb39ac.pth) \| [log](https://download.openmmlab.com/mmocr/textdet/dbnetpp/dbnetpp_resnet50-oclip_fpnc_1200e_icdar2015/20221101_124139.log) |
 
 ## Citation
diff --git a/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py b/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py
index c3db2f84..7174055d 100644
--- a/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py
+++ b/configs/textdet/dbnetpp/dbnetpp_resnet50-dcnv2_fpnc_100k_synthtext.py
@@ -1,34 +1,44 @@
 _base_ = [
     '_base_dbnetpp_resnet50-dcnv2_fpnc.py',
-    '../_base_/default_runtime.py',
+    '../_base_/pretrain_runtime.py',
     '../_base_/datasets/synthtext.py',
     '../_base_/schedules/schedule_sgd_100k.py',
 ]
 
-# dataset settings
-train_list = [_base_.synthtext_textdet_train]
-test_list = [_base_.synthtext_textdet_test]
+train_pipeline = [
+    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
+    dict(
+        type='LoadOCRAnnotations',
+        with_bbox=True,
+        with_polygon=True,
+        with_label=True,
+    ),
+    dict(type='FixInvalidPolygon'),
+    dict(
+        type='TorchVisionWrapper',
+        op='ColorJitter',
+        brightness=32.0 / 255,
+        saturation=0.5),
+    dict(
+        type='ImgAugWrapper',
+        args=[['Fliplr', 0.5],
+              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
+    dict(type='RandomCrop', min_side_ratio=0.1),
+    dict(type='Resize', scale=(640, 640), keep_ratio=True),
+    dict(type='Pad', size=(640, 640)),
+    dict(
+        type='PackTextDetInputs',
+        meta_keys=('img_path', 'ori_shape', 'img_shape'))
+]
+
+synthtext_textdet_train = _base_.synthtext_textdet_train
+synthtext_textdet_train.pipeline = train_pipeline
 
 train_dataloader = dict(
     batch_size=16,
     num_workers=8,
     persistent_workers=True,
     sampler=dict(type='DefaultSampler', shuffle=True),
-    dataset=dict(
-        type='ConcatDataset',
-        datasets=train_list,
-        pipeline=_base_.train_pipeline))
-
-val_dataloader = dict(
-    batch_size=16,
-    num_workers=8,
-    persistent_workers=True,
-    sampler=dict(type='DefaultSampler', shuffle=False),
-    dataset=dict(
-        type='ConcatDataset',
-        datasets=test_list,
-        pipeline=_base_.test_pipeline))
-
-test_dataloader = val_dataloader
+    dataset=synthtext_textdet_train)
 
 auto_scale_lr = dict(base_batch_size=16)