[Config] Refactor & fix DB configs (#1188)

pull/1203/head
Tong Gao 2022-07-25 19:11:57 +08:00 committed by GitHub
parent 8c2873f061
commit 2b476bd8c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
15 changed files with 337 additions and 373 deletions

View File

@ -1,4 +1,3 @@
# custom_imports = dict(imports=['mmcv.transforms'], allow_failed_imports=False) # noqa
default_scope = 'mmocr'
default_hooks = dict(

View File

@ -0,0 +1,24 @@
# Shared runtime settings for text detection configs.
default_scope = 'mmocr'

# Hook configs, keyed by the name each hook is registered under.
default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=5),
    param_scheduler=dict(type='ParamSchedulerHook'),
    checkpoint=dict(type='CheckpointHook', interval=20),
    sampler_seed=dict(type='DistSamplerSeedHook'),
)

# Environment settings: cuDNN benchmark flag, multiprocessing options and
# the distributed backend.
env_cfg = dict(
    cudnn_benchmark=True,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

log_level = 'INFO'

# No checkpoint is loaded and resuming is off unless a child config
# overrides these.
load_from = None
resume = False

# The test evaluator is the very same dict object as the val evaluator.
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator

visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')

View File

@ -0,0 +1,67 @@
# Storage backend handed to the image-loading transforms below.
file_client_args = dict(backend='disk')

# DBNet model: ResNet-18 backbone, FPNC neck, DBHead.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
        norm_eval=False,
        style='caffe'),
    neck=dict(
        type='FPNC',
        in_channels=[64, 128, 256, 512],
        lateral_channels=256),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(type='DBModuleLoss'),
        postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
    # Normalisation statistics, channel swap flag and padding divisor.
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32))

# Training transforms; child configs reference this list by name.
train_pipeline_r18 = [
    # Load the image through the shared file client settings.
    dict(
        type='LoadImageFromFile',
        file_client_args=file_client_args,
        color_type='color_ignore_orientation'),
    # Load polygon, bbox and label annotations.
    dict(
        type='LoadOCRAnnotations',
        with_polygon=True,
        with_bbox=True,
        with_label=True,
    ),
    # torchvision ColorJitter applied through the wrapper transform.
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5),
    # imgaug ops: Fliplr, Affine rotation and Resize.
    dict(
        type='ImgAugWrapper',
        args=[
            ['Fliplr', 0.5],
            dict(cls='Affine', rotate=[-10, 10]),
            ['Resize', [0.5, 3.0]],
        ]),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape')),
]

# Test transforms with a (1333, 736) resize scale.
test_pipeline_1333_736 = [
    dict(
        type='LoadImageFromFile',
        file_client_args=file_client_args,
        color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(1333, 736), keep_ratio=True),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
                   'instances')),
]

View File

@ -0,0 +1,69 @@
# Storage backend handed to the image-loading transforms below.
file_client_args = dict(backend='disk')

# DBNet model: ResNet-50 backbone with DCNv2 in the last three stages,
# FPNC neck, DBHead.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPNC',
        in_channels=[256, 512, 1024, 2048],
        lateral_channels=256),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(type='DBModuleLoss'),
        postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
    # Normalisation statistics, channel swap flag and padding divisor.
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32))

# Training transforms; child configs reference this list by name.
train_pipeline_r50dcnv2 = [
    # Load the image through the shared file client settings.
    dict(
        type='LoadImageFromFile',
        file_client_args=file_client_args,
        color_type='color_ignore_orientation'),
    # Load bbox, polygon and label annotations.
    dict(
        type='LoadOCRAnnotations',
        with_bbox=True,
        with_polygon=True,
        with_label=True,
    ),
    # torchvision ColorJitter applied through the wrapper transform.
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5),
    # imgaug ops: Fliplr, Affine rotation and Resize.
    dict(
        type='ImgAugWrapper',
        args=[
            ['Fliplr', 0.5],
            dict(cls='Affine', rotate=[-10, 10]),
            ['Resize', [0.5, 3.0]],
        ]),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape')),
]

# Test transforms with a (4068, 1024) resize scale.
test_pipeline_4068_1024 = [
    dict(
        type='LoadImageFromFile',
        file_client_args=file_client_args,
        color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
                   'instances')),
]

View File

@ -1,25 +0,0 @@
# DBNet model: ResNet-18 backbone, FPNC neck, DBHead.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
        norm_eval=False,
        style='caffe'),
    neck=dict(
        type='FPNC',
        in_channels=[64, 128, 256, 512],
        lateral_channels=256),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(type='DBModuleLoss'),
        postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
    # Normalisation statistics, channel swap flag and padding divisor.
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32))

View File

@ -1,58 +1,32 @@
_base_ = [
'dbnet_r18_fpnc.py', '../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_100k_iters.py',
'_base_dbnet_r18_fpnc.py',
'../../_base_/det_datasets/synthtext.py',
'../../_base_/det_pipelines/dbnet_pipeline.py'
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_100k_iters.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline_r18 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]],
clip_invalid_ploys=False),
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}}
data = dict(
samples_per_gpu=16,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='UniformConcatDataset',
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r18),
val=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_1333_736),
test=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_1333_736))
pipeline=_base_.train_pipeline_r18))
evaluation = dict(interval=999999, metric='hmean-iou') # do not evaluate
val_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=_base_.test_pipeline_1333_736))
test_dataloader = val_dataloader

View File

@ -1,77 +1,32 @@
_base_ = [
'dbnet_r18_fpnc.py',
'_base_dbnet_r18_fpnc.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/default_runtime.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_1200e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
train_pipeline_r18 = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True,
),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5),
dict(
type='ImgAugWrapper',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
dict(type='RandomCrop', min_side_ratio=0.1),
dict(type='Resize', scale=(640, 640), keep_ratio=True),
dict(type='Pad', size=(640, 640)),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape'))
]
test_pipeline_1333_736 = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
'instances'))
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list,
pipeline=train_pipeline_r18))
type='ConcatDataset',
datasets=train_list,
pipeline=_base_.train_pipeline_r18))
val_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=test_pipeline_1333_736))
pipeline=_base_.test_pipeline_1333_736))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')

View File

@ -1,27 +0,0 @@
# DBNet model: ResNet-50 backbone with DCNv2 in the last three stages,
# FPNC neck, DBHead.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPNC',
        in_channels=[256, 512, 1024, 2048],
        lateral_channels=256),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(type='DBModuleLoss'),
        postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
    # Normalisation statistics, channel swap flag and padding divisor.
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32))

View File

@ -1,60 +1,31 @@
_base_ = [
'dbnet_r50dcnv2_fpnc.py', '../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_100k_iters.py',
'_base_dbnet_r50dcnv2_fpnc.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/det_datasets/synthtext.py',
'../../_base_/det_pipelines/dbnet_pipeline.py'
'../../_base_/schedules/schedule_sgd_100k_iters.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
img_norm_cfg_r50dcnv2 = dict(
mean=[122.67891434, 116.66876762, 104.00698793],
std=[58.395, 57.12, 57.375],
to_rgb=True)
train_pipeline_r50dcnv2 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]],
clip_invalid_ploys=False),
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
data = dict(
samples_per_gpu=16,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='UniformConcatDataset',
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r50dcnv2),
val=dict(
type='UniformConcatDataset',
pipeline=_base_.train_pipeline_r50dcnv2))
val_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024),
test=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024))
pipeline=_base_.test_pipeline_4068_1024))
evaluation = dict(interval=999999, metric='hmean-iou') # do not evaluate
test_dataloader = val_dataloader

View File

@ -1,57 +1,15 @@
_base_ = [
'dbnet_r50dcnv2_fpnc.py',
'_base_dbnet_r50dcnv2_fpnc.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/default_runtime.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_1200e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
load_from = 'checkpoints/textdet/dbnet/res50dcnv2_synthtext.pth'
train_pipeline_r50dcnv2 = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_bbox=True,
with_polygon=True,
with_label=True,
),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5),
dict(
type='ImgAugWrapper',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
dict(type='RandomCrop', min_side_ratio=0.1),
dict(type='Resize', scale=(640, 640), keep_ratio=True),
dict(type='Pad', size=(640, 640)),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape'))
]
test_pipeline_4068_1024 = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
'instances'))
]
load_from = 'https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-aa96e477.pth' # noqa
train_dataloader = dict(
batch_size=16,
@ -61,7 +19,8 @@ train_dataloader = dict(
dataset=dict(
type='ConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r50dcnv2))
pipeline=_base_.train_pipeline_r50dcnv2))
val_dataloader = dict(
batch_size=16,
num_workers=8,
@ -70,10 +29,6 @@ val_dataloader = dict(
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024))
pipeline=_base_.test_pipeline_4068_1024))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')

View File

@ -0,0 +1,66 @@
# DBNet++ model: the DBNet R50-DCNv2 layout with an `asf_cfg` entry on the
# FPNC neck and an explicit `epsilon_ratio` on the postprocessor.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPNC',
        in_channels=[256, 512, 1024, 2048],
        lateral_channels=256,
        asf_cfg=dict(attention_type='ScaleChannelSpatial')),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(type='DBModuleLoss'),
        postprocessor=dict(
            type='DBPostprocessor',
            text_repr_type='quad',
            epsilon_ratio=0.002)),
    # Normalisation statistics, channel swap flag and padding divisor.
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32))
# Training transforms; child configs reference this list by name.
train_pipeline_r50dcnv2 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    # Load bbox, polygon and label annotations.
    dict(
        type='LoadOCRAnnotations',
        with_bbox=True,
        with_polygon=True,
        with_label=True,
    ),
    # torchvision ColorJitter applied through the wrapper transform.
    dict(
        type='TorchVisionWrapper',
        op='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5),
    # imgaug ops: Fliplr, Affine rotation and Resize.
    dict(
        type='ImgAugWrapper',
        args=[
            ['Fliplr', 0.5],
            dict(cls='Affine', rotate=[-10, 10]),
            ['Resize', [0.5, 3.0]],
        ]),
    dict(type='RandomCrop', min_side_ratio=0.1),
    dict(type='Resize', scale=(640, 640), keep_ratio=True),
    dict(type='Pad', size=(640, 640)),
    dict(
        type='PackTextDetInputs',
        meta_keys=('img_path', 'ori_shape', 'img_shape')),
]
# Test transforms with a (4068, 1024) resize scale.
test_pipeline_4068_1024 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
    dict(
        type='PackTextDetInputs',
        meta_keys=(
            'img_path',
            'ori_shape',
            'img_shape',
            'scale_factor',
            'instances',
        )),
]

View File

@ -1,33 +0,0 @@
# DBNet++ model: ResNet-50 backbone with DCNv2, FPNC neck carrying an
# `asf_cfg` entry, and a DBHead whose loss takes explicit alpha/beta/bbce
# arguments.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPNC',
        in_channels=[256, 512, 1024, 2048],
        lateral_channels=256,
        asf_cfg=dict(attention_type='ScaleChannelSpatial')),
    det_head=dict(
        type='DBHead',
        in_channels=256,
        module_loss=dict(
            type='DBModuleLoss',
            alpha=5.0,
            beta=10.0,
            bbce_loss=True),
        postprocessor=dict(
            type='DBPostprocessor',
            text_repr_type='quad',
            epsilon_ratio=0.002)),
    # Normalisation statistics, channel swap flag and padding divisor.
    data_preprocessor=dict(
        type='TextDetDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32))

View File

@ -1,61 +1,32 @@
_base_ = [
'dbnetpp_r50dcnv2_fpnc.py', '../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_100k_iters.py',
'_base_dbnetpp_r50dcnv2_fpnc.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/det_datasets/synthtext.py',
'../../_base_/det_pipelines/dbnet_pipeline.py'
'../../_base_/schedules/schedule_sgd_100k_iters.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
img_norm_cfg_r50dcnv2 = dict(
mean=[122.67891434, 116.66876762, 104.00698793],
std=[58.395, 57.12, 57.375],
to_rgb=True)
train_pipeline_r50dcnv2 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]],
clip_invalid_ploys=False),
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
data = dict(
samples_per_gpu=16,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='UniformConcatDataset',
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r50dcnv2),
val=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024),
test=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024))
pipeline=_base_.train_pipeline_r50dcnv2))
evaluation = dict(interval=200000, metric='hmean-iou') # do not evaluate
val_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=_base_.test_pipeline_4068_1024))
test_dataloader = val_dataloader

View File

@ -1,38 +1,32 @@
_base_ = [
'dbnetpp_r50dcnv2_fpnc.py', '../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_1200e.py',
'_base_dbnetpp_r50dcnv2_fpnc.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/dbnet_pipeline.py'
'../../_base_/schedules/schedule_sgd_1200e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_r50dcnv2 = {{_base_.train_pipeline_r50dcnv2}}
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
load_from = 'checkpoints/textdet/dbnetpp/res50dcnv2_synthtext.pth'
data = dict(
samples_per_gpu=32,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type='UniformConcatDataset',
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r50dcnv2),
val=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024),
test=dict(
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024))
pipeline=_base_.train_pipeline_r50dcnv2))
evaluation = dict(
interval=100,
metric='hmean-iou',
save_best='0_hmean-iou:hmean',
rule='greater')
val_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=_base_.test_pipeline_4068_1024))
test_dataloader = val_dataloader

View File

@ -33,6 +33,8 @@ class DBPostprocessor(BaseTextDetPostProcessor):
predicted. Defaults to 5.
unclip_ratio (float): The unclip ratio for text regions dilation.
Defaults to 1.5.
epsilon_ratio (float): The epsilon ratio for approximation accuracy.
Defaults to 0.01.
max_candidates (int): The maximum candidate number. Defaults to 3000.
"""
@ -43,6 +45,7 @@ class DBPostprocessor(BaseTextDetPostProcessor):
min_text_score: float = 0.3,
min_text_width: int = 5,
unclip_ratio: float = 1.5,
epsilon_ratio: float = 0.01,
max_candidates: int = 3000,
**kwargs) -> None:
super().__init__(
@ -53,6 +56,7 @@ class DBPostprocessor(BaseTextDetPostProcessor):
self.min_text_score = min_text_score
self.min_text_width = min_text_width
self.unclip_ratio = unclip_ratio
self.epsilon_ratio = epsilon_ratio
self.max_candidates = max_candidates
def get_text_instances(self, pred_results: Tuple[Tensor, Tensor, Tensor],
@ -88,7 +92,7 @@ class DBPostprocessor(BaseTextDetPostProcessor):
for i, poly in enumerate(contours):
if i > self.max_candidates:
break
epsilon = 0.01 * cv2.arcLength(poly, True)
epsilon = self.epsilon_ratio * cv2.arcLength(poly, True)
approx = cv2.approxPolyDP(poly, epsilon, True)
poly_pts = approx.reshape((-1, 2))
if poly_pts.shape[0] < 4: