mirror of https://github.com/open-mmlab/mmocr.git
[Config] Refactor & fix DB configs (#1188)
parent
8c2873f061
commit
2b476bd8c0
|
@ -1,4 +1,3 @@
|
|||
# custom_imports = dict(imports=['mmcv.transforms'], allow_failed_imports=False) # noqa
|
||||
default_scope = 'mmocr'
|
||||
|
||||
default_hooks = dict(
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
default_scope = 'mmocr'
|
||||
|
||||
default_hooks = dict(
|
||||
timer=dict(type='IterTimerHook'),
|
||||
logger=dict(type='LoggerHook', interval=5),
|
||||
param_scheduler=dict(type='ParamSchedulerHook'),
|
||||
checkpoint=dict(type='CheckpointHook', interval=20),
|
||||
sampler_seed=dict(type='DistSamplerSeedHook'),
|
||||
)
|
||||
|
||||
env_cfg = dict(
|
||||
cudnn_benchmark=True,
|
||||
mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
|
||||
dist_cfg=dict(backend='nccl'),
|
||||
)
|
||||
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume = False
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')
|
|
@ -0,0 +1,67 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=False,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
|
||||
det_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
module_loss=dict(type='DBModuleLoss'),
|
||||
postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline_r18 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline_1333_736 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
]
|
|
@ -0,0 +1,69 @@
|
|||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
|
||||
det_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
module_loss=dict(type='DBModuleLoss'),
|
||||
postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline_r50dcnv2 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
with_polygon=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline_4068_1024 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
]
|
|
@ -1,25 +0,0 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=False,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
|
||||
det_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
module_loss=dict(type='DBModuleLoss'),
|
||||
postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
|
@ -1,58 +1,32 @@
|
|||
_base_ = [
|
||||
'dbnet_r18_fpnc.py', '../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_100k_iters.py',
|
||||
'_base_dbnet_r18_fpnc.py',
|
||||
'../../_base_/det_datasets/synthtext.py',
|
||||
'../../_base_/det_pipelines/dbnet_pipeline.py'
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_100k_iters.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
train_pipeline_r18 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]],
|
||||
clip_invalid_ploys=False),
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=16,
|
||||
workers_per_gpu=8,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type='UniformConcatDataset',
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r18),
|
||||
val=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_1333_736),
|
||||
test=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_1333_736))
|
||||
pipeline=_base_.train_pipeline_r18))
|
||||
|
||||
evaluation = dict(interval=999999, metric='hmean-iou') # do not evaluate
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=_base_.test_pipeline_1333_736))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
|
|
@ -1,77 +1,32 @@
|
|||
_base_ = [
|
||||
'dbnet_r18_fpnc.py',
|
||||
'_base_dbnet_r18_fpnc.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
train_pipeline_r18 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline_1333_736 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1333, 736), keep_ratio=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list,
|
||||
pipeline=train_pipeline_r18))
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=_base_.train_pipeline_r18))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_1333_736))
|
||||
pipeline=_base_.test_pipeline_1333_736))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')
|
||||
|
|
|
@ -1,27 +0,0 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
|
||||
det_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
module_loss=dict(type='DBModuleLoss'),
|
||||
postprocessor=dict(type='DBPostprocessor', text_repr_type='quad')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
|
@ -1,60 +1,31 @@
|
|||
_base_ = [
|
||||
'dbnet_r50dcnv2_fpnc.py', '../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_100k_iters.py',
|
||||
'_base_dbnet_r50dcnv2_fpnc.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/det_datasets/synthtext.py',
|
||||
'../../_base_/det_pipelines/dbnet_pipeline.py'
|
||||
'../../_base_/schedules/schedule_sgd_100k_iters.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
img_norm_cfg_r50dcnv2 = dict(
|
||||
mean=[122.67891434, 116.66876762, 104.00698793],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
to_rgb=True)
|
||||
train_pipeline_r50dcnv2 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]],
|
||||
clip_invalid_ploys=False),
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=16,
|
||||
workers_per_gpu=8,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type='UniformConcatDataset',
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r50dcnv2),
|
||||
val=dict(
|
||||
type='UniformConcatDataset',
|
||||
pipeline=_base_.train_pipeline_r50dcnv2))
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024),
|
||||
test=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024))
|
||||
pipeline=_base_.test_pipeline_4068_1024))
|
||||
|
||||
evaluation = dict(interval=999999, metric='hmean-iou') # do not evaluate
|
||||
test_dataloader = val_dataloader
|
||||
|
|
|
@ -1,57 +1,15 @@
|
|||
_base_ = [
|
||||
'dbnet_r50dcnv2_fpnc.py',
|
||||
'_base_dbnet_r50dcnv2_fpnc.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=20), )
|
||||
|
||||
load_from = 'checkpoints/textdet/dbnet/res50dcnv2_synthtext.pth'
|
||||
|
||||
train_pipeline_r50dcnv2 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
with_polygon=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline_4068_1024 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
]
|
||||
load_from = 'https://download.openmmlab.com/mmocr/textdet/dbnet/dbnet_r50dcnv2_fpnc_sbn_2e_synthtext_20210325-aa96e477.pth' # noqa
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
|
@ -61,7 +19,8 @@ train_dataloader = dict(
|
|||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r50dcnv2))
|
||||
pipeline=_base_.train_pipeline_r50dcnv2))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
|
@ -70,10 +29,6 @@ val_dataloader = dict(
|
|||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024))
|
||||
pipeline=_base_.test_pipeline_4068_1024))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')
|
||||
|
|
|
@ -0,0 +1,66 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
lateral_channels=256,
|
||||
asf_cfg=dict(attention_type='ScaleChannelSpatial')),
|
||||
det_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
module_loss=dict(type='DBModuleLoss'),
|
||||
postprocessor=dict(
|
||||
type='DBPostprocessor', text_repr_type='quad',
|
||||
epsilon_ratio=0.002)),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline_r50dcnv2 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_bbox=True,
|
||||
with_polygon=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(
|
||||
type='ImgAugWrapper',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
dict(type='RandomCrop', min_side_ratio=0.1),
|
||||
dict(type='Resize', scale=(640, 640), keep_ratio=True),
|
||||
dict(type='Pad', size=(640, 640)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape'))
|
||||
]
|
||||
|
||||
test_pipeline_4068_1024 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(4068, 1024), keep_ratio=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
]
|
|
@ -1,33 +0,0 @@
|
|||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
lateral_channels=256,
|
||||
asf_cfg=dict(attention_type='ScaleChannelSpatial')),
|
||||
det_head=dict(
|
||||
type='DBHead',
|
||||
in_channels=256,
|
||||
module_loss=dict(
|
||||
type='DBModuleLoss', alpha=5.0, beta=10.0, bbce_loss=True),
|
||||
postprocessor=dict(
|
||||
type='DBPostprocessor', text_repr_type='quad',
|
||||
epsilon_ratio=0.002)),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
|
@ -1,61 +1,32 @@
|
|||
_base_ = [
|
||||
'dbnetpp_r50dcnv2_fpnc.py', '../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_100k_iters.py',
|
||||
'_base_dbnetpp_r50dcnv2_fpnc.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/det_datasets/synthtext.py',
|
||||
'../../_base_/det_pipelines/dbnet_pipeline.py'
|
||||
'../../_base_/schedules/schedule_sgd_100k_iters.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
img_norm_cfg_r50dcnv2 = dict(
|
||||
mean=[122.67891434, 116.66876762, 104.00698793],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
to_rgb=True)
|
||||
train_pipeline_r50dcnv2 = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]],
|
||||
clip_invalid_ploys=False),
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
|
||||
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=16,
|
||||
workers_per_gpu=8,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type='UniformConcatDataset',
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r50dcnv2),
|
||||
val=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024),
|
||||
test=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024))
|
||||
pipeline=_base_.train_pipeline_r50dcnv2))
|
||||
|
||||
evaluation = dict(interval=200000, metric='hmean-iou') # do not evaluate
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=_base_.test_pipeline_4068_1024))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
|
|
@ -1,38 +1,32 @@
|
|||
_base_ = [
|
||||
'dbnetpp_r50dcnv2_fpnc.py', '../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
'_base_dbnetpp_r50dcnv2_fpnc.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/dbnet_pipeline.py'
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_r50dcnv2 = {{_base_.train_pipeline_r50dcnv2}}
|
||||
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
|
||||
|
||||
load_from = 'checkpoints/textdet/dbnetpp/res50dcnv2_synthtext.pth'
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=32,
|
||||
workers_per_gpu=8,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type='UniformConcatDataset',
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r50dcnv2),
|
||||
val=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024),
|
||||
test=dict(
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024))
|
||||
pipeline=_base_.train_pipeline_r50dcnv2))
|
||||
|
||||
evaluation = dict(
|
||||
interval=100,
|
||||
metric='hmean-iou',
|
||||
save_best='0_hmean-iou:hmean',
|
||||
rule='greater')
|
||||
val_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=_base_.test_pipeline_4068_1024))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
|
|
@ -33,6 +33,8 @@ class DBPostprocessor(BaseTextDetPostProcessor):
|
|||
predicted. Defaults to 5.
|
||||
unclip_ratio (float): The unclip ratio for text regions dilation.
|
||||
Defaults to 1.5.
|
||||
epsilon_ratio (float): The epsilon ratio for approximation accuracy.
|
||||
Defaults to 0.01.
|
||||
max_candidates (int): The maximum candidate number. Defaults to 3000.
|
||||
"""
|
||||
|
||||
|
@ -43,6 +45,7 @@ class DBPostprocessor(BaseTextDetPostProcessor):
|
|||
min_text_score: float = 0.3,
|
||||
min_text_width: int = 5,
|
||||
unclip_ratio: float = 1.5,
|
||||
epsilon_ratio: float = 0.01,
|
||||
max_candidates: int = 3000,
|
||||
**kwargs) -> None:
|
||||
super().__init__(
|
||||
|
@ -53,6 +56,7 @@ class DBPostprocessor(BaseTextDetPostProcessor):
|
|||
self.min_text_score = min_text_score
|
||||
self.min_text_width = min_text_width
|
||||
self.unclip_ratio = unclip_ratio
|
||||
self.epsilon_ratio = epsilon_ratio
|
||||
self.max_candidates = max_candidates
|
||||
|
||||
def get_text_instances(self, pred_results: Tuple[Tensor, Tensor, Tensor],
|
||||
|
@ -88,7 +92,7 @@ class DBPostprocessor(BaseTextDetPostProcessor):
|
|||
for i, poly in enumerate(contours):
|
||||
if i > self.max_candidates:
|
||||
break
|
||||
epsilon = 0.01 * cv2.arcLength(poly, True)
|
||||
epsilon = self.epsilon_ratio * cv2.arcLength(poly, True)
|
||||
approx = cv2.approxPolyDP(poly, epsilon, True)
|
||||
poly_pts = approx.reshape((-1, 2))
|
||||
if poly_pts.shape[0] < 4:
|
||||
|
|
Loading…
Reference in New Issue