[Refactor] Refactor and rename several textdet configs (#1294)

* update

* fix

* fix comments

* fix
pull/1303/head
Xinyu Wang 2022-08-22 14:27:56 +08:00 committed by GitHub
parent b0b6dadc00
commit 8d0c6a013a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
25 changed files with 422 additions and 888 deletions

View File

@ -0,0 +1,16 @@
# FCENet override config: only the keys listed below are given here; the rest
# of the model definition is expected to come from the file named in `_base_`.
_base_ = ['_base_fcenet_resnet50_fpn.py']

model = dict(
    backbone=dict(
        norm_eval=True,
        style='pytorch',
        # Deformable-convolution settings for the backbone.
        dcn=dict(
            type='DCNv2',
            deform_groups=2,
            fallback_on_stride=False,
        ),
        # NOTE(review): presumably one flag per backbone stage -- confirm
        # against the backbone implementation.
        stage_with_dcn=(False, True, True, True),
    ),
    det_head=dict(
        module_loss=dict(
            type='FCEModuleLoss',
            num_sample=50,
            # Three (low, high) ranges; adjacent ranges overlap.
            level_proportion_range=(
                (0, 0.25),
                (0.2, 0.65),
                (0.55, 1.0),
            ),
        ),
        postprocessor=dict(
            text_repr_type='poly',
            alpha=1.0,
            beta=2.0,
        ),
    ),
)

View File

@ -1,17 +1,44 @@
_base_ = [
'fcenet_r50dcnv2_fpn.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_1500e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=20),
logger=dict(type='LoggerHook', interval=20))
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=False,
style='pytorch'),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
det_head=dict(
type='FCEHead',
in_channels=256,
fourier_degree=5,
module_loss=dict(type='FCEModuleLoss', num_sample=50),
postprocessor=dict(
type='FCEPostprocessor',
scales=(8, 16, 32),
text_repr_type='quad',
num_reconstr_points=50,
alpha=1.2,
beta=1.0,
score_thr=0.3)),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))
train_pipeline = [
dict(
@ -67,12 +94,13 @@ train_pipeline = [
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1080, 736), keep_ratio=True),
dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(
@ -84,25 +112,3 @@ test_pipeline = [
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')

View File

@ -1,39 +0,0 @@
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=False,
style='pytorch'),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
det_head=dict(
type='FCEHead',
in_channels=256,
fourier_degree=5,
module_loss=dict(type='FCEModuleLoss', num_sample=50),
postprocessor=dict(
type='FCEPostprocessor',
scales=(8, 16, 32),
text_repr_type='quad',
num_reconstr_points=50,
alpha=1.2,
beta=1.0,
score_thr=0.3)),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))

View File

@ -1,103 +0,0 @@
_base_ = [
'fcenet_r50_fpn.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_1500e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=20),
logger=dict(type='LoggerHook', interval=20))
train_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True,
),
dict(
type='RandomResize',
scale=(800, 800),
ratio_range=(0.75, 2.5),
keep_ratio=True),
dict(
type='TextDetRandomCropFlip',
crop_ratio=0.5,
iter_num=1,
min_area_ratio=0.2),
dict(
type='RandomApply',
transforms=[dict(type='RandomCrop', min_side_ratio=0.3)],
prob=0.8),
dict(
type='RandomRotate',
max_angle=30,
pad_with_fixed_color=False,
use_canvas=True),
dict(
type='RandomChoice',
transforms=[[
dict(type='Resize', scale=800, keep_ratio=True),
dict(type='SourceImagePad', target_scale=800)
],
dict(type='Resize', scale=800, keep_ratio=False)],
prob=[0.6, 0.4]),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')

View File

@ -1,44 +0,0 @@
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
det_head=dict(
type='FCEHead',
in_channels=256,
fourier_degree=5,
module_loss=dict(
type='FCEModuleLoss',
num_sample=50,
level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))),
postprocessor=dict(
type='FCEPostprocessor',
scales=(8, 16, 32),
text_repr_type='poly',
num_reconstr_points=50,
alpha=1.0,
beta=2.0,
score_thr=0.3)),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))

View File

@ -0,0 +1,49 @@
# FCENet training/evaluation config for CTW1500. It combines the base files
# listed in `_base_` and then wires the CTW1500 datasets into the dataloaders.
_base_ = [
'_base_fcenet_resnet50-dcnv2_fpn.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_1500e.py',
]
# Storage backend settings handed to the image-loading transform below.
file_client_args = dict(backend='disk')
# dataset settings
# Dataset definitions taken from the inherited base configs.
# NOTE(review): presumably defined in det_datasets/ctw1500.py -- confirm.
ctw_det_train = _base_.ctw_det_train
ctw_det_test = _base_.ctw_det_test
# test pipeline for CTW1500
ctw_test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1080, 736), keep_ratio=True),
# Load annotations after ``Resize`` because the ground truth does not
# need to go through the resize transform.
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
# Attach the pipelines to the dataset definitions before the dataloaders below
# reference them: training uses the inherited `train_pipeline`, testing uses
# the CTW1500-specific pipeline defined above.
ctw_det_train.pipeline = _base_.train_pipeline
ctw_det_test.pipeline = ctw_test_pipeline
# Training loader: shuffled, 8 samples per batch.
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=ctw_det_train)
# Validation loader: unshuffled, one sample per batch.
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=ctw_det_test)
# Testing reuses the validation dataloader settings.
test_dataloader = val_dataloader

View File

@ -0,0 +1,28 @@
# FCENet training/evaluation config for ICDAR2015. It combines the base files
# listed in `_base_` and then wires the ICDAR2015 datasets into the
# dataloaders.
_base_ = [
'_base_fcenet_resnet50_fpn.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_1500e.py',
]
# dataset settings
# Dataset definitions taken from the inherited base configs.
# NOTE(review): presumably defined in det_datasets/icdar2015.py -- confirm.
ic15_det_train = _base_.ic15_det_train
ic15_det_test = _base_.ic15_det_test
# Both pipelines are inherited unchanged; they must be attached before the
# dataloaders below reference the dataset definitions.
ic15_det_train.pipeline = _base_.train_pipeline
ic15_det_test.pipeline = _base_.test_pipeline
# Training loader: shuffled, 8 samples per batch.
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=ic15_det_train)
# Validation loader: unshuffled, one sample per batch.
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=ic15_det_test)
# Testing reuses the validation dataloader settings.
test_dataloader = val_dataloader

View File

@ -15,9 +15,9 @@ Collections:
README: configs/textdet/fcenet/README.md
Models:
- Name: fcenet_r50dcnv2_fpn_1500e_ctw1500
- Name: fcenet_resnet50-dcnv2_fpn_1500e_ctw1500
In Collection: FCENet
Config: configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py
Config: configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py
Metadata:
Training Data: CTW1500
Results:
@ -26,9 +26,9 @@ Models:
Metrics:
hmean-iou: 0.8500
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth
- Name: fcenet_r50_fpn_1500e_icdar2015
- Name: fcenet_resnet50_fpn_1500e_icdar2015
In Collection: FCENet
Config: configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py
Config: configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
Metadata:
Training Data: ICDAR2015
Results:

View File

@ -1,4 +1,5 @@
# model settings
file_client_args = dict(backend='disk')
model = dict(
type='MMDetWrapper',
text_repr_type='poly',
@ -132,3 +133,50 @@ model = dict(
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5))))
train_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True,
),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(
type='RandomResize',
scale=(640, 640),
ratio_range=(1.0, 4.125),
keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(type='TextDetRandomCrop', target_size=(640, 640)),
dict(type='MMOCR2MMDet', poly2mask=True),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
'scale_factor', 'flip_direction'))
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]

View File

@ -0,0 +1,48 @@
# Mask R-CNN training/evaluation config for CTW1500. It combines the base
# files listed in `_base_` and then wires the CTW1500 datasets into the
# dataloaders.
_base_ = [
'_base_mask-rcnn_resnet50_fpn.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_160e.py',
]
# dataset settings
# Dataset definitions taken from the inherited base configs.
# NOTE(review): presumably defined in det_datasets/ctw1500.py -- confirm.
ctw_det_train = _base_.ctw_det_train
ctw_det_test = _base_.ctw_det_test
# test pipeline for CTW1500
ctw_test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=dict(backend='disk'),
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
# Load annotations after ``Resize`` because the ground truth does not
# need to go through the resize transform.
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
# Attach the pipelines to the dataset definitions before the dataloaders below
# reference them: training uses the inherited `train_pipeline`, testing uses
# the CTW1500-specific pipeline defined above.
ctw_det_train.pipeline = _base_.train_pipeline
ctw_det_test.pipeline = ctw_test_pipeline
# Training loader: shuffled, 8 samples per batch.
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=ctw_det_train)
# Validation loader: unshuffled, one sample per batch.
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=ctw_det_test)
# Testing reuses the validation dataloader settings.
test_dataloader = val_dataloader

View File

@ -0,0 +1,28 @@
# Mask R-CNN training/evaluation config for ICDAR2015. It combines the base
# files listed in `_base_` and then wires the ICDAR2015 datasets into the
# dataloaders.
_base_ = [
'_base_mask-rcnn_resnet50_fpn.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_sgd_160e.py',
]
# dataset settings
# Dataset definitions taken from the inherited base configs.
# NOTE(review): presumably defined in det_datasets/icdar2015.py -- confirm.
ic15_det_train = _base_.ic15_det_train
ic15_det_test = _base_.ic15_det_test
# Both pipelines are inherited unchanged; they must be attached before the
# dataloaders below reference the dataset definitions.
ic15_det_train.pipeline = _base_.train_pipeline
ic15_det_test.pipeline = _base_.test_pipeline
# Training loader: shuffled, 8 samples per batch.
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=ic15_det_train)
# Validation loader: unshuffled, one sample per batch.
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=ic15_det_test)
# Testing reuses the validation dataloader settings.
test_dataloader = val_dataloader

View File

@ -0,0 +1,14 @@
# Mask R-CNN config for ICDAR2017: builds on the ICDAR2015 config and swaps in
# the ICDAR2017 dataset definitions.
_base_ = [
'mask-rcnn_resnet50_fpn_160e_icdar2015.py',
'../../_base_/det_datasets/icdar2017.py',
]
# Dataset definitions taken from the inherited base configs.
# NOTE(review): presumably defined in det_datasets/icdar2017.py -- confirm.
ic17_det_train = _base_.ic17_det_train
ic17_det_test = _base_.ic17_det_test
# use the same pipeline as icdar2015
ic17_det_train.pipeline = _base_.train_pipeline
ic17_det_test.pipeline = _base_.test_pipeline
# Only the `dataset` key is given here.
# NOTE(review): the remaining dataloader fields (batch size, workers, sampler)
# are presumably merged in from the ICDAR2015 config by the config loader --
# confirm the merge behaviour.
train_dataloader = dict(dataset=ic17_det_train)
val_dataloader = dict(dataset=ic17_det_test)
# Testing reuses the validation dataloader settings.
test_dataloader = val_dataloader

View File

@ -1,85 +0,0 @@
_base_ = [
'ocr_mask_rcnn_r50_fpn_ohem_poly.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_160e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=20),
logger=dict(type='LoggerHook', interval=20))
train_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True,
),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(
type='RandomResize',
scale=(640, 640),
ratio_range=(1.0, 4.125),
resize_type='Resize',
keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(type='TextDetRandomCrop', target_size=(640, 640)),
dict(type='MMOCR2MMDet', poly2mask=True),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
'scale_factor', 'flip_direction'))
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')

View File

@ -1,85 +0,0 @@
_base_ = [
'ocr_mask_rcnn_r50_fpn_ohem_poly.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_160e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=20),
logger=dict(type='LoggerHook', interval=20))
train_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True,
),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(
type='RandomResize',
scale=(640, 640),
ratio_range=(1.0, 4.125),
resize_type='mmocr.Resize',
keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(type='TextDetRandomCrop', target_size=(640, 640)),
dict(type='MMOCR2MMDet', poly2mask=True),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
'scale_factor', 'flip_direction'))
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')

View File

@ -1,86 +0,0 @@
_base_ = [
'ocr_mask_rcnn_r50_fpn_ohem_poly.py',
'../../_base_/det_datasets/icdar2017.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_160e.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=20),
logger=dict(type='LoggerHook', interval=20))
train_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True,
),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(
type='RandomResize',
scale=(640, 640),
ratio_range=(1.0, 4.125),
keep_ratio=True),
dict(type='RandomFlip', prob=0.5),
dict(type='TextDetRandomCrop', target_size=(640, 640)),
dict(type='MMOCR2MMDet', poly2mask=True),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
'scale_factor', 'flip_direction'))
]
test_pipeline = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
# add loading annotation after ``Resize`` because ground truth
# does not need to do resize data transform
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
train_dataloader = dict(
batch_size=8,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')

View File

@ -16,9 +16,9 @@ Collections:
README: configs/textdet/maskrcnn/README.md
Models:
- Name: mask_rcnn_r50_fpn_160e_ctw1500
- Name: mask-rcnn_resnet50_fpn_160e_ctw1500
In Collection: Mask R-CNN
Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500.py
Metadata:
Training Data: CTW1500
Results:
@ -28,9 +28,9 @@ Models:
hmean: 0.7486
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth
- Name: mask_rcnn_r50_fpn_160e_icdar2015
- Name: mask-rcnn_resnet50_fpn_160e_icdar2015
In Collection: Mask R-CNN
Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
Metadata:
Training Data: ICDAR2015
Results:
@ -40,9 +40,9 @@ Models:
hmean: 0.8280
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth
- Name: mask_rcnn_r50_fpn_160e_icdar2017
- Name: mask-rcnn_resnet50_fpn_160e_icdar2017
In Collection: Mask R-CNN
Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2017.py
Metadata:
Training Data: ICDAR2017
Results:

View File

@ -1,134 +0,0 @@
# model settings
model = dict(
type='MMDetWrapper',
text_repr_type='quad',
cfg=dict(
type='MaskRCNN',
data_preprocessor=dict(
type='DetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32),
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(
type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[4],
ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign', output_size=7, sampling_ratio=0.),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=1,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign', output_size=14, sampling_ratio=0.),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=1,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=2000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)),
test_cfg=dict(
rpn=dict(
nms_pre=1000,
max_per_img=1000,
nms=dict(type='nms', iou_threshold=0.7),
min_bbox_size=0),
rcnn=dict(
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=100,
mask_thr_binary=0.5))))

View File

@ -1,20 +1,35 @@
# TODO Train on ICDAR 2017
_base_ = [
'psenet_r50_fpnf.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_sgd_600e.py',
'../../_base_/det_datasets/icdar2017.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=100),
logger=dict(type='LoggerHook', interval=20))
model = {{_base_.model_quad}}
model = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
det_head=dict(
type='PSEHead',
in_channels=[256],
hidden_dim=256,
out_channel=7,
module_loss=dict(type='PSEModuleLoss'),
postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))
train_pipeline = [
dict(
@ -47,29 +62,12 @@ test_pipeline = [
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
'instances'))
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')

View File

@ -15,9 +15,9 @@ Collections:
README: configs/textdet/psenet/README.md
Models:
- Name: psenet_r50_fpnf_600e_ctw1500
- Name: psenet_resnet50_fpnf_600e_ctw1500
In Collection: PSENet
Config: configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py
Metadata:
Training Data: CTW1500
Results:
@ -27,9 +27,9 @@ Models:
hmean-iou: 0.784
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth
- Name: psenet_r50_fpnf_600e_icdar2015
- Name: psenet_resnet50_fpnf_600e_icdar2015
In Collection: PSENet
Config: configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
Metadata:
Training Data: ICDAR2015
Results:
@ -37,11 +37,11 @@ Models:
Dataset: ICDAR2015
Metrics:
hmean-iou: 0.806
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015-c6131f0d.pth
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015-c6131f0d.pth
- Name: psenet_r50_fpnf_600e_icdar2015
- Name: psenet_resnet50_fpnf_600e_icdar2015
In Collection: PSENet
Config: configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
Metadata:
Training Data: ICDAR2017 ICDAR2015
Results:

View File

@ -1,61 +0,0 @@
model_poly = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
det_head=dict(
type='PSEHead',
in_channels=[256],
hidden_dim=256,
out_channel=7,
module_loss=dict(type='PSEModuleLoss'),
postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))
model_quad = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
norm_eval=True,
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
style='pytorch'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
det_head=dict(
type='PSEHead',
in_channels=[256],
hidden_dim=256,
out_channel=7,
module_loss=dict(type='PSEModuleLoss'),
postprocessor=dict(type='PSEPostprocessor', text_repr_type='quad')),
data_preprocessor=dict(
type='TextDetDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32))

View File

@ -1,75 +0,0 @@
_base_ = [
'psenet_r50_fpnf.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_adam_step_600e.py',
'../../_base_/det_datasets/ctw1500.py',
]
# dataset settings
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
file_client_args = dict(backend='disk')
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=100),
logger=dict(type='LoggerHook', interval=20))
model = {{_base_.model_poly}}
train_pipeline_ctw = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5),
dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='RandomRotate', max_angle=10),
dict(type='TextDetRandomCrop', target_size=(736, 736)),
dict(type='Pad', size=(736, 736)),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
test_pipeline_ctw = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1280, 1280), keep_ratio=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
'instances'))
]
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset', datasets=train_list,
pipeline=train_pipeline_ctw))
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline_ctw))
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')

View File

@ -1,84 +0,0 @@
# Base configs this file inherits from: the PSENet model definitions, the
# ICDAR2015 dataset lists, the default runtime and the 600e Adam step schedule.
_base_ = [
'psenet_r50_fpnf.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_adam_step_600e.py',
]
# dataset settings
# ``{{_base_.<name>}}`` is the config loader's placeholder syntax for a value
# defined in one of the base files above.
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
# Shared by the ``LoadImageFromFile`` step of both pipelines in this file.
file_client_args = dict(backend='disk')
# Checkpoint and logging hook intervals.
# NOTE(review): the unit of ``interval`` is defined by each hook — confirm.
default_hooks = dict(
checkpoint=dict(type='CheckpointHook', interval=100),
logger=dict(type='LoggerHook', interval=20))
# Use the ``model_quad`` definition taken from the base configs.
model = {{_base_.model_quad}}
# Training pipeline for the ICDAR2015 config: load image and annotations,
# augment, crop/pad to 736x736, then pack the inputs.
train_pipeline_icdar2015 = [
# Read the image with the file client configured in this file.
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
# Load polygon, bbox and label annotations.
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
# Colour augmentation through the torchvision ``ColorJitter`` op.
dict(
type='TorchVisionWrapper',
op='ColorJitter',
brightness=32.0 / 255,
saturation=0.5),
# Geometric augmentation: scale jitter, horizontal flip, small rotation.
dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='RandomRotate', max_angle=10),
# Crop to the target size, then pad so every sample is 736x736.
dict(type='TextDetRandomCrop', target_size=(736, 736)),
dict(type='Pad', size=(736, 736)),
# Pack the sample together with the listed meta information.
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
# Test pipeline for the ICDAR2015 config: load the image, resize it while
# keeping the aspect ratio, load the annotations, then pack the inputs.
test_pipeline_icdar2015 = [
dict(
type='LoadImageFromFile',
file_client_args=file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
# Annotations are loaded after ``Resize`` because the ground truth
# must not go through the resize transform.
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
# Training loader: shuffled, batch size 16, over the concatenation of
# ``train_list`` with the training pipeline applied.
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='ConcatDataset',
datasets=train_list,
pipeline=train_pipeline_icdar2015))
# Validation loader: one image per batch, no shuffling, over ``test_list``.
val_dataloader = dict(
batch_size=1,
num_workers=4,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type='ConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
# Testing reuses the validation loader and the validation evaluator.
test_dataloader = val_dataloader
val_evaluator = dict(type='HmeanIOUMetric')
test_evaluator = val_evaluator
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')

View File

@ -0,0 +1,46 @@
# Base configs this file inherits from: the shared PSENet base config, the
# CTW1500 dataset definitions, the text-detection default runtime and the
# 600e Adam step schedule.
_base_ = [
'_base_psenet_resnet50_fpnf.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_adam_step_600e.py',
]
# dataset settings
# Dataset configs taken from the base files; their ``pipeline`` field is
# filled in further down in this file.
ctw_det_train = _base_.ctw_det_train
ctw_det_test = _base_.ctw_det_test
# CTW1500-specific test pipeline: load the image, resize it to fit
# (1280, 1280) while keeping the aspect ratio, load the annotations, then
# pack the inputs.
test_pipeline_ctw = [
# ``file_client_args`` comes from the base configs.
dict(
type='LoadImageFromFile',
file_client_args=_base_.file_client_args,
color_type='color_ignore_orientation'),
dict(type='Resize', scale=(1280, 1280), keep_ratio=True),
# Annotations are loaded after ``Resize``.
# NOTE(review): presumably so the ground truth is not resized — confirm.
dict(
type='LoadOCRAnnotations',
with_polygon=True,
with_bbox=True,
with_label=True),
dict(
type='PackTextDetInputs',
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
]
# pipeline settings
# Training uses the pipeline from the base configs; testing uses the
# CTW1500-specific pipeline defined in this file.
ctw_det_train.pipeline = _base_.train_pipeline
ctw_det_test.pipeline = test_pipeline_ctw
# Training loader: shuffled, batch size 16.
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=ctw_det_train)
# Validation loader: one image per batch, no shuffling.
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=ctw_det_test)
# Testing reuses the validation loader.
test_dataloader = val_dataloader

View File

@ -0,0 +1,35 @@
# Base configs this file inherits from: the shared PSENet base config, the
# ICDAR2015 dataset definitions, the text-detection default runtime and the
# 600e Adam step schedule.
_base_ = [
'_base_psenet_resnet50_fpnf.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/textdet_default_runtime.py',
'../../_base_/schedules/schedule_adam_step_600e.py',
]
# dataset settings
# Dataset configs taken from the base files; their ``pipeline`` field is
# filled in further down in this file.
ic15_det_train = _base_.ic15_det_train
ic15_det_test = _base_.ic15_det_test
# use quadrilaterals for icdar2015
# Only the keys listed here are set; the rest of the model definition is
# expected to come from the base config.
model = dict(
backbone=dict(style='pytorch'),
det_head=dict(postprocessor=dict(text_repr_type='quad')))
# pipeline settings
# Both datasets use the pipelines provided by the base configs unchanged.
ic15_det_train.pipeline = _base_.train_pipeline
ic15_det_test.pipeline = _base_.test_pipeline
# Training loader: shuffled, batch size 16.
train_dataloader = dict(
batch_size=16,
num_workers=8,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=ic15_det_train)
# Validation loader: one image per batch, no shuffling.
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=ic15_det_test)
# Testing reuses the validation loader.
test_dataloader = val_dataloader

View File

@ -0,0 +1,14 @@
# ICDAR2017 variant of the PSENet ICDAR2015 config: everything is inherited
# from the icdar2015 config and only the datasets are swapped.
_base_ = [
    'psenet_resnet50_fpnf_600e_icdar2015.py',
    '../../_base_/det_datasets/icdar2017.py',
]
# Dataset configs defined by the icdar2017 dataset base file.
ic17_det_train = _base_.ic17_det_train
ic17_det_test = _base_.ic17_det_test
# use the same pipeline as icdar2015
# The icdar2015 config assigns ``_base_.train_pipeline`` and
# ``_base_.test_pipeline`` to its datasets and defines no
# ``*_pipeline_icdar2015`` variables, so those are the names to reference.
ic17_det_train.pipeline = _base_.train_pipeline
ic17_det_test.pipeline = _base_.test_pipeline
# Only ``dataset`` is overridden here; the remaining loader settings are
# expected to be merged in from the inherited icdar2015 config.
train_dataloader = dict(dataset=ic17_det_train)
val_dataloader = dict(dataset=ic17_det_test)
test_dataloader = val_dataloader