mirror of https://github.com/open-mmlab/mmocr.git
[Refactor] Refactor and rename several textdet configs (#1294)
* update * fix * fix comments * fix
pull/1303/head
parent
b0b6dadc00
commit
8d0c6a013a
|
@ -0,0 +1,16 @@
|
|||
_base_ = [
|
||||
'_base_fcenet_resnet50_fpn.py',
|
||||
]
|
||||
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
det_head=dict(
|
||||
module_loss=dict(
|
||||
type='FCEModuleLoss',
|
||||
num_sample=50,
|
||||
level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))),
|
||||
postprocessor=dict(text_repr_type='poly', alpha=1.0, beta=2.0)))
|
|
@ -1,17 +1,44 @@
|
|||
_base_ = [
|
||||
'fcenet_r50dcnv2_fpn.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1500e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=20),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=False,
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
det_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
fourier_degree=5,
|
||||
module_loss=dict(type='FCEModuleLoss', num_sample=50),
|
||||
postprocessor=dict(
|
||||
type='FCEPostprocessor',
|
||||
scales=(8, 16, 32),
|
||||
text_repr_type='quad',
|
||||
num_reconstr_points=50,
|
||||
alpha=1.2,
|
||||
beta=1.0,
|
||||
score_thr=0.3)),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
|
@ -67,12 +94,13 @@ train_pipeline = [
|
|||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1080, 736), keep_ratio=True),
|
||||
dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
|
@ -84,25 +112,3 @@ test_pipeline = [
|
|||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(
|
||||
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')
|
|
@ -1,39 +0,0 @@
|
|||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=False,
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
det_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
fourier_degree=5,
|
||||
module_loss=dict(type='FCEModuleLoss', num_sample=50),
|
||||
postprocessor=dict(
|
||||
type='FCEPostprocessor',
|
||||
scales=(8, 16, 32),
|
||||
text_repr_type='quad',
|
||||
num_reconstr_points=50,
|
||||
alpha=1.2,
|
||||
beta=1.0,
|
||||
score_thr=0.3)),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
|
@ -1,103 +0,0 @@
|
|||
_base_ = [
|
||||
'fcenet_r50_fpn.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1500e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=20),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(800, 800),
|
||||
ratio_range=(0.75, 2.5),
|
||||
keep_ratio=True),
|
||||
dict(
|
||||
type='TextDetRandomCropFlip',
|
||||
crop_ratio=0.5,
|
||||
iter_num=1,
|
||||
min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomApply',
|
||||
transforms=[dict(type='RandomCrop', min_side_ratio=0.3)],
|
||||
prob=0.8),
|
||||
dict(
|
||||
type='RandomRotate',
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False,
|
||||
use_canvas=True),
|
||||
dict(
|
||||
type='RandomChoice',
|
||||
transforms=[[
|
||||
dict(type='Resize', scale=800, keep_ratio=True),
|
||||
dict(type='SourceImagePad', target_scale=800)
|
||||
],
|
||||
dict(type='Resize', scale=800, keep_ratio=False)],
|
||||
prob=[0.6, 0.4]),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2260, 2260), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(
|
||||
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')
|
|
@ -1,44 +0,0 @@
|
|||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
det_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
fourier_degree=5,
|
||||
module_loss=dict(
|
||||
type='FCEModuleLoss',
|
||||
num_sample=50,
|
||||
level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))),
|
||||
postprocessor=dict(
|
||||
type='FCEPostprocessor',
|
||||
scales=(8, 16, 32),
|
||||
text_repr_type='poly',
|
||||
num_reconstr_points=50,
|
||||
alpha=1.0,
|
||||
beta=2.0,
|
||||
score_thr=0.3)),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
|
@ -0,0 +1,49 @@
|
|||
_base_ = [
|
||||
'_base_fcenet_resnet50-dcnv2_fpn.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1500e.py',
|
||||
]
|
||||
|
||||
file_client_args = dict(backend='disk')
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
|
||||
# test pipeline for CTW1500
|
||||
ctw_test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1080, 736), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test.pipeline = ctw_test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
|
@ -0,0 +1,28 @@
|
|||
_base_ = [
|
||||
'_base_fcenet_resnet50_fpn.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_1500e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
|
@ -15,9 +15,9 @@ Collections:
|
|||
README: configs/textdet/fcenet/README.md
|
||||
|
||||
Models:
|
||||
- Name: fcenet_r50dcnv2_fpn_1500e_ctw1500
|
||||
- Name: fcenet_resnet50-dcnv2_fpn_1500e_ctw1500
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50-dcnv2_fpn_1500e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
|
@ -26,9 +26,9 @@ Models:
|
|||
Metrics:
|
||||
hmean-iou: 0.8500
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500_20211022-e326d7ec.pth
|
||||
- Name: fcenet_r50_fpn_1500e_icdar2015
|
||||
- Name: fcenet_resnet50_fpn_1500e_icdar2015
|
||||
In Collection: FCENet
|
||||
Config: configs/textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py
|
||||
Config: configs/textdet/fcenet/fcenet_resnet50_fpn_1500e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
# model settings
|
||||
file_client_args = dict(backend='disk')
|
||||
|
||||
model = dict(
|
||||
type='MMDetWrapper',
|
||||
text_repr_type='poly',
|
||||
|
@ -132,3 +133,50 @@ model = dict(
|
|||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100,
|
||||
mask_thr_binary=0.5))))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(640, 640),
|
||||
ratio_range=(1.0, 4.125),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='TextDetRandomCrop', target_size=(640, 640)),
|
||||
dict(type='MMOCR2MMDet', poly2mask=True),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'scale_factor', 'flip_direction'))
|
||||
]
|
||||
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
|
@ -0,0 +1,48 @@
|
|||
_base_ = [
|
||||
'_base_mask-rcnn_resnet50_fpn.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
ctw_det_train = _base_.ctw_det_train
|
||||
ctw_det_test = _base_.ctw_det_test
|
||||
|
||||
# test pipeline for CTW1500
|
||||
ctw_test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=dict(backend='disk'),
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
ctw_det_train.pipeline = _base_.train_pipeline
|
||||
ctw_det_test.pipeline = ctw_test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ctw_det_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ctw_det_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
|
@ -0,0 +1,28 @@
|
|||
_base_ = [
|
||||
'_base_mask-rcnn_resnet50_fpn.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/textdet_default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
ic15_det_train = _base_.ic15_det_train
|
||||
ic15_det_test = _base_.ic15_det_test
|
||||
ic15_det_train.pipeline = _base_.train_pipeline
|
||||
ic15_det_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=ic15_det_train)
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=1,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=ic15_det_test)
|
||||
|
||||
test_dataloader = val_dataloader
|
|
@ -0,0 +1,14 @@
|
|||
_base_ = [
|
||||
'mask-rcnn_resnet50_fpn_160e_icdar2015.py',
|
||||
'../../_base_/det_datasets/icdar2017.py',
|
||||
]
|
||||
|
||||
ic17_det_train = _base_.ic17_det_train
|
||||
ic17_det_test = _base_.ic17_det_test
|
||||
# use the same pipeline as icdar2015
|
||||
ic17_det_train.pipeline = _base_.train_pipeline
|
||||
ic17_det_test.pipeline = _base_.test_pipeline
|
||||
|
||||
train_dataloader = dict(dataset=ic17_det_train)
|
||||
val_dataloader = dict(dataset=ic17_det_test)
|
||||
test_dataloader = val_dataloader
|
|
@ -1,85 +0,0 @@
|
|||
_base_ = [
|
||||
'ocr_mask_rcnn_r50_fpn_ohem_poly.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=20),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(640, 640),
|
||||
ratio_range=(1.0, 4.125),
|
||||
resize_type='Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='TextDetRandomCrop', target_size=(640, 640)),
|
||||
dict(type='MMOCR2MMDet', poly2mask=True),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'scale_factor', 'flip_direction'))
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1600, 1600), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(
|
||||
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')
|
|
@ -1,85 +0,0 @@
|
|||
_base_ = [
|
||||
'ocr_mask_rcnn_r50_fpn_ohem_poly.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=20),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(640, 640),
|
||||
ratio_range=(1.0, 4.125),
|
||||
resize_type='mmocr.Resize',
|
||||
keep_ratio=True),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='TextDetRandomCrop', target_size=(640, 640)),
|
||||
dict(type='MMOCR2MMDet', poly2mask=True),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'scale_factor', 'flip_direction'))
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(
|
||||
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')
|
|
@ -1,86 +0,0 @@
|
|||
_base_ = [
|
||||
'ocr_mask_rcnn_r50_fpn_ohem_poly.py',
|
||||
'../../_base_/det_datasets/icdar2017.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=20),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True,
|
||||
),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(
|
||||
type='RandomResize',
|
||||
scale=(640, 640),
|
||||
ratio_range=(1.0, 4.125),
|
||||
keep_ratio=True),
|
||||
dict(type='RandomFlip', prob=0.5),
|
||||
dict(type='TextDetRandomCrop', target_size=(640, 640)),
|
||||
dict(type='MMOCR2MMDet', poly2mask=True),
|
||||
dict(
|
||||
type='mmdet.PackDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'flip',
|
||||
'scale_factor', 'flip_direction'))
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1920, 1920), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=8,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
|
||||
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=True,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
|
||||
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(
|
||||
type='TextDetLocalVisualizer', name='visualizer', save_dir='imgs')
|
|
@ -16,9 +16,9 @@ Collections:
|
|||
README: configs/textdet/maskrcnn/README.md
|
||||
|
||||
Models:
|
||||
- Name: mask_rcnn_r50_fpn_160e_ctw1500
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_ctw1500
|
||||
In Collection: Mask R-CNN
|
||||
Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
|
@ -28,9 +28,9 @@ Models:
|
|||
hmean: 0.7486
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500_20210219-96497a76.pth
|
||||
|
||||
- Name: mask_rcnn_r50_fpn_160e_icdar2015
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_icdar2015
|
||||
In Collection: Mask R-CNN
|
||||
Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
|
@ -40,9 +40,9 @@ Models:
|
|||
hmean: 0.8280
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015_20210219-8eb340a3.pth
|
||||
|
||||
- Name: mask_rcnn_r50_fpn_160e_icdar2017
|
||||
- Name: mask-rcnn_resnet50_fpn_160e_icdar2017
|
||||
In Collection: Mask R-CNN
|
||||
Config: configs/textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py
|
||||
Config: configs/textdet/maskrcnn/mask-rcnn_resnet50_fpn_160e_icdar2017.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2017
|
||||
Results:
|
||||
|
|
|
@ -1,134 +0,0 @@
|
|||
# model settings
|
||||
model = dict(
|
||||
type='MMDetWrapper',
|
||||
text_repr_type='quad',
|
||||
cfg=dict(
|
||||
type='MaskRCNN',
|
||||
data_preprocessor=dict(
|
||||
type='DetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32),
|
||||
backbone=dict(
|
||||
type='ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
init_cfg=dict(
|
||||
type='Pretrained', checkpoint='torchvision://resnet50')),
|
||||
neck=dict(
|
||||
type='FPN',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
num_outs=5),
|
||||
rpn_head=dict(
|
||||
type='RPNHead',
|
||||
in_channels=256,
|
||||
feat_channels=256,
|
||||
anchor_generator=dict(
|
||||
type='AnchorGenerator',
|
||||
scales=[4],
|
||||
ratios=[0.17, 0.44, 1.13, 2.90, 7.46],
|
||||
strides=[4, 8, 16, 32, 64]),
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[.0, .0, .0, .0],
|
||||
target_stds=[1.0, 1.0, 1.0, 1.0]),
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
roi_head=dict(
|
||||
type='StandardRoIHead',
|
||||
bbox_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(
|
||||
type='RoIAlign', output_size=7, sampling_ratio=0.),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
bbox_head=dict(
|
||||
type='Shared2FCBBoxHead',
|
||||
in_channels=256,
|
||||
fc_out_channels=1024,
|
||||
roi_feat_size=7,
|
||||
num_classes=1,
|
||||
bbox_coder=dict(
|
||||
type='DeltaXYWHBBoxCoder',
|
||||
target_means=[0., 0., 0., 0.],
|
||||
target_stds=[0.1, 0.1, 0.2, 0.2]),
|
||||
reg_class_agnostic=False,
|
||||
loss_cls=dict(
|
||||
type='CrossEntropyLoss',
|
||||
use_sigmoid=False,
|
||||
loss_weight=1.0),
|
||||
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
|
||||
mask_roi_extractor=dict(
|
||||
type='SingleRoIExtractor',
|
||||
roi_layer=dict(
|
||||
type='RoIAlign', output_size=14, sampling_ratio=0.),
|
||||
out_channels=256,
|
||||
featmap_strides=[4, 8, 16, 32]),
|
||||
mask_head=dict(
|
||||
type='FCNMaskHead',
|
||||
num_convs=4,
|
||||
in_channels=256,
|
||||
conv_out_channels=256,
|
||||
num_classes=1,
|
||||
loss_mask=dict(
|
||||
type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
|
||||
# model training and testing settings
|
||||
train_cfg=dict(
|
||||
rpn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.7,
|
||||
neg_iou_thr=0.3,
|
||||
min_pos_iou=0.3,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1),
|
||||
sampler=dict(
|
||||
type='RandomSampler',
|
||||
num=256,
|
||||
pos_fraction=0.5,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=False),
|
||||
allowed_border=-1,
|
||||
pos_weight=-1,
|
||||
debug=False),
|
||||
rpn_proposal=dict(
|
||||
nms_pre=2000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
assigner=dict(
|
||||
type='MaxIoUAssigner',
|
||||
pos_iou_thr=0.5,
|
||||
neg_iou_thr=0.5,
|
||||
min_pos_iou=0.5,
|
||||
match_low_quality=True,
|
||||
ignore_iof_thr=-1),
|
||||
sampler=dict(
|
||||
type='RandomSampler',
|
||||
num=512,
|
||||
pos_fraction=0.25,
|
||||
neg_pos_ub=-1,
|
||||
add_gt_as_proposals=True),
|
||||
mask_size=28,
|
||||
pos_weight=-1,
|
||||
debug=False)),
|
||||
test_cfg=dict(
|
||||
rpn=dict(
|
||||
nms_pre=1000,
|
||||
max_per_img=1000,
|
||||
nms=dict(type='nms', iou_threshold=0.7),
|
||||
min_bbox_size=0),
|
||||
rcnn=dict(
|
||||
score_thr=0.05,
|
||||
nms=dict(type='nms', iou_threshold=0.5),
|
||||
max_per_img=100,
|
||||
mask_thr_binary=0.5))))
|
|
@ -1,20 +1,35 @@
|
|||
# TODO Train on ICDAR 2017
|
||||
_base_ = [
|
||||
'psenet_r50_fpnf.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_sgd_600e.py',
|
||||
'../../_base_/det_datasets/icdar2017.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=100),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
model = {{_base_.model_quad}}
|
||||
model = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
det_head=dict(
|
||||
type='PSEHead',
|
||||
in_channels=[256],
|
||||
hidden_dim=256,
|
||||
out_channel=7,
|
||||
module_loss=dict(type='PSEModuleLoss'),
|
||||
postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
||||
|
||||
train_pipeline = [
|
||||
dict(
|
||||
|
@ -47,29 +62,12 @@ test_pipeline = [
|
|||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list, pipeline=train_pipeline))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')
|
|
@ -15,9 +15,9 @@ Collections:
|
|||
README: configs/textdet/psenet/README.md
|
||||
|
||||
Models:
|
||||
- Name: psenet_r50_fpnf_600e_ctw1500
|
||||
- Name: psenet_resnet50_fpnf_600e_ctw1500
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_ctw1500.py
|
||||
Metadata:
|
||||
Training Data: CTW1500
|
||||
Results:
|
||||
|
@ -27,9 +27,9 @@ Models:
|
|||
hmean-iou: 0.784
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_ctw1500_20210401-216fed50.pth
|
||||
|
||||
- Name: psenet_r50_fpnf_600e_icdar2015
|
||||
- Name: psenet_resnet50_fpnf_600e_icdar2015
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2015
|
||||
Results:
|
||||
|
@ -37,11 +37,11 @@ Models:
|
|||
Dataset: ICDAR2015
|
||||
Metrics:
|
||||
hmean-iou: 0.806
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_r50_fpnf_600e_icdar2015-c6131f0d.pth
|
||||
Weights: https://download.openmmlab.com/mmocr/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015-c6131f0d.pth
|
||||
|
||||
- Name: psenet_r50_fpnf_600e_icdar2015
|
||||
- Name: psenet_resnet50_fpnf_600e_icdar2015
|
||||
In Collection: PSENet
|
||||
Config: configs/textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py
|
||||
Config: configs/textdet/psenet/psenet_resnet50_fpnf_600e_icdar2015.py
|
||||
Metadata:
|
||||
Training Data: ICDAR2017 ICDAR2015
|
||||
Results:
|
||||
|
|
|
@ -1,61 +0,0 @@
|
|||
model_poly = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
det_head=dict(
|
||||
type='PSEHead',
|
||||
in_channels=[256],
|
||||
hidden_dim=256,
|
||||
out_channel=7,
|
||||
module_loss=dict(type='PSEModuleLoss'),
|
||||
postprocessor=dict(type='PSEPostprocessor', text_repr_type='poly')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
||||
|
||||
model_quad = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
det_head=dict(
|
||||
type='PSEHead',
|
||||
in_channels=[256],
|
||||
hidden_dim=256,
|
||||
out_channel=7,
|
||||
module_loss=dict(type='PSEModuleLoss'),
|
||||
postprocessor=dict(type='PSEPostprocessor', text_repr_type='quad')),
|
||||
data_preprocessor=dict(
|
||||
type='TextDetDataPreprocessor',
|
||||
mean=[123.675, 116.28, 103.53],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
bgr_to_rgb=True,
|
||||
pad_size_divisor=32))
|
|
@ -1,75 +0,0 @@
|
|||
_base_ = [
|
||||
'psenet_r50_fpnf.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_adam_step_600e.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=100),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
model = {{_base_.model_poly}}
|
||||
|
||||
train_pipeline_ctw = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotate', max_angle=10),
|
||||
dict(type='TextDetRandomCrop', target_size=(736, 736)),
|
||||
dict(type='Pad', size=(736, 736)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
test_pipeline_ctw = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(1280, 1280), keep_ratio=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor',
|
||||
'instances'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=train_list,
|
||||
pipeline=train_pipeline_ctw))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset', datasets=test_list, pipeline=test_pipeline_ctw))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')
|
|
@ -1,84 +0,0 @@
|
|||
_base_ = [
|
||||
'psenet_r50_fpnf.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_adam_step_600e.py',
|
||||
]
|
||||
|
||||
# dataset settings
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
file_client_args = dict(backend='disk')
|
||||
default_hooks = dict(
|
||||
checkpoint=dict(type='CheckpointHook', interval=100),
|
||||
logger=dict(type='LoggerHook', interval=20))
|
||||
|
||||
model = {{_base_.model_quad}}
|
||||
|
||||
train_pipeline_icdar2015 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='TorchVisionWrapper',
|
||||
op='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5),
|
||||
dict(type='ShortScaleAspectJitter', short_size=736, scale_divisor=32),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotate', max_angle=10),
|
||||
dict(type='TextDetRandomCrop', target_size=(736, 736)),
|
||||
dict(type='Pad', size=(736, 736)),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
test_pipeline_icdar2015 = [
|
||||
dict(
|
||||
type='LoadImageFromFile',
|
||||
file_client_args=file_client_args,
|
||||
color_type='color_ignore_orientation'),
|
||||
dict(type='Resize', scale=(2240, 2240), keep_ratio=True),
|
||||
# add loading annotation after ``Resize`` because ground truth
|
||||
# does not need to do resize data transform
|
||||
dict(
|
||||
type='LoadOCRAnnotations',
|
||||
with_polygon=True,
|
||||
with_bbox=True,
|
||||
with_label=True),
|
||||
dict(
|
||||
type='PackTextDetInputs',
|
||||
meta_keys=('img_path', 'ori_shape', 'img_shape', 'scale_factor'))
|
||||
]
|
||||
|
||||
train_dataloader = dict(
|
||||
batch_size=16,
|
||||
num_workers=8,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=True),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_icdar2015))
|
||||
val_dataloader = dict(
|
||||
batch_size=1,
|
||||
num_workers=4,
|
||||
persistent_workers=False,
|
||||
sampler=dict(type='DefaultSampler', shuffle=False),
|
||||
dataset=dict(
|
||||
type='ConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
val_evaluator = dict(type='HmeanIOUMetric')
|
||||
test_evaluator = val_evaluator
|
||||
|
||||
visualizer = dict(type='TextDetLocalVisualizer', name='visualizer')
|
|
@ -0,0 +1,46 @@
|
|||
# Inherits the model/pipelines, the CTW1500 dataset entries, the runtime
# defaults and the 600-epoch Adam step schedule from the base configs.
_base_ = [
    '_base_psenet_resnet50_fpnf.py',
    '../../_base_/det_datasets/ctw1500.py',
    '../../_base_/textdet_default_runtime.py',
    '../../_base_/schedules/schedule_adam_step_600e.py',
]

# Dataset entries pulled from the CTW1500 base config.
ctw_det_train = _base_.ctw_det_train
ctw_det_test = _base_.ctw_det_test

# Test-time pipeline specific to this config (resize to 1280x1280 while
# keeping the aspect ratio). Annotations are loaded after ``Resize`` so the
# ground truth does not go through the resize transform.
test_pipeline_ctw = [
    {
        'type': 'LoadImageFromFile',
        'file_client_args': _base_.file_client_args,
        'color_type': 'color_ignore_orientation',
    },
    {
        'type': 'Resize',
        'scale': (1280, 1280),
        'keep_ratio': True,
    },
    {
        'type': 'LoadOCRAnnotations',
        'with_polygon': True,
        'with_bbox': True,
        'with_label': True,
    },
    {
        'type': 'PackTextDetInputs',
        'meta_keys': ('img_path', 'ori_shape', 'img_shape', 'scale_factor'),
    },
]

# Attach pipelines: training reuses the base one, testing uses the list above.
ctw_det_train.pipeline = _base_.train_pipeline
ctw_det_test.pipeline = test_pipeline_ctw

train_dataloader = {
    'batch_size': 16,
    'num_workers': 8,
    'persistent_workers': False,
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'dataset': ctw_det_train,
}

val_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': False,
    'sampler': {'type': 'DefaultSampler', 'shuffle': False},
    'dataset': ctw_det_test,
}

# Testing shares the validation loader.
test_dataloader = val_dataloader
|
|
@ -0,0 +1,35 @@
|
|||
# Inherits the model/pipelines, the ICDAR2015 dataset entries, the runtime
# defaults and the 600-epoch Adam step schedule from the base configs.
_base_ = [
    '_base_psenet_resnet50_fpnf.py',
    '../../_base_/det_datasets/icdar2015.py',
    '../../_base_/textdet_default_runtime.py',
    '../../_base_/schedules/schedule_adam_step_600e.py',
]

# Dataset entries pulled from the ICDAR2015 base config.
ic15_det_train = _base_.ic15_det_train
ic15_det_test = _base_.ic15_det_test

# Overrides on top of the base model: ICDAR2015 uses quadrilateral outputs
# and the 'pytorch' backbone style.
model = {
    'backbone': {'style': 'pytorch'},
    'det_head': {'postprocessor': {'text_repr_type': 'quad'}},
}

# Both splits reuse the pipelines defined in the base model config.
ic15_det_train.pipeline = _base_.train_pipeline
ic15_det_test.pipeline = _base_.test_pipeline

train_dataloader = {
    'batch_size': 16,
    'num_workers': 8,
    'persistent_workers': False,
    'sampler': {'type': 'DefaultSampler', 'shuffle': True},
    'dataset': ic15_det_train,
}

val_dataloader = {
    'batch_size': 1,
    'num_workers': 1,
    'persistent_workers': False,
    'sampler': {'type': 'DefaultSampler', 'shuffle': False},
    'dataset': ic15_det_test,
}

# Testing shares the validation loader.
test_dataloader = val_dataloader
|
|
@ -0,0 +1,14 @@
|
|||
# Builds on the ICDAR2015 PSENet config and swaps in the ICDAR2017 dataset
# entries; everything else (model, schedule, runtime, loader settings) is
# inherited.
_base_ = [
    'psenet_resnet50_fpnf_600e_icdar2015.py',
    '../../_base_/det_datasets/icdar2017.py',
]

ic17_det_train = _base_.ic17_det_train
ic17_det_test = _base_.ic17_det_test
# Use the same pipelines as icdar2015. The refactored icdar2015 config exposes
# them as ``train_pipeline`` / ``test_pipeline``; the former
# ``train_pipeline_icdar2015`` / ``test_pipeline_icdar2015`` names no longer
# exist in any base, so referencing them would fail when the config is loaded.
ic17_det_train.pipeline = _base_.train_pipeline
ic17_det_test.pipeline = _base_.test_pipeline

# Only the dataset is overridden; the remaining dataloader fields come from
# the inherited icdar2015 config.
train_dataloader = dict(dataset=ic17_det_train)
val_dataloader = dict(dataset=ic17_det_test)
test_dataloader = val_dataloader
|
Loading…
Reference in New Issue