[Refactor] Refactor text detection config (#626)

* refactor textdet configs

* remove duplicate keys in _base_

* remove import from config

* syncbn to bn on cpu

* minimize change
Hongbin Sun 2021-12-03 19:37:43 +08:00 committed by GitHub
parent 925b365dcf
commit 5a8859fe66
43 changed files with 1281 additions and 1428 deletions
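
After this refactor, a full detection config is assembled from reusable _base_ files (runtime, schedule, model, dataset and pipeline) instead of repeating every definition. A minimal sketch of the new layout, following the dbnet_r18 ICDAR2015 config further down in this diff (the paths and variable names are the ones added by this commit):

_base_ = [
    '../../_base_/runtime_10e.py',
    '../../_base_/schedules/schedule_sgd_1200e.py',
    '../../_base_/det_models/dbnet_r18_fpnc.py',
    '../../_base_/det_datasets/icdar2015.py',
    '../../_base_/det_pipelines/dbnet_pipeline.py'
]
# variables exported by the _base_ files
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_r18 = {{_base_.train_pipeline_r18}}
test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}}
# dataset lists are wrapped by UniformConcatDataset and paired with a pipeline
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=8,
    train=dict(
        type='UniformConcatDataset',
        datasets=train_list,
        pipeline=train_pipeline_r18),
    val=dict(
        type='UniformConcatDataset',
        datasets=test_list,
        pipeline=test_pipeline_1333_736),
    test=dict(
        type='UniformConcatDataset',
        datasets=test_list,
        pipeline=test_pipeline_1333_736))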

@@ -1,97 +0,0 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_cfg = None
test_cfg = None
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 640)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
# shrink_ratio is from big to small. The 1st must be 1.0
dict(type='PANetTargets', shrink_ratio=(1.0, 0.7)),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(3000, 640),
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
dataset_type = 'TextDetDataset'
img_prefix = 'tests/data/toy_dataset/imgs'
train_anno_file = 'tests/data/toy_dataset/instances_test.txt'
train1 = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=train_anno_file,
loader=dict(
type='HardDiskLoader',
repeat=4,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=train_pipeline,
test_mode=False)
data_root = 'tests/data/toy_dataset'
train2 = dict(
type='IcdarDataset',
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline)
test_anno_file = 'tests/data/toy_dataset/instances_test.txt'
test = dict(
type=dataset_type,
img_prefix=img_prefix,
ann_file=test_anno_file,
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=test_pipeline,
test_mode=True)
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(type='ConcatDataset', datasets=[train1, train2]),
val=dict(type='ConcatDataset', datasets=[test]),
test=dict(type='ConcatDataset', datasets=[test]))
evaluation = dict(interval=1, metric='hmean-iou')

@@ -0,0 +1,18 @@
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500'
train = dict(
type=dataset_type,
ann_file=f'{data_root}/instances_training.json',
img_prefix=f'{data_root}/imgs',
pipeline=None)
test = dict(
type=dataset_type,
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
pipeline=None)
train_list = [train]
test_list = [test]

@@ -0,0 +1,18 @@
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015'
train = dict(
type=dataset_type,
ann_file=f'{data_root}/instances_training.json',
img_prefix=f'{data_root}/imgs',
pipeline=None)
test = dict(
type=dataset_type,
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
pipeline=None)
train_list = [train]
test_list = [test]

@@ -0,0 +1,18 @@
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017'
train = dict(
type=dataset_type,
ann_file=f'{data_root}/instances_training.json',
img_prefix=f'{data_root}/imgs',
pipeline=None)
test = dict(
type=dataset_type,
ann_file=f'{data_root}/instances_val.json',
img_prefix=f'{data_root}/imgs',
pipeline=None)
train_list = [train]
test_list = [test]

@@ -0,0 +1,39 @@
root = 'tests/data/toy_dataset'
# dataset with type='TextDetDataset'
train1 = dict(
type='TextDetDataset',
img_prefix=f'{root}/imgs',
ann_file=f'{root}/instances_test.txt',
loader=dict(
type='HardDiskLoader',
repeat=4,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=None,
test_mode=False)
# dataset with type='IcdarDataset'
train2 = dict(
type='IcdarDataset',
ann_file=f'{root}/instances_test.json',
img_prefix=f'{root}/imgs',
pipeline=None)
test = dict(
type='TextDetDataset',
img_prefix=f'{root}/imgs',
ann_file=f'{root}/instances_test.txt',
loader=dict(
type='HardDiskLoader',
repeat=1,
parser=dict(
type='LineJsonParser',
keys=['file_name', 'height', 'width', 'annotations'])),
pipeline=None,
test_mode=True)
train_list = [train1, train2]
test_list = [test]

@@ -0,0 +1,21 @@
model = dict(
type='DBNet',
backbone=dict(
type='mmdet.ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
norm_eval=False,
style='caffe'),
neck=dict(
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
bbox_head=dict(
type='DBHead',
text_repr_type='quad',
in_channels=256,
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
train_cfg=None,
test_cfg=None)
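
Since MMCV configs merge nested dicts on inheritance, a downstream config should only need to override the fields it changes rather than copy this whole model. A hypothetical sketch (the poly override is illustrative and not part of this commit):

_base_ = ['../../_base_/det_models/dbnet_r18_fpnc.py']
# only the overridden key changes; everything else is inherited from the base
model = dict(bbox_head=dict(text_repr_type='poly'))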

@@ -0,0 +1,23 @@
model = dict(
type='DBNet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
style='pytorch',
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
bbox_head=dict(
type='DBHead',
text_repr_type='quad',
in_channels=256,
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
train_cfg=None,
test_cfg=None)

@@ -0,0 +1,21 @@
model = dict(
type='DRRG',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
bbox_head=dict(
type='DRRGHead',
in_channels=32,
text_region_thr=0.3,
center_region_thr=0.4,
link_thr=0.80,
loss=dict(type='DRRGLoss')))

@@ -0,0 +1,30 @@
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=False,
style='pytorch'),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
bbox_head=dict(
type='FCEHead',
in_channels=256,
scales=(8, 16, 32),
loss=dict(type='FCELoss'),
alpha=1.2,
beta=1.0,
text_repr_type='quad',
fourier_degree=5,
))

@@ -0,0 +1,29 @@
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
bbox_head=dict(
type='FCEHead',
in_channels=256,
scales=(8, 16, 32),
loss=dict(type='FCELoss'),
fourier_degree=5,
))

@@ -0,0 +1,43 @@
model_poly = dict(
type='PANet',
backbone=dict(
type='mmdet.ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
bbox_head=dict(
type='PANHead',
text_repr_type='poly',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss')),
train_cfg=None,
test_cfg=None)
model_quad = dict(
type='PANet',
backbone=dict(
type='mmdet.ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
bbox_head=dict(
type='PANHead',
text_repr_type='quad',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss')),
train_cfg=None,
test_cfg=None)
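
A downstream config picks one of the two variants via MMCV's base-variable syntax, as the PANet CTW1500 config later in this diff does (the _base_ list is trimmed here to the relevant file):

_base_ = ['../../_base_/det_models/panet_r18_fpem_ffm.py']
model = {{_base_.model_poly}}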

@@ -0,0 +1,20 @@
model = dict(
type='PANet',
pretrained='torchvision://resnet50',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
bbox_head=dict(
type='PANHead',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss', speedup_bbox_thr=32)),
train_cfg=None,
test_cfg=None)

@@ -0,0 +1,51 @@
model_poly = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='poly',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
model_quad = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='quad',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)

@@ -0,0 +1,21 @@
model = dict(
type='TextSnake',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
bbox_head=dict(
type='TextSnakeHead',
in_channels=32,
text_repr_type='poly',
loss=dict(type='TextSnakeLoss')),
train_cfg=None,
test_cfg=None)

@@ -0,0 +1,88 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline_r18 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline_1333_736 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# for dbnet_r50dcnv2_fpnc
img_norm_cfg_r50dcnv2 = dict(
mean=[122.67891434, 116.66876762, 104.00698793],
std=[58.395, 57.12, 57.375],
to_rgb=True)
train_pipeline_r50dcnv2 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline_4068_1024 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(4068, 1024),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg_r50dcnv2),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,60 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
dict(
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.8,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=60,
pad_with_fixed_color=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='DRRGTargets'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=[
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
'gt_cos_map', 'gt_comp_attribs'
],
visualize=dict(flag=False, boundary_key='gt_text_mask')),
dict(
type='Collect',
keys=[
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
'gt_cos_map', 'gt_comp_attribs'
])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 640),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1024, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,118 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# for icdar2015
leval_prop_range_icdar2015 = ((0, 0.4), (0.3, 0.7), (0.6, 1.0))
train_pipeline_icdar2015 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(
type='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
dict(
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.8,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=30,
pad_with_fixed_color=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='Pad', size_divisor=32),
dict(
type='FCENetTargets',
fourier_degree=5,
level_proportion_range=leval_prop_range_icdar2015),
dict(
type='CustomFormatBundle',
keys=['p3_maps', 'p4_maps', 'p5_maps'],
visualize=dict(flag=False, boundary_key=None)),
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
]
img_scale_icdar2015 = (2260, 2260)
test_pipeline_icdar2015 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_icdar2015,
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# for ctw1500
leval_prop_range_ctw1500 = ((0, 0.25), (0.2, 0.65), (0.55, 1.0))
train_pipeline_ctw1500 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(
type='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
dict(
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.8,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=30,
pad_with_fixed_color=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='Pad', size_divisor=32),
dict(
type='FCENetTargets',
fourier_degree=5,
level_proportion_range=leval_prop_range_ctw1500),
dict(
type='CustomFormatBundle',
keys=['p3_maps', 'p4_maps', 'p5_maps'],
visualize=dict(flag=False, boundary_key=None)),
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
]
img_scale_ctw1500 = (1080, 736)
test_pipeline_ctw1500 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_ctw1500,
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,57 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
# for ctw1500
img_scale_ctw1500 = (1600, 1600)
test_pipeline_ctw1500 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_ctw1500,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# for icdar2015
img_scale_icdar2015 = (1920, 1920)
test_pipeline_icdar2015 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_icdar2015,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,156 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# for ctw1500
img_scale_train_ctw1500 = [(3000, 640)]
shrink_ratio_train_ctw1500 = (1.0, 0.7)
target_size_train_ctw1500 = (640, 640)
train_pipeline_ctw1500 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=img_scale_train_ctw1500,
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
    # shrink_ratio goes from largest to smallest; the first value must be 1.0
dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_ctw1500),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=target_size_train_ctw1500,
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
img_scale_test_ctw1500 = (3000, 640)
test_pipeline_ctw1500 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_test_ctw1500,
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# for icdar2015
img_scale_train_icdar2015 = [(3000, 736)]
shrink_ratio_train_icdar2015 = (1.0, 0.5)
target_size_train_icdar2015 = (736, 736)
train_pipeline_icdar2015 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=img_scale_train_icdar2015,
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2015),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=target_size_train_icdar2015,
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
img_scale_test_icdar2015 = (1333, 736)
test_pipeline_icdar2015 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_test_icdar2015,
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# for icdar2017
img_scale_train_icdar2017 = [(3000, 800)]
shrink_ratio_train_icdar2017 = (1.0, 0.5)
target_size_train_icdar2017 = (800, 800)
train_pipeline_icdar2017 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=img_scale_train_icdar2017,
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2017),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=target_size_train_icdar2017,
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
img_scale_test_icdar2017 = (1333, 800)
test_pipeline_icdar2017 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_test_icdar2017,
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,70 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
# for ctw1500
img_scale_test_ctw1500 = (1280, 1280)
test_pipeline_ctw1500 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_test_ctw1500,
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
# for icdar2015
img_scale_test_icdar2015 = (2240, 2240)
test_pipeline_icdar2015 = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=img_scale_test_icdar2015,
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,65 @@
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.65,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=20,
pad_with_fixed_color=False),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)], # unused
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
long_size_bound=800,
short_size_bound=480,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='TextSnakeTargets'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=[
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
],
visualize=dict(flag=False, boundary_key='gt_text_mask')),
dict(
type='Collect',
keys=[
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]

@@ -0,0 +1,6 @@
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600

@@ -0,0 +1,5 @@
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
total_epochs = 1500

@@ -1,98 +1,33 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py', '../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/schedules/schedule_sgd_1200e.py',
'../../_base_/det_models/dbnet_r18_fpnc.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/dbnet_pipeline.py'
]
model = dict(
type='DBNet',
backbone=dict(
type='mmdet.ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
norm_eval=False,
style='caffe'),
neck=dict(
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
bbox_head=dict(
type='DBHead',
text_repr_type='quad',
in_channels=256,
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# for visualizing img, pls uncomment it.
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_r18 = {{_base_.train_pipeline_r18}}
test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}}
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
# img aug
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
# random crop
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
# for visualizing img and gts, pls set visualize = True
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=8,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
# for debugging top k imgs
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r18),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_1333_736),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_1333_736))
evaluation = dict(interval=100, metric='hmean-iou')

@@ -1,104 +1,35 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py', '../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/schedules/schedule_sgd_1200e.py',
'../../_base_/det_models/dbnet_r50dcnv2_fpnc.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/dbnet_pipeline.py'
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_r50dcnv2 = {{_base_.train_pipeline_r50dcnv2}}
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
load_from = 'checkpoints/textdet/dbnet/res50dcnv2_synthtext.pth'
model = dict(
type='DBNet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=False,
style='pytorch',
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
bbox_head=dict(
type='DBHead',
text_repr_type='quad',
in_channels=256,
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[122.67891434, 116.66876762, 104.00698793],
std=[58.395, 57.12, 57.375],
to_rgb=True)
# for visualizing img, pls uncomment it.
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
# img aug
dict(
type='ImgAug',
args=[['Fliplr', 0.5],
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
# random crop
dict(type='EastRandomCrop', target_size=(640, 640)),
dict(type='DBNetTargets', shrink_ratio=0.4),
dict(type='Pad', size_divisor=32),
# for visualizing img and gts, pls set visualize = True
dict(
type='CustomFormatBundle',
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
visualize=dict(flag=False, boundary_key='gt_shrink')),
dict(
type='Collect',
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(4068, 1024),
flip=False,
transforms=[
dict(type='Resize', img_scale=(4068, 1024), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
# for debugging top k imgs
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_r50dcnv2),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_4068_1024))
evaluation = dict(interval=100, metric='hmean-iou')

@@ -1,112 +1,33 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py',
'../../_base_/default_runtime.py'
'../../_base_/schedules/schedule_sgd_1200e.py',
'../../_base_/default_runtime.py',
'../../_base_/det_models/drrg_r50_fpn_unet.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/det_pipelines/drrg_pipeline.py'
]
model = dict(
type='DRRG',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
bbox_head=dict(
type='DRRGHead',
in_channels=32,
text_region_thr=0.3,
center_region_thr=0.4,
link_thr=0.80,
loss=dict(type='DRRGLoss')))
train_cfg = None
test_cfg = None
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = {{_base_.train_pipeline}}
test_pipeline = {{_base_.test_pipeline}}
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
dict(
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.8,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=60,
pad_with_fixed_color=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='DRRGTargets'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=[
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
'gt_cos_map', 'gt_comp_attribs'
],
visualize=dict(flag=False, boundary_key='gt_text_mask')),
dict(
type='Collect',
keys=[
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
'gt_cos_map', 'gt_comp_attribs'
])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1024, 640),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1024, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=f'{data_root}/instances_training.json',
img_prefix=f'{data_root}/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline))
evaluation = dict(interval=20, metric='hmean-iou')

@@ -1,136 +1,33 @@
fourier_degree = 5
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=False,
style='pytorch'),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
bbox_head=dict(
type='FCEHead',
in_channels=256,
scales=(8, 16, 32),
loss=dict(type='FCELoss'),
alpha=1.2,
beta=1.0,
text_repr_type='quad',
fourier_degree=fourier_degree,
))
train_cfg = None
test_cfg = None
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(
type='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
dict(
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.8,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=30,
pad_with_fixed_color=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='Pad', size_divisor=32),
dict(
type='FCENetTargets',
fourier_degree=fourier_degree,
level_proportion_range=((0, 0.4), (0.3, 0.7), (0.6, 1.0))),
dict(
type='CustomFormatBundle',
keys=['p3_maps', 'p4_maps', 'p5_maps'],
visualize=dict(flag=False, boundary_key=None)),
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(2260, 2260),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
_base_ = [
'../../_base_/runtime_10e.py',
'../../_base_/schedules/schedule_sgd_1500e.py',
'../../_base_/det_models/fcenet_r50_fpn.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/fcenet_pipeline.py'
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
data = dict(
samples_per_gpu=8,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_icdar2015),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=5, metric='hmean-iou')
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
total_epochs = 1500
checkpoint_config = dict(interval=5)
# yapf:disable
log_config = dict(
interval=20,
hooks=[
dict(type='TextLoggerHook')
])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
evaluation = dict(interval=10, metric='hmean-iou')

@@ -1,135 +1,33 @@
fourier_degree = 5
model = dict(
type='FCENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
stage_with_dcn=(False, True, True, True)),
neck=dict(
type='mmdet.FPN',
in_channels=[512, 1024, 2048],
out_channels=256,
add_extra_convs='on_output',
num_outs=3,
relu_before_extra_convs=True,
act_cfg=None),
bbox_head=dict(
type='FCEHead',
in_channels=256,
scales=(8, 16, 32),
loss=dict(type='FCELoss'),
fourier_degree=fourier_degree,
))
train_cfg = None
test_cfg = None
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(
type='ColorJitter',
brightness=32.0 / 255,
saturation=0.5,
contrast=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
dict(
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.8,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=30,
pad_with_fixed_color=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='Pad', size_divisor=32),
dict(
type='FCENetTargets',
fourier_degree=fourier_degree,
level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))),
dict(
type='CustomFormatBundle',
keys=['p3_maps', 'p4_maps', 'p5_maps'],
visualize=dict(flag=False, boundary_key=None)),
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1080, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
_base_ = [
'../../_base_/runtime_10e.py',
'../../_base_/schedules/schedule_sgd_1500e.py',
'../../_base_/det_models/fcenet_r50dcnv2_fpn.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/det_pipelines/fcenet_pipeline.py'
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_ctw1500 = {{_base_.train_pipeline_ctw1500}}
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
data = dict(
samples_per_gpu=6,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_ctw1500),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
evaluation = dict(interval=5, metric='hmean-iou')
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500))
# optimizer
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
total_epochs = 1500
checkpoint_config = dict(interval=5)
# yapf:disable
log_config = dict(
interval=20,
hooks=[
dict(type='TextLoggerHook')
])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
evaluation = dict(interval=10, metric='hmean-iou')

@@ -1,69 +1,33 @@
_base_ = [
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem_poly.py',
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py',
'../../_base_/schedules/schedule_sgd_160e.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/det_pipelines/maskrcnn_pipeline.py'
]
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
# resize the long size to 1600
img_scale=(1600, 1600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
# no flip
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = {{_base_.train_pipeline}}
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500),
test=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500))
evaluation = dict(interval=10, metric='hmean-iou')

@@ -1,68 +1,33 @@
_base_ = [
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py',
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
]
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
# resize the long size to 1600
img_scale=(1920, 1920),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
# no flip
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
'../../_base_/runtime_10e.py',
'../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py',
'../../_base_/schedules/schedule_sgd_160e.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/maskrcnn_pipeline.py'
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = {{_base_.train_pipeline}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015),
test=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
evaluation = dict(interval=10, metric='hmean-iou')

@@ -1,69 +1,33 @@
_base_ = [
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py',
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py',
'../../_base_/schedules/schedule_sgd_160e.py',
'../../_base_/det_datasets/icdar2017.py',
'../../_base_/det_pipelines/maskrcnn_pipeline.py'
]
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
dict(
type='ScaleAspectJitter',
img_scale=None,
keep_ratio=False,
resize_type='indep_sample_in_range',
scale_range=(640, 2560)),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropInstances',
target_size=(640, 640),
mask_type='union_all',
instance_key='gt_masks'),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
# resize the long size to 1600
img_scale=(1600, 1600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
# no flip
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = {{_base_.train_pipeline}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015),
test=dict(
type=dataset_type,
# select_first_k=1,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
evaluation = dict(interval=10, metric='hmean-iou')

@@ -1,106 +1,35 @@
_base_ = [
'../../_base_/schedules/schedule_adam_600e.py',
'../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/det_models/panet_r18_fpem_ffm.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/det_pipelines/panet_pipeline.py'
]
model = dict(
type='PANet',
backbone=dict(
type='mmdet.ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
bbox_head=dict(
type='PANHead',
text_repr_type='poly',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
model = {{_base_.model_poly}}
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# To visualize images, uncomment the img_norm_cfg below.
# img_norm_cfg = dict(
# mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_ctw1500 = {{_base_.train_pipeline_ctw1500}}
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 640)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
    # shrink_ratio values go from large to small; the first must be 1.0
dict(type='PANetTargets', shrink_ratio=(1.0, 0.7)),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
    # To visualize images and ground truths, set flag=True in visualize below.
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(3000, 640),
flip=False,
transforms=[
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
        # To debug with only the first k images, uncomment select_first_k.
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_ctw1500),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500))
evaluation = dict(interval=10, metric='hmean-iou')
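
The model = {{_base_.model_poly}} line above (and model_quad in the icdar2015 variant below) selects one of two model definitions that the shared det_models base file is expected to provide. A hedged sketch of that base file, reconstructed from the model dicts this patch removes; only bbox_head.text_repr_type differs between the two variants, and the real _base_ file may lay this out differently:

# Sketch of _base_/det_models/panet_r18_fpem_ffm.py, assembled from the
# removed per-config model dicts.
_backbone = dict(
    type='mmdet.ResNet',
    depth=18,
    num_stages=4,
    out_indices=(0, 1, 2, 3),
    frozen_stages=-1,
    norm_cfg=dict(type='SyncBN', requires_grad=True),
    init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
    norm_eval=True,
    style='caffe')

model_poly = dict(
    type='PANet',
    backbone=_backbone,
    neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
    bbox_head=dict(
        type='PANHead',
        text_repr_type='poly',
        in_channels=[128, 128, 128, 128],
        out_channels=6,
        loss=dict(type='PANLoss')),
    train_cfg=None,
    test_cfg=None)

model_quad = dict(
    type='PANet',
    backbone=_backbone,
    neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
    bbox_head=dict(
        type='PANHead',
        text_repr_type='quad',
        in_channels=[128, 128, 128, 128],
        out_channels=6,
        loss=dict(type='PANLoss')),
    train_cfg=None,
    test_cfg=None)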


@ -1,104 +1,35 @@
_base_ = [
'../../_base_/schedules/schedule_adam_600e.py',
'../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/det_models/panet_r18_fpem_ffm.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/panet_pipeline.py'
]
model = dict(
type='PANet',
backbone=dict(
type='mmdet.ResNet',
depth=18,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
bbox_head=dict(
type='PANHead',
text_repr_type='quad',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
# To visualize images, uncomment the img_norm_cfg below.
# img_norm_cfg = dict(
# mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
model = {{_base_.model_quad}}
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
dict(type='PANetTargets', shrink_ratio=(1.0, 0.5), max_shrink=20),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(736, 736),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
    # To visualize images and ground truths, set flag=True in visualize below.
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
        # To debug with only the first k images, uncomment select_first_k.
# select_first_k=200,
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_icdar2015),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
# select_first_k=100,
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
evaluation = dict(interval=10, metric='hmean-iou')


@ -1,95 +1,33 @@
_base_ = [
'../../_base_/schedules/schedule_adam_600e.py',
'../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/det_models/panet_r50_fpem_ffm.py',
'../../_base_/det_datasets/icdar2017.py',
'../../_base_/det_pipelines/panet_pipeline.py'
]
model = dict(
type='PANet',
pretrained='torchvision://resnet50',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='caffe'),
neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
bbox_head=dict(
type='PANHead',
in_channels=[128, 128, 128, 128],
out_channels=6,
loss=dict(type='PANLoss', speedup_bbox_thr=32)),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 800)],
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
keep_ratio=False),
dict(type='PANetTargets', shrink_ratio=(1.0, 0.5)),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(800, 800),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
    # To visualize images and ground truths, set flag=True in visualize below.
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline_icdar2017 = {{_base_.train_pipeline_icdar2017}}
test_pipeline_icdar2017 = {{_base_.test_pipeline_icdar2017}}
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
pipeline=train_pipeline),
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline_icdar2017),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2017),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2017))
evaluation = dict(interval=10, metric='hmean-iou')


@ -1,110 +1,35 @@
_base_ = ['../../_base_/default_runtime.py']
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600
model = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='poly',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 1280),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
_base_ = [
'../../_base_/default_runtime.py',
'../../_base_/schedules/schedule_adam_step_600e.py',
'../../_base_/det_models/psenet_r50_fpnf.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/det_pipelines/psenet_pipeline.py'
]
model = {{_base_.model_poly}}
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = {{_base_.train_pipeline}}
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_ctw1500))
evaluation = dict(interval=10, metric='hmean-iou')
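
The new schedule_adam_step_600e.py base file referenced above is expected to hold exactly the optimizer and learning-rate settings that this patch deletes from the per-model configs. A sketch of its likely contents, taken from the removed lines (the real file may differ in comments or ordering):

# Sketch of _base_/schedules/schedule_adam_step_600e.py.
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600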


@ -1,110 +1,35 @@
_base_ = ['../../_base_/runtime_10e.py']
# optimizer
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600
model = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='SyncBN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='quad',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)], # unused
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(2240, 2200),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2240, 2200), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
_base_ = [
'../../_base_/runtime_10e.py',
'../../_base_/schedules/schedule_adam_step_600e.py',
'../../_base_/det_models/psenet_r50_fpnf.py',
'../../_base_/det_datasets/icdar2015.py',
'../../_base_/det_pipelines/psenet_pipeline.py'
]
model = {{_base_.model_quad}}
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = {{_base_.train_pipeline}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
data = dict(
samples_per_gpu=8,
workers_per_gpu=2,
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_test.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
evaluation = dict(interval=10, metric='hmean-iou')


@ -1,85 +1,18 @@
_base_ = [
'../../_base_/schedules/schedule_sgd_600e.py',
'../../_base_/runtime_10e.py'
'../../_base_/runtime_10e.py',
'../../_base_/det_models/psenet_r50_fpnf.py',
'../../_base_/det_datasets/icdar2017.py',
'../../_base_/det_pipelines/psenet_pipeline.py'
]
model = dict(
type='PSENet',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPNF',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
fusion_type='concat'),
bbox_head=dict(
type='PSEHead',
text_repr_type='quad',
in_channels=[256],
out_channels=7,
loss=dict(type='PSELoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
model = {{_base_.model_quad}}
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)],
ratio_range=(0.5, 3),
aspect_ratio_range=(1, 1),
multiscale_mode='value',
long_size_bound=1280,
short_size_bound=640,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='PSENetTargets'),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='RandomRotateTextDet'),
dict(
type='RandomCropInstances',
target_size=(640, 640),
instance_key='gt_kernels'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=['gt_kernels', 'gt_mask'],
visualize=dict(flag=False, boundary_key='gt_kernels')),
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(2240, 2200),
flip=False,
transforms=[
dict(type='Resize', img_scale=(2240, 2200), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = {{_base_.train_pipeline}}
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
data = dict(
samples_per_gpu=8,
@ -87,19 +20,16 @@ data = dict(
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=data_root + '/instances_training.json',
img_prefix=data_root + '/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline),
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015),
test=dict(
type=dataset_type,
ann_file=data_root + '/instances_val.json',
img_prefix=data_root + '/imgs',
pipeline=test_pipeline))
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline_icdar2015))
evaluation = dict(interval=10, metric='hmean-iou')


@ -1,95 +1,16 @@
_base_ = [
'../../_base_/schedules/schedule_1200e.py',
'../../_base_/default_runtime.py'
'../../_base_/schedules/schedule_sgd_1200e.py',
'../../_base_/default_runtime.py',
'../../_base_/det_models/textsnake_r50_fpn_unet.py',
'../../_base_/det_datasets/ctw1500.py',
'../../_base_/det_pipelines/textsnake_pipeline.py'
]
model = dict(
type='TextSnake',
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=-1,
norm_cfg=dict(type='BN', requires_grad=True),
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
norm_eval=True,
style='caffe'),
neck=dict(
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
bbox_head=dict(
type='TextSnakeHead',
in_channels=32,
text_repr_type='poly',
loss=dict(type='TextSnakeLoss')),
train_cfg=None,
test_cfg=None)
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_list = {{_base_.train_list}}
test_list = {{_base_.test_list}}
train_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='LoadTextAnnotations',
with_bbox=True,
with_mask=True,
poly2mask=False),
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(
type='RandomCropPolyInstances',
instance_key='gt_masks',
crop_ratio=0.65,
min_side_ratio=0.3),
dict(
type='RandomRotatePolyInstances',
rotate_ratio=0.5,
max_angle=20,
pad_with_fixed_color=False),
dict(
type='ScaleAspectJitter',
img_scale=[(3000, 736)], # unused
ratio_range=(0.7, 1.3),
aspect_ratio_range=(0.9, 1.1),
multiscale_mode='value',
long_size_bound=800,
short_size_bound=480,
resize_type='long_short_bound',
keep_ratio=False),
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
dict(type='TextSnakeTargets'),
dict(type='Pad', size_divisor=32),
dict(
type='CustomFormatBundle',
keys=[
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
],
visualize=dict(flag=False, boundary_key='gt_text_mask')),
dict(
type='Collect',
keys=[
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
])
]
test_pipeline = [
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 736),
flip=False,
transforms=[
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
train_pipeline = {{_base_.train_pipeline}}
test_pipeline = {{_base_.test_pipeline}}
data = dict(
samples_per_gpu=4,
@ -97,19 +18,16 @@ data = dict(
val_dataloader=dict(samples_per_gpu=1),
test_dataloader=dict(samples_per_gpu=1),
train=dict(
type=dataset_type,
ann_file=f'{data_root}/instances_training.json',
img_prefix=f'{data_root}/imgs',
type='UniformConcatDataset',
datasets=train_list,
pipeline=train_pipeline),
val=dict(
type=dataset_type,
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline),
test=dict(
type=dataset_type,
ann_file=f'{data_root}/instances_test.json',
img_prefix=f'{data_root}/imgs',
type='UniformConcatDataset',
datasets=test_list,
pipeline=test_pipeline))
evaluation = dict(interval=10, metric='hmean-iou')
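
To exercise one of the refactored configs end to end, the dataset wrappers and the model can be built with the same helpers the test snippet below imports. A short sketch (the config path is illustrative; train_cfg and test_cfg now live inside cfg.model, so build_detector needs no extra arguments):

# Sketch: build the training dataset and the detector from a refactored config.
from mmcv import Config
from mmocr.datasets import build_dataset
from mmocr.models import build_detector

cfg = Config.fromfile(
    'configs/textdet/textsnake/textsnake_r50_fpn_unet_1200e_ctw1500.py')  # illustrative path
datasets = [build_dataset(cfg.data.train)]
detector = build_detector(cfg.model)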


@ -7,11 +7,13 @@ from mmcv.image import imread
from mmocr.apis.inference import init_detector, model_inference
from mmocr.datasets import build_dataset # noqa: F401
from mmocr.models import build_detector # noqa: F401
from mmocr.utils import revert_sync_batchnorm
def build_model(config_file):
device = 'cpu'
model = init_detector(config_file, checkpoint=None, device=device)
model = revert_sync_batchnorm(model)
if model.cfg.data.test['type'] == 'ConcatDataset':
model.cfg.data.test.pipeline = model.cfg.data.test['datasets'][