mirror of https://github.com/open-mmlab/mmocr.git
[Refactor] Refactor text detection config (#626)
* refactor textdet configs
* remove duplicate keys in _base_
* remove import from config
* syncbn to bn on cpu
* minimize change
pull/521/head^2
parent
925b365dcf
commit
5a8859fe66
|
@ -1,97 +0,0 @@
|
|||
# Toy-dataset text detection config: PANet-style train/test pipelines plus
# the dataset, dataloader and evaluation settings built on top of them.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_cfg = None
test_cfg = None

train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='ScaleAspectJitter',
        img_scale=[(3000, 640)],
        ratio_range=(0.7, 1.3),
        aspect_ratio_range=(0.9, 1.1),
        multiscale_mode='value',
        keep_ratio=False),
    # shrink_ratio is from big to small. The 1st must be 1.0
    dict(type='PANetTargets', shrink_ratio=(1.0, 0.7)),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='RandomRotateTextDet'),
    dict(
        type='RandomCropInstances',
        target_size=(640, 640),
        instance_key='gt_kernels'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=False, boundary_key='gt_kernels')),
    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]
test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(3000, 640),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# First training source: line-json annotations read through HardDiskLoader.
dataset_type = 'TextDetDataset'
img_prefix = 'tests/data/toy_dataset/imgs'
train_anno_file = 'tests/data/toy_dataset/instances_test.txt'
train1 = dict(
    type=dataset_type,
    img_prefix=img_prefix,
    ann_file=train_anno_file,
    loader=dict(
        type='HardDiskLoader',
        repeat=4,
        parser=dict(
            type='LineJsonParser',
            keys=['file_name', 'height', 'width', 'annotations'])),
    pipeline=train_pipeline,
    test_mode=False)

# Second training source: the same toy images with a JSON annotation file.
data_root = 'tests/data/toy_dataset'
train2 = dict(
    type='IcdarDataset',
    ann_file=data_root + '/instances_test.json',
    img_prefix=data_root + '/imgs',
    pipeline=train_pipeline)

test_anno_file = 'tests/data/toy_dataset/instances_test.txt'
test = dict(
    type=dataset_type,
    img_prefix=img_prefix,
    ann_file=test_anno_file,
    loader=dict(
        type='HardDiskLoader',
        repeat=1,
        parser=dict(
            type='LineJsonParser',
            keys=['file_name', 'height', 'width', 'annotations'])),
    pipeline=test_pipeline,
    test_mode=True)

# val and test share the single `test` dataset definition.
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(type='ConcatDataset', datasets=[train1, train2]),
    val=dict(type='ConcatDataset', datasets=[test]),
    test=dict(type='ConcatDataset', datasets=[test]))

evaluation = dict(interval=1, metric='hmean-iou')
|
|
@ -0,0 +1,18 @@
|
|||
# CTW1500 dataset definitions. `pipeline` is None in this file; presumably
# it is assigned by the config that consumes these lists (TODO confirm).
dataset_type = 'IcdarDataset'
data_root = 'data/ctw1500'

train = dict(
    type=dataset_type,
    ann_file=f'{data_root}/instances_training.json',
    img_prefix=f'{data_root}/imgs',
    pipeline=None)

test = dict(
    type=dataset_type,
    ann_file=f'{data_root}/instances_test.json',
    img_prefix=f'{data_root}/imgs',
    pipeline=None)

train_list = [train]

test_list = [test]
|
|
@ -0,0 +1,18 @@
|
|||
# ICDAR2015 dataset definitions. `pipeline` is None in this file; presumably
# it is assigned by the config that consumes these lists (TODO confirm).
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2015'

train = dict(
    type=dataset_type,
    ann_file=f'{data_root}/instances_training.json',
    img_prefix=f'{data_root}/imgs',
    pipeline=None)

test = dict(
    type=dataset_type,
    ann_file=f'{data_root}/instances_test.json',
    img_prefix=f'{data_root}/imgs',
    pipeline=None)

train_list = [train]

test_list = [test]
|
|
@ -0,0 +1,18 @@
|
|||
# ICDAR2017 dataset definitions. Note the test split reads
# `instances_val.json`. `pipeline` is None in this file; presumably it is
# assigned by the config that consumes these lists (TODO confirm).
dataset_type = 'IcdarDataset'
data_root = 'data/icdar2017'

train = dict(
    type=dataset_type,
    ann_file=f'{data_root}/instances_training.json',
    img_prefix=f'{data_root}/imgs',
    pipeline=None)

test = dict(
    type=dataset_type,
    ann_file=f'{data_root}/instances_val.json',
    img_prefix=f'{data_root}/imgs',
    pipeline=None)

train_list = [train]

test_list = [test]
|
|
@ -0,0 +1,39 @@
|
|||
# Toy dataset definitions used for tests: two training sources (one per
# dataset type) and one test source. `pipeline` is None in this file.
root = 'tests/data/toy_dataset'

# dataset with type='TextDetDataset'
train1 = dict(
    type='TextDetDataset',
    img_prefix=f'{root}/imgs',
    ann_file=f'{root}/instances_test.txt',
    loader=dict(
        type='HardDiskLoader',
        repeat=4,
        parser=dict(
            type='LineJsonParser',
            keys=['file_name', 'height', 'width', 'annotations'])),
    pipeline=None,
    test_mode=False)

# dataset with type='IcdarDataset'
train2 = dict(
    type='IcdarDataset',
    ann_file=f'{root}/instances_test.json',
    img_prefix=f'{root}/imgs',
    pipeline=None)

test = dict(
    type='TextDetDataset',
    img_prefix=f'{root}/imgs',
    ann_file=f'{root}/instances_test.txt',
    loader=dict(
        type='HardDiskLoader',
        repeat=1,
        parser=dict(
            type='LineJsonParser',
            keys=['file_name', 'height', 'width', 'annotations'])),
    pipeline=None,
    test_mode=True)

train_list = [train1, train2]

test_list = [test]
|
|
@ -0,0 +1,21 @@
|
|||
# DBNet text detector: ResNet-18 backbone, FPNC neck, DBHead with DBLoss.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
        norm_eval=False,
        style='caffe'),
    neck=dict(
        type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
    bbox_head=dict(
        type='DBHead',
        text_repr_type='quad',
        in_channels=256,
        loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
    train_cfg=None,
    test_cfg=None)
|
|
@ -0,0 +1,23 @@
|
|||
# DBNet text detector: ResNet-50 backbone with DCNv2 enabled on the last
# three stages, FPNC neck, DBHead with DBLoss.
model = dict(
    type='DBNet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        style='pytorch',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
    bbox_head=dict(
        type='DBHead',
        text_repr_type='quad',
        in_channels=256,
        loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
    train_cfg=None,
    test_cfg=None)
|
|
@ -0,0 +1,21 @@
|
|||
# DRRG text detector: ResNet-50 backbone, FPN_UNet neck, DRRGHead with
# DRRGLoss. No train_cfg/test_cfg keys are set in this file.
model = dict(
    type='DRRG',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
    bbox_head=dict(
        type='DRRGHead',
        in_channels=32,
        text_region_thr=0.3,
        center_region_thr=0.4,
        link_thr=0.80,
        loss=dict(type='DRRGLoss')))
|
|
@ -0,0 +1,30 @@
|
|||
# FCENet text detector: ResNet-50 backbone (outputs of stages 1-3),
# mmdet FPN neck with three outputs, FCEHead emitting quadrilaterals.
model = dict(
    type='FCENet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        norm_eval=False,
        style='pytorch'),
    neck=dict(
        type='mmdet.FPN',
        in_channels=[512, 1024, 2048],
        out_channels=256,
        add_extra_convs='on_output',
        num_outs=3,
        relu_before_extra_convs=True,
        act_cfg=None),
    bbox_head=dict(
        type='FCEHead',
        in_channels=256,
        scales=(8, 16, 32),
        loss=dict(type='FCELoss'),
        alpha=1.2,
        beta=1.0,
        text_repr_type='quad',
        fourier_degree=5,
    ))
|
|
@ -0,0 +1,29 @@
|
|||
# FCENet text detector: ResNet-50 backbone with DCNv2 enabled on the last
# three stages, mmdet FPN neck with three outputs, FCEHead with FCELoss.
model = dict(
    type='FCENet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch',
        dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        stage_with_dcn=(False, True, True, True)),
    neck=dict(
        type='mmdet.FPN',
        in_channels=[512, 1024, 2048],
        out_channels=256,
        add_extra_convs='on_output',
        num_outs=3,
        relu_before_extra_convs=True,
        act_cfg=None),
    bbox_head=dict(
        type='FCEHead',
        in_channels=256,
        scales=(8, 16, 32),
        loss=dict(type='FCELoss'),
        fourier_degree=5,
    ))
|
|
@ -0,0 +1,43 @@
|
|||
# PANet text detector on a ResNet-18 backbone with an FPEM_FFM neck.
# Two variants are defined; they differ only in the head's text_repr_type.

# Variant whose head uses text_repr_type='poly'.
model_poly = dict(
    type='PANet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
        norm_eval=True,
        style='caffe'),
    neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
    bbox_head=dict(
        type='PANHead',
        text_repr_type='poly',
        in_channels=[128, 128, 128, 128],
        out_channels=6,
        loss=dict(type='PANLoss')),
    train_cfg=None,
    test_cfg=None)

# Variant whose head uses text_repr_type='quad'.
model_quad = dict(
    type='PANet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=18,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
        norm_eval=True,
        style='caffe'),
    neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
    bbox_head=dict(
        type='PANHead',
        text_repr_type='quad',
        in_channels=[128, 128, 128, 128],
        out_channels=6,
        loss=dict(type='PANLoss')),
    train_cfg=None,
    test_cfg=None)
|
|
@ -0,0 +1,20 @@
|
|||
# PANet text detector on a ResNet-50 backbone with an FPEM_FFM neck.
# NOTE(review): weights are requested through the top-level `pretrained`
# key here, whereas the sibling model configs use a backbone `init_cfg` —
# confirm whether this should be migrated for consistency.
model = dict(
    type='PANet',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='caffe'),
    neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
    bbox_head=dict(
        type='PANHead',
        in_channels=[128, 128, 128, 128],
        out_channels=6,
        loss=dict(type='PANLoss', speedup_bbox_thr=32)),
    train_cfg=None,
    test_cfg=None)
|
|
@ -0,0 +1,51 @@
|
|||
# PSENet text detector on a ResNet-50 backbone with an FPNF neck.
# Two variants are defined; they differ only in the head's text_repr_type.

# Variant whose head uses text_repr_type='poly'.
model_poly = dict(
    type='PSENet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPNF',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        fusion_type='concat'),
    bbox_head=dict(
        type='PSEHead',
        text_repr_type='poly',
        in_channels=[256],
        out_channels=7,
        loss=dict(type='PSELoss')),
    train_cfg=None,
    test_cfg=None)

# Variant whose head uses text_repr_type='quad'.
model_quad = dict(
    type='PSENet',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='SyncBN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPNF',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        fusion_type='concat'),
    bbox_head=dict(
        type='PSEHead',
        text_repr_type='quad',
        in_channels=[256],
        out_channels=7,
        loss=dict(type='PSELoss')),
    train_cfg=None,
    test_cfg=None)
|
|
@ -0,0 +1,21 @@
|
|||
# TextSnake text detector: ResNet-50 backbone, FPN_UNet neck,
# TextSnakeHead emitting polygons with TextSnakeLoss.
model = dict(
    type='TextSnake',
    backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
    bbox_head=dict(
        type='TextSnakeHead',
        in_channels=32,
        text_repr_type='poly',
        loss=dict(type='TextSnakeLoss')),
    train_cfg=None,
    test_cfg=None)
|
|
@ -0,0 +1,88 @@
|
|||
# DBNet data pipelines. Two train/test pairs are defined, each with its
# own normalization constants: one named for r18 and one for r50dcnv2.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline_r18 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='ImgAug',
        args=[['Fliplr', 0.5],
              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
    dict(type='EastRandomCrop', target_size=(640, 640)),
    dict(type='DBNetTargets', shrink_ratio=0.4),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
        visualize=dict(flag=False, boundary_key='gt_shrink')),
    dict(
        type='Collect',
        keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]

# NOTE(review): the inner Resize img_scale (2944, 736) differs from the
# MultiScaleFlipAug img_scale (1333, 736) — confirm which one takes effect.
test_pipeline_1333_736 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 736),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# for dbnet_r50dcnv2_fpnc
img_norm_cfg_r50dcnv2 = dict(
    mean=[122.67891434, 116.66876762, 104.00698793],
    std=[58.395, 57.12, 57.375],
    to_rgb=True)

train_pipeline_r50dcnv2 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg_r50dcnv2),
    dict(
        type='ImgAug',
        args=[['Fliplr', 0.5],
              dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
    dict(type='EastRandomCrop', target_size=(640, 640)),
    dict(type='DBNetTargets', shrink_ratio=0.4),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
        visualize=dict(flag=False, boundary_key='gt_shrink')),
    dict(
        type='Collect',
        keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
]

# NOTE(review): the inner Resize img_scale (2944, 736) differs from the
# MultiScaleFlipAug img_scale (4068, 1024) — confirm which one takes effect.
test_pipeline_4068_1024 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(4068, 1024),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg_r50dcnv2),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,60 @@
|
|||
# DRRG data pipelines: one training pipeline ending in DRRGTargets and
# one single-scale test pipeline.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
    dict(
        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
    dict(
        type='RandomCropPolyInstances',
        instance_key='gt_masks',
        crop_ratio=0.8,
        min_side_ratio=0.3),
    dict(
        type='RandomRotatePolyInstances',
        rotate_ratio=0.5,
        max_angle=60,
        pad_with_fixed_color=False),
    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='DRRGTargets'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=[
            'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
            'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
            'gt_cos_map', 'gt_comp_attribs'
        ],
        visualize=dict(flag=False, boundary_key='gt_text_mask')),
    dict(
        type='Collect',
        keys=[
            'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
            'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
            'gt_cos_map', 'gt_comp_attribs'
        ])
]

test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1024, 640),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(1024, 640), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,118 @@
|
|||
# FCENet data pipelines, one train/test pair per dataset (icdar2015 and
# ctw1500). The two training pipelines differ only in the
# level_proportion_range passed to FCENetTargets.
# NOTE(review): in both test pipelines the inner Resize img_scale
# (1280, 800) differs from the MultiScaleFlipAug img_scale — confirm which
# one takes effect.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# for icdar2015
leval_prop_range_icdar2015 = ((0, 0.4), (0.3, 0.7), (0.6, 1.0))
train_pipeline_icdar2015 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5,
        contrast=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
    dict(
        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
    dict(
        type='RandomCropPolyInstances',
        instance_key='gt_masks',
        crop_ratio=0.8,
        min_side_ratio=0.3),
    dict(
        type='RandomRotatePolyInstances',
        rotate_ratio=0.5,
        max_angle=30,
        pad_with_fixed_color=False),
    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='FCENetTargets',
        fourier_degree=5,
        level_proportion_range=leval_prop_range_icdar2015),
    dict(
        type='CustomFormatBundle',
        keys=['p3_maps', 'p4_maps', 'p5_maps'],
        visualize=dict(flag=False, boundary_key=None)),
    dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
]

img_scale_icdar2015 = (2260, 2260)
test_pipeline_icdar2015 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_icdar2015,
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# for ctw1500
leval_prop_range_ctw1500 = ((0, 0.25), (0.2, 0.65), (0.55, 1.0))
train_pipeline_ctw1500 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(
        type='ColorJitter',
        brightness=32.0 / 255,
        saturation=0.5,
        contrast=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
    dict(
        type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
    dict(
        type='RandomCropPolyInstances',
        instance_key='gt_masks',
        crop_ratio=0.8,
        min_side_ratio=0.3),
    dict(
        type='RandomRotatePolyInstances',
        rotate_ratio=0.5,
        max_angle=30,
        pad_with_fixed_color=False),
    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='FCENetTargets',
        fourier_degree=5,
        level_proportion_range=leval_prop_range_ctw1500),
    dict(
        type='CustomFormatBundle',
        keys=['p3_maps', 'p4_maps', 'p5_maps'],
        visualize=dict(flag=False, boundary_key=None)),
    dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
]

img_scale_ctw1500 = (1080, 736)
test_pipeline_ctw1500 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_ctw1500,
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,57 @@
|
|||
# Mask R-CNN style text detection pipelines: one shared training pipeline
# and one test pipeline per dataset (ctw1500, icdar2015). The two test
# pipelines differ only in the MultiScaleFlipAug img_scale.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='ScaleAspectJitter',
        img_scale=None,
        keep_ratio=False,
        resize_type='indep_sample_in_range',
        scale_range=(640, 2560)),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='RandomCropInstances',
        target_size=(640, 640),
        mask_type='union_all',
        instance_key='gt_masks'),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]

# for ctw1500
img_scale_ctw1500 = (1600, 1600)
test_pipeline_ctw1500 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_ctw1500,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# for icdar2015
img_scale_icdar2015 = (1920, 1920)
test_pipeline_icdar2015 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_icdar2015,
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,156 @@
|
|||
# PANet data pipelines, one train/test pair per dataset (ctw1500,
# icdar2015, icdar2017). The training pipelines share the same steps and
# differ in img_scale, shrink_ratio and crop target_size.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# for ctw1500
img_scale_train_ctw1500 = [(3000, 640)]
shrink_ratio_train_ctw1500 = (1.0, 0.7)
target_size_train_ctw1500 = (640, 640)
train_pipeline_ctw1500 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='ScaleAspectJitter',
        img_scale=img_scale_train_ctw1500,
        ratio_range=(0.7, 1.3),
        aspect_ratio_range=(0.9, 1.1),
        multiscale_mode='value',
        keep_ratio=False),
    # shrink_ratio is from big to small. The 1st must be 1.0
    dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_ctw1500),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='RandomRotateTextDet'),
    dict(
        type='RandomCropInstances',
        target_size=target_size_train_ctw1500,
        instance_key='gt_kernels'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=False, boundary_key='gt_kernels')),
    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]

img_scale_test_ctw1500 = (3000, 640)
test_pipeline_ctw1500 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_test_ctw1500,
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# for icdar2015
img_scale_train_icdar2015 = [(3000, 736)]
shrink_ratio_train_icdar2015 = (1.0, 0.5)
target_size_train_icdar2015 = (736, 736)
train_pipeline_icdar2015 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='ScaleAspectJitter',
        img_scale=img_scale_train_icdar2015,
        ratio_range=(0.7, 1.3),
        aspect_ratio_range=(0.9, 1.1),
        multiscale_mode='value',
        keep_ratio=False),
    dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2015),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='RandomRotateTextDet'),
    dict(
        type='RandomCropInstances',
        target_size=target_size_train_icdar2015,
        instance_key='gt_kernels'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=False, boundary_key='gt_kernels')),
    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]

# NOTE(review): the inner Resize img_scale (3000, 640) differs from
# img_scale_test_icdar2015 (1333, 736) — confirm which one takes effect.
img_scale_test_icdar2015 = (1333, 736)
test_pipeline_icdar2015 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_test_icdar2015,
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# for icdar2017
img_scale_train_icdar2017 = [(3000, 800)]
shrink_ratio_train_icdar2017 = (1.0, 0.5)
target_size_train_icdar2017 = (800, 800)
train_pipeline_icdar2017 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='ScaleAspectJitter',
        img_scale=img_scale_train_icdar2017,
        ratio_range=(0.7, 1.3),
        aspect_ratio_range=(0.9, 1.1),
        multiscale_mode='value',
        keep_ratio=False),
    dict(type='PANetTargets', shrink_ratio=shrink_ratio_train_icdar2017),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='RandomRotateTextDet'),
    dict(
        type='RandomCropInstances',
        target_size=target_size_train_icdar2017,
        instance_key='gt_kernels'),
    dict(type='Pad', size_divisor=32),
    dict(
        type='CustomFormatBundle',
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=False, boundary_key='gt_kernels')),
    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]

# NOTE(review): the inner Resize img_scale (3000, 640) differs from
# img_scale_test_icdar2017 (1333, 800) — confirm which one takes effect.
img_scale_test_icdar2017 = (1333, 800)
test_pipeline_icdar2017 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_test_icdar2017,
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,70 @@
|
|||
# Per-channel mean/std and channel-order flag shared by every Normalize step
# in the pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# Training pipeline: load image and text annotations, colour jitter,
# normalize, rescale, build PSENet targets, flip/rotate/crop, pad, then
# bundle and collect the tensors listed in `keys`.
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='ScaleAspectJitter',
        img_scale=[(3000, 736)],
        ratio_range=(0.5, 3),
        aspect_ratio_range=(1, 1),
        multiscale_mode='value',
        long_size_bound=1280,
        short_size_bound=640,
        resize_type='long_short_bound',
        keep_ratio=False),
    dict(type='PSENetTargets'),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='RandomRotateTextDet'),
    dict(
        type='RandomCropInstances',
        target_size=(640, 640),
        instance_key='gt_kernels'),
    dict(type='Pad', size_divisor=32),
    # `flag=False` keeps visualization of the formatted targets switched off.
    dict(
        type='CustomFormatBundle',
        keys=['gt_kernels', 'gt_mask'],
        visualize=dict(flag=False, boundary_key='gt_kernels')),
    dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
]

# for ctw1500
img_scale_test_ctw1500 = (1280, 1280)
test_pipeline_ctw1500 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_test_ctw1500,
        flip=False,
        transforms=[
            # Same value as before, (1280, 1280), but now read from the
            # variable so the two scales cannot drift apart.
            dict(
                type='Resize',
                img_scale=img_scale_test_ctw1500,
                keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]

# for icdar2015
img_scale_test_icdar2015 = (2240, 2240)
test_pipeline_icdar2015 = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=img_scale_test_icdar2015,
        flip=False,
        transforms=[
            # The inner Resize previously hard-coded (1280, 1280), which
            # disagreed with the (2240, 2240) given to MultiScaleFlipAug.
            # NOTE(review): MultiScaleFlipAug is expected to supply the
            # scale to the nested Resize, so this should not change results
            # -- confirm against the installed mmdet version.
            dict(
                type='Resize',
                img_scale=img_scale_test_icdar2015,
                keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,65 @@
|
|||
# Per-channel mean/std and channel-order flag shared by the Normalize steps
# of the train and test pipelines below.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# Training pipeline: load image and text annotations, colour jitter,
# normalize, polygon-aware crop/rotate, rescale, square resize + pad, flip,
# build TextSnake targets, pad, then bundle and collect the target maps.
train_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='LoadTextAnnotations',
        with_bbox=True,
        with_mask=True,
        poly2mask=False),
    dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(
        type='RandomCropPolyInstances',
        instance_key='gt_masks',
        crop_ratio=0.65,
        min_side_ratio=0.3),
    dict(
        type='RandomRotatePolyInstances',
        rotate_ratio=0.5,
        max_angle=20,
        pad_with_fixed_color=False),
    dict(
        type='ScaleAspectJitter',
        img_scale=[(3000, 736)],  # unused
        ratio_range=(0.7, 1.3),
        aspect_ratio_range=(0.9, 1.1),
        multiscale_mode='value',
        long_size_bound=800,
        short_size_bound=480,
        resize_type='long_short_bound',
        keep_ratio=False),
    dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
    dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
    dict(type='TextSnakeTargets'),
    dict(type='Pad', size_divisor=32),
    # The keys bundled here are the same target maps that Collect gathers
    # below (Collect additionally takes 'img'). `flag=False` keeps
    # visualization switched off.
    dict(
        type='CustomFormatBundle',
        keys=[
            'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
            'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
        ],
        visualize=dict(flag=False, boundary_key='gt_text_mask')),
    dict(
        type='Collect',
        keys=[
            'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
            'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
        ])
]

# Test pipeline: a single scale of (1333, 736), no flipping.
test_pipeline = [
    dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 736),
        flip=False,
        transforms=[
            dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
|
|
@ -0,0 +1,6 @@
|
|||
# optimizer
# Adam with a learning rate of 1e-4; gradient clipping is disabled.
optimizer = dict(type='Adam', lr=1e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
# Step schedule with milestones at 200 and 400.
# NOTE(review): the milestones are presumably epochs (no `by_epoch` is set
# here) -- confirm against the runner's LR hook defaults.
lr_config = dict(policy='step', step=[200, 400])
total_epochs = 600
|
|
@ -0,0 +1,5 @@
|
|||
# optimizer
# SGD with momentum and weight decay; gradient clipping is disabled.
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
optimizer_config = dict(grad_clip=None)
# learning policy
# Polynomial decay (power 0.9) down to a floor of 1e-7, stepped per epoch.
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
total_epochs = 1500
|
|
@ -1,98 +1,33 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_1200e.py', '../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
'../../_base_/det_models/dbnet_r18_fpnc.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/dbnet_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=False,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[64, 128, 256, 512], lateral_channels=256),
|
||||
bbox_head=dict(
|
||||
type='DBHead',
|
||||
text_repr_type='quad',
|
||||
in_channels=256,
|
||||
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
# To visualize images, uncomment the identity normalization below.
|
||||
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_r18 = {{_base_.train_pipeline_r18}}
|
||||
test_pipeline_1333_736 = {{_base_.test_pipeline_1333_736}}
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
# img aug
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
# random crop
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
# To visualize images and ground truths, set `flag=True` in `visualize` below.
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1333, 736),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(2944, 736), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=16,
|
||||
workers_per_gpu=8,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
# for debugging top k imgs
|
||||
# select_first_k=200,
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r18),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_1333_736),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_1333_736))
|
||||
|
||||
evaluation = dict(interval=100, metric='hmean-iou')
|
||||
|
|
|
@ -1,104 +1,35 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_1200e.py', '../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
'../../_base_/det_models/dbnet_r50dcnv2_fpnc.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/dbnet_pipeline.py'
|
||||
]
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_r50dcnv2 = {{_base_.train_pipeline_r50dcnv2}}
|
||||
test_pipeline_4068_1024 = {{_base_.test_pipeline_4068_1024}}
|
||||
|
||||
load_from = 'checkpoints/textdet/dbnet/res50dcnv2_synthtext.pth'
|
||||
|
||||
model = dict(
|
||||
type='DBNet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=False,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='FPNC', in_channels=[256, 512, 1024, 2048], lateral_channels=256),
|
||||
bbox_head=dict(
|
||||
type='DBHead',
|
||||
text_repr_type='quad',
|
||||
in_channels=256,
|
||||
loss=dict(type='DBLoss', alpha=5.0, beta=10.0, bbce_loss=True)),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[122.67891434, 116.66876762, 104.00698793],
|
||||
std=[58.395, 57.12, 57.375],
|
||||
to_rgb=True)
|
||||
# To visualize images, uncomment the identity normalization below.
|
||||
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
# img aug
|
||||
dict(
|
||||
type='ImgAug',
|
||||
args=[['Fliplr', 0.5],
|
||||
dict(cls='Affine', rotate=[-10, 10]), ['Resize', [0.5, 3.0]]]),
|
||||
# random crop
|
||||
dict(type='EastRandomCrop', target_size=(640, 640)),
|
||||
dict(type='DBNetTargets', shrink_ratio=0.4),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
# To visualize images and ground truths, set `flag=True` in `visualize` below.
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_shrink')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=['img', 'gt_shrink', 'gt_shrink_mask', 'gt_thr', 'gt_thr_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(4068, 1024),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(4068, 1024), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=4,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
# for debugging top k imgs
|
||||
# select_first_k=200,
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_r50dcnv2),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_4068_1024))
|
||||
|
||||
evaluation = dict(interval=100, metric='hmean-iou')
|
||||
|
|
|
@ -1,112 +1,33 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_1200e.py',
|
||||
'../../_base_/default_runtime.py'
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/det_models/drrg_r50_fpn_unet.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/det_pipelines/drrg_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='DRRG',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
|
||||
bbox_head=dict(
|
||||
type='DRRGHead',
|
||||
in_channels=32,
|
||||
text_region_thr=0.3,
|
||||
center_region_thr=0.4,
|
||||
link_thr=0.80,
|
||||
loss=dict(type='DRRGLoss')))
|
||||
train_cfg = None
|
||||
test_cfg = None
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500/'
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline = {{_base_.test_pipeline}}
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='RandomScaling', size=800, scale=(0.75, 2.5)),
|
||||
dict(
|
||||
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.8,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=60,
|
||||
pad_with_fixed_color=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='DRRGTargets'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=[
|
||||
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
|
||||
'gt_cos_map', 'gt_comp_attribs'
|
||||
],
|
||||
visualize=dict(flag=False, boundary_key='gt_text_mask')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=[
|
||||
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_top_height_map', 'gt_bot_height_map', 'gt_sin_map',
|
||||
'gt_cos_map', 'gt_comp_attribs'
|
||||
])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1024, 640),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1024, 640), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=4,
|
||||
workers_per_gpu=4,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_training.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_test.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_test.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline))
|
||||
|
||||
evaluation = dict(interval=20, metric='hmean-iou')
|
||||
|
|
|
@ -1,136 +1,33 @@
|
|||
fourier_degree = 5
|
||||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=False,
|
||||
style='pytorch'),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
bbox_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
scales=(8, 16, 32),
|
||||
loss=dict(type='FCELoss'),
|
||||
alpha=1.2,
|
||||
beta=1.0,
|
||||
text_repr_type='quad',
|
||||
fourier_degree=fourier_degree,
|
||||
))
|
||||
|
||||
train_cfg = None
|
||||
test_cfg = None
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015/'
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(
|
||||
type='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
|
||||
dict(
|
||||
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.8,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='FCENetTargets',
|
||||
fourier_degree=fourier_degree,
|
||||
level_proportion_range=((0, 0.4), (0.3, 0.7), (0.6, 1.0))),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['p3_maps', 'p4_maps', 'p5_maps'],
|
||||
visualize=dict(flag=False, boundary_key=None)),
|
||||
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(2260, 2260),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
_base_ = [
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/schedules/schedule_sgd_1500e.py',
|
||||
'../../_base_/det_models/fcenet_r50_fpn.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/fcenet_pipeline.py'
|
||||
]
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}}
|
||||
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=2,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_icdar2015),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
evaluation = dict(interval=5, metric='hmean-iou')
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
|
||||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
|
||||
total_epochs = 1500
|
||||
|
||||
checkpoint_config = dict(interval=5)
|
||||
# yapf:disable
|
||||
log_config = dict(
|
||||
interval=20,
|
||||
hooks=[
|
||||
dict(type='TextLoggerHook')
|
||||
|
||||
])
|
||||
# yapf:enable
|
||||
dist_params = dict(backend='nccl')
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume_from = None
|
||||
workflow = [('train', 1)]
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,135 +1,33 @@
|
|||
fourier_degree = 5
|
||||
model = dict(
|
||||
type='FCENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='pytorch',
|
||||
dcn=dict(type='DCNv2', deform_groups=2, fallback_on_stride=False),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
stage_with_dcn=(False, True, True, True)),
|
||||
neck=dict(
|
||||
type='mmdet.FPN',
|
||||
in_channels=[512, 1024, 2048],
|
||||
out_channels=256,
|
||||
add_extra_convs='on_output',
|
||||
num_outs=3,
|
||||
relu_before_extra_convs=True,
|
||||
act_cfg=None),
|
||||
bbox_head=dict(
|
||||
type='FCEHead',
|
||||
in_channels=256,
|
||||
scales=(8, 16, 32),
|
||||
loss=dict(type='FCELoss'),
|
||||
fourier_degree=fourier_degree,
|
||||
))
|
||||
|
||||
train_cfg = None
|
||||
test_cfg = None
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500/'
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(
|
||||
type='ColorJitter',
|
||||
brightness=32.0 / 255,
|
||||
saturation=0.5,
|
||||
contrast=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='RandomScaling', size=800, scale=(3. / 4, 5. / 2)),
|
||||
dict(
|
||||
type='RandomCropFlip', crop_ratio=0.5, iter_num=1, min_area_ratio=0.2),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.8,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=30,
|
||||
pad_with_fixed_color=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='FCENetTargets',
|
||||
fourier_degree=fourier_degree,
|
||||
level_proportion_range=((0, 0.25), (0.2, 0.65), (0.55, 1.0))),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['p3_maps', 'p4_maps', 'p5_maps'],
|
||||
visualize=dict(flag=False, boundary_key=None)),
|
||||
dict(type='Collect', keys=['img', 'p3_maps', 'p4_maps', 'p5_maps'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1080, 736),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 800), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
_base_ = [
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/schedules/schedule_sgd_1500e.py',
|
||||
'../../_base_/det_models/fcenet_r50dcnv2_fpn.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/det_pipelines/fcenet_pipeline.py'
|
||||
]
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_ctw1500 = {{_base_.train_pipeline_ctw1500}}
|
||||
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=6,
|
||||
workers_per_gpu=2,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_ctw1500),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
evaluation = dict(interval=5, metric='hmean-iou')
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500))
|
||||
|
||||
# optimizer
|
||||
optimizer = dict(type='SGD', lr=1e-3, momentum=0.90, weight_decay=5e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
lr_config = dict(policy='poly', power=0.9, min_lr=1e-7, by_epoch=True)
|
||||
total_epochs = 1500
|
||||
|
||||
checkpoint_config = dict(interval=5)
|
||||
# yapf:disable
|
||||
log_config = dict(
|
||||
interval=20,
|
||||
hooks=[
|
||||
dict(type='TextLoggerHook')
|
||||
|
||||
])
|
||||
# yapf:enable
|
||||
dist_params = dict(backend='nccl')
|
||||
log_level = 'INFO'
|
||||
load_from = None
|
||||
resume_from = None
|
||||
workflow = [('train', 1)]
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,69 +1,33 @@
|
|||
_base_ = [
|
||||
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem_poly.py',
|
||||
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem_poly.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/det_pipelines/maskrcnn_pipeline.py'
|
||||
]
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=None,
|
||||
keep_ratio=False,
|
||||
resize_type='indep_sample_in_range',
|
||||
scale_range=(640, 2560)),
|
||||
dict(type='RandomFlip', flip_ratio=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
mask_type='union_all',
|
||||
instance_key='gt_masks'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
# resize the long side to 1600
|
||||
img_scale=(1600, 1600),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', keep_ratio=True),
|
||||
# no flip
|
||||
dict(type='RandomFlip'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=4,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
# select_first_k=1,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
# select_first_k=1,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,68 +1,33 @@
|
|||
_base_ = [
|
||||
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py',
|
||||
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
|
||||
]
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=None,
|
||||
keep_ratio=False,
|
||||
resize_type='indep_sample_in_range',
|
||||
scale_range=(640, 2560)),
|
||||
dict(type='RandomFlip', flip_ratio=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
mask_type='union_all',
|
||||
instance_key='gt_masks'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
# resize the long size to 1600
|
||||
img_scale=(1920, 1920),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', keep_ratio=True),
|
||||
# no flip
|
||||
dict(type='RandomFlip'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/maskrcnn_pipeline.py'
|
||||
]
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=4,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
# select_first_k=1,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
# select_first_k=1,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,69 +1,33 @@
|
|||
_base_ = [
|
||||
'../../_base_/models/ocr_mask_rcnn_r50_fpn_ohem.py',
|
||||
'../../_base_/schedules/schedule_160e.py', '../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/ocr_mask_rcnn_r50_fpn_ohem.py',
|
||||
'../../_base_/schedules/schedule_sgd_160e.py',
|
||||
'../../_base_/det_datasets/icdar2017.py',
|
||||
'../../_base_/det_pipelines/maskrcnn_pipeline.py'
|
||||
]
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2017/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
# img_norm_cfg = dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=None,
|
||||
keep_ratio=False,
|
||||
resize_type='indep_sample_in_range',
|
||||
scale_range=(640, 2560)),
|
||||
dict(type='RandomFlip', flip_ratio=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
mask_type='union_all',
|
||||
instance_key='gt_masks'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='DefaultFormatBundle'),
|
||||
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
# resize the long size to 1600
|
||||
img_scale=(1600, 1600),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', keep_ratio=True),
|
||||
# no flip
|
||||
dict(type='RandomFlip'),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=4,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
# select_first_k=1,
|
||||
ann_file=data_root + '/instances_val.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
# select_first_k=1,
|
||||
ann_file=data_root + '/instances_val.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,106 +1,35 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_adam_600e.py',
|
||||
'../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/panet_r18_fpem_ffm.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/det_pipelines/panet_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='PANet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
|
||||
bbox_head=dict(
|
||||
type='PANHead',
|
||||
text_repr_type='poly',
|
||||
in_channels=[128, 128, 128, 128],
|
||||
out_channels=6,
|
||||
loss=dict(type='PANLoss')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500/'
|
||||
model = {{_base_.model_poly}}
|
||||
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
# for visualizing img, pls uncomment it.
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_ctw1500 = {{_base_.train_pipeline_ctw1500}}
|
||||
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 640)],
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
keep_ratio=False),
|
||||
# shrink_ratio is from big to small. The 1st must be 1.0
|
||||
dict(type='PANetTargets', shrink_ratio=(1.0, 0.7)),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
# for visualizing img and gts, pls set visualize = True
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(3000, 640),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(3000, 640), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=2,
|
||||
workers_per_gpu=2,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
# for debugging top k imgs
|
||||
# select_first_k=200,
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_ctw1500),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,104 +1,35 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_adam_600e.py',
|
||||
'../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/panet_r18_fpem_ffm.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/panet_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='PANet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=18,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(type='FPEM_FFM', in_channels=[64, 128, 256, 512]),
|
||||
bbox_head=dict(
|
||||
type='PANHead',
|
||||
text_repr_type='quad',
|
||||
in_channels=[128, 128, 128, 128],
|
||||
out_channels=6,
|
||||
loss=dict(type='PANLoss')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
# for visualizing img, pls uncomment it.
|
||||
# img_norm_cfg = dict(
|
||||
# mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
|
||||
model = {{_base_.model_quad}}
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_icdar2015 = {{_base_.train_pipeline_icdar2015}}
|
||||
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)],
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
keep_ratio=False),
|
||||
dict(type='PANetTargets', shrink_ratio=(1.0, 0.5), max_shrink=20),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(736, 736),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
# for visualizing img and gts, pls set visualize = True
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1333, 736),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=2,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
# for debugging top k imgs
|
||||
# select_first_k=200,
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_icdar2015),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
# select_first_k=100,
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,95 +1,33 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_adam_600e.py',
|
||||
'../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/panet_r50_fpem_ffm.py',
|
||||
'../../_base_/det_datasets/icdar2017.py',
|
||||
'../../_base_/det_pipelines/panet_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='PANet',
|
||||
pretrained='torchvision://resnet50',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(type='FPEM_FFM', in_channels=[256, 512, 1024, 2048]),
|
||||
bbox_head=dict(
|
||||
type='PANHead',
|
||||
in_channels=[128, 128, 128, 128],
|
||||
out_channels=6,
|
||||
loss=dict(type='PANLoss', speedup_bbox_thr=32)),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2017/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 800)],
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
keep_ratio=False),
|
||||
dict(type='PANetTargets', shrink_ratio=(1.0, 0.5)),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(800, 800),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
# for visualizing img and gts, pls set visualize = True
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1333, 800),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline_icdar2017 = {{_base_.train_pipeline_icdar2017}}
|
||||
test_pipeline_icdar2017 = {{_base_.test_pipeline_icdar2017}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=4,
|
||||
workers_per_gpu=4,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=train_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline_icdar2017),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_val.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2017),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_val.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2017))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,110 +1,35 @@
|
|||
_base_ = ['../../_base_/default_runtime.py']
|
||||
|
||||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[200, 400])
|
||||
total_epochs = 600
|
||||
|
||||
model = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
bbox_head=dict(
|
||||
type='PSEHead',
|
||||
text_repr_type='poly',
|
||||
in_channels=[256],
|
||||
out_channels=7,
|
||||
loss=dict(type='PSELoss')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)],
|
||||
ratio_range=(0.5, 3),
|
||||
aspect_ratio_range=(1, 1),
|
||||
multiscale_mode='value',
|
||||
long_size_bound=1280,
|
||||
short_size_bound=640,
|
||||
resize_type='long_short_bound',
|
||||
keep_ratio=False),
|
||||
dict(type='PSENetTargets'),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1280, 1280),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1280, 1280), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
_base_ = [
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/schedules/schedule_adam_step_600e.py',
|
||||
'../../_base_/det_models/psenet_r50_fpnf.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/det_pipelines/psenet_pipeline.py'
|
||||
]
|
||||
|
||||
model = {{_base_.model_poly}}
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline_ctw1500 = {{_base_.test_pipeline_ctw1500}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=2,
|
||||
workers_per_gpu=2,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_ctw1500))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,110 +1,35 @@
|
|||
_base_ = ['../../_base_/runtime_10e.py']
|
||||
|
||||
# optimizer
|
||||
optimizer = dict(type='Adam', lr=1e-4)
|
||||
optimizer_config = dict(grad_clip=None)
|
||||
# learning policy
|
||||
lr_config = dict(policy='step', step=[200, 400])
|
||||
total_epochs = 600
|
||||
|
||||
model = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='SyncBN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
bbox_head=dict(
|
||||
type='PSEHead',
|
||||
text_repr_type='quad',
|
||||
in_channels=[256],
|
||||
out_channels=7,
|
||||
loss=dict(type='PSELoss')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2015/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)], # unused
|
||||
ratio_range=(0.5, 3),
|
||||
aspect_ratio_range=(1, 1),
|
||||
multiscale_mode='value',
|
||||
long_size_bound=1280,
|
||||
short_size_bound=640,
|
||||
resize_type='long_short_bound',
|
||||
keep_ratio=False),
|
||||
dict(type='PSENetTargets'),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(2240, 2200),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(2240, 2200), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
_base_ = [
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/schedules/schedule_adam_step_600e.py',
|
||||
'../../_base_/det_models/psenet_r50_fpnf.py',
|
||||
'../../_base_/det_datasets/icdar2015.py',
|
||||
'../../_base_/det_pipelines/psenet_pipeline.py'
|
||||
]
|
||||
|
||||
model = {{_base_.model_quad}}
|
||||
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
workers_per_gpu=2,
|
||||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_test.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,85 +1,18 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_sgd_600e.py',
|
||||
'../../_base_/runtime_10e.py'
|
||||
'../../_base_/runtime_10e.py',
|
||||
'../../_base_/det_models/psenet_r50_fpnf.py',
|
||||
'../../_base_/det_datasets/icdar2017.py',
|
||||
'../../_base_/det_pipelines/psenet_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='PSENet',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPNF',
|
||||
in_channels=[256, 512, 1024, 2048],
|
||||
out_channels=256,
|
||||
fusion_type='concat'),
|
||||
bbox_head=dict(
|
||||
type='PSEHead',
|
||||
text_repr_type='quad',
|
||||
in_channels=[256],
|
||||
out_channels=7,
|
||||
loss=dict(type='PSELoss')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/icdar2017/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
model = {{_base_.model_quad}}
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)],
|
||||
ratio_range=(0.5, 3),
|
||||
aspect_ratio_range=(1, 1),
|
||||
multiscale_mode='value',
|
||||
long_size_bound=1280,
|
||||
short_size_bound=640,
|
||||
resize_type='long_short_bound',
|
||||
keep_ratio=False),
|
||||
dict(type='PSENetTargets'),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='RandomRotateTextDet'),
|
||||
dict(
|
||||
type='RandomCropInstances',
|
||||
target_size=(640, 640),
|
||||
instance_key='gt_kernels'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=['gt_kernels', 'gt_mask'],
|
||||
visualize=dict(flag=False, boundary_key='gt_kernels')),
|
||||
dict(type='Collect', keys=['img', 'gt_kernels', 'gt_mask'])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(2240, 2200),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(2240, 2200), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline_icdar2015 = {{_base_.test_pipeline_icdar2015}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=8,
|
||||
|
@ -87,19 +20,16 @@ data = dict(
|
|||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_training.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_val.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline),
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=data_root + '/instances_val.json',
|
||||
img_prefix=data_root + '/imgs',
|
||||
pipeline=test_pipeline))
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline_icdar2015))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -1,95 +1,16 @@
|
|||
_base_ = [
|
||||
'../../_base_/schedules/schedule_1200e.py',
|
||||
'../../_base_/default_runtime.py'
|
||||
'../../_base_/schedules/schedule_sgd_1200e.py',
|
||||
'../../_base_/default_runtime.py',
|
||||
'../../_base_/det_models/textsnake_r50_fpn_unet.py',
|
||||
'../../_base_/det_datasets/ctw1500.py',
|
||||
'../../_base_/det_pipelines/textsnake_pipeline.py'
|
||||
]
|
||||
model = dict(
|
||||
type='TextSnake',
|
||||
backbone=dict(
|
||||
type='mmdet.ResNet',
|
||||
depth=50,
|
||||
num_stages=4,
|
||||
out_indices=(0, 1, 2, 3),
|
||||
frozen_stages=-1,
|
||||
norm_cfg=dict(type='BN', requires_grad=True),
|
||||
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
|
||||
norm_eval=True,
|
||||
style='caffe'),
|
||||
neck=dict(
|
||||
type='FPN_UNet', in_channels=[256, 512, 1024, 2048], out_channels=32),
|
||||
bbox_head=dict(
|
||||
type='TextSnakeHead',
|
||||
in_channels=32,
|
||||
text_repr_type='poly',
|
||||
loss=dict(type='TextSnakeLoss')),
|
||||
train_cfg=None,
|
||||
test_cfg=None)
|
||||
|
||||
dataset_type = 'IcdarDataset'
|
||||
data_root = 'data/ctw1500/'
|
||||
img_norm_cfg = dict(
|
||||
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
|
||||
train_list = {{_base_.train_list}}
|
||||
test_list = {{_base_.test_list}}
|
||||
|
||||
train_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='LoadTextAnnotations',
|
||||
with_bbox=True,
|
||||
with_mask=True,
|
||||
poly2mask=False),
|
||||
dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(
|
||||
type='RandomCropPolyInstances',
|
||||
instance_key='gt_masks',
|
||||
crop_ratio=0.65,
|
||||
min_side_ratio=0.3),
|
||||
dict(
|
||||
type='RandomRotatePolyInstances',
|
||||
rotate_ratio=0.5,
|
||||
max_angle=20,
|
||||
pad_with_fixed_color=False),
|
||||
dict(
|
||||
type='ScaleAspectJitter',
|
||||
img_scale=[(3000, 736)], # unused
|
||||
ratio_range=(0.7, 1.3),
|
||||
aspect_ratio_range=(0.9, 1.1),
|
||||
multiscale_mode='value',
|
||||
long_size_bound=800,
|
||||
short_size_bound=480,
|
||||
resize_type='long_short_bound',
|
||||
keep_ratio=False),
|
||||
dict(type='SquareResizePad', target_size=800, pad_ratio=0.6),
|
||||
dict(type='RandomFlip', flip_ratio=0.5, direction='horizontal'),
|
||||
dict(type='TextSnakeTargets'),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(
|
||||
type='CustomFormatBundle',
|
||||
keys=[
|
||||
'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
|
||||
],
|
||||
visualize=dict(flag=False, boundary_key='gt_text_mask')),
|
||||
dict(
|
||||
type='Collect',
|
||||
keys=[
|
||||
'img', 'gt_text_mask', 'gt_center_region_mask', 'gt_mask',
|
||||
'gt_radius_map', 'gt_sin_map', 'gt_cos_map'
|
||||
])
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='LoadImageFromFile', color_type='color_ignore_orientation'),
|
||||
dict(
|
||||
type='MultiScaleFlipAug',
|
||||
img_scale=(1333, 736),
|
||||
flip=False,
|
||||
transforms=[
|
||||
dict(type='Resize', img_scale=(1333, 736), keep_ratio=True),
|
||||
dict(type='Normalize', **img_norm_cfg),
|
||||
dict(type='Pad', size_divisor=32),
|
||||
dict(type='ImageToTensor', keys=['img']),
|
||||
dict(type='Collect', keys=['img']),
|
||||
])
|
||||
]
|
||||
train_pipeline = {{_base_.train_pipeline}}
|
||||
test_pipeline = {{_base_.test_pipeline}}
|
||||
|
||||
data = dict(
|
||||
samples_per_gpu=4,
|
||||
|
@ -97,19 +18,16 @@ data = dict(
|
|||
val_dataloader=dict(samples_per_gpu=1),
|
||||
test_dataloader=dict(samples_per_gpu=1),
|
||||
train=dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_training.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=train_list,
|
||||
pipeline=train_pipeline),
|
||||
val=dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_test.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline),
|
||||
test=dict(
|
||||
type=dataset_type,
|
||||
ann_file=f'{data_root}/instances_test.json',
|
||||
img_prefix=f'{data_root}/imgs',
|
||||
type='UniformConcatDataset',
|
||||
datasets=test_list,
|
||||
pipeline=test_pipeline))
|
||||
|
||||
evaluation = dict(interval=10, metric='hmean-iou')
|
||||
|
|
|
@ -7,11 +7,13 @@ from mmcv.image import imread
|
|||
from mmocr.apis.inference import init_detector, model_inference
|
||||
from mmocr.datasets import build_dataset # noqa: F401
|
||||
from mmocr.models import build_detector # noqa: F401
|
||||
from mmocr.utils import revert_sync_batchnorm
|
||||
|
||||
|
||||
def build_model(config_file):
|
||||
device = 'cpu'
|
||||
model = init_detector(config_file, checkpoint=None, device=device)
|
||||
model = revert_sync_batchnorm(model)
|
||||
|
||||
if model.cfg.data.test['type'] == 'ConcatDataset':
|
||||
model.cfg.data.test.pipeline = model.cfg.data.test['datasets'][
|
||||
|
|
Loading…
Reference in New Issue