mirror of https://github.com/open-mmlab/mmocr.git
[SDMGR] Add SDMGR configs
parent 77ffe8fb00
commit 422bea9d10
@@ -1,98 +1,81 @@
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-max_scale, min_scale = 1024, 512
+_base_ = ['../../_base_/default_runtime.py']

-train_pipeline = [
-    dict(type='LoadAnnotations'),
-    dict(
-        type='ResizeNoImg', img_scale=(max_scale, min_scale), keep_ratio=True),
-    dict(type='KIEFormatBundle'),
-    dict(
-        type='Collect',
-        keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'],
-        meta_keys=('filename', 'ori_texts'))
-]
-test_pipeline = [
-    dict(type='LoadAnnotations'),
-    dict(
-        type='ResizeNoImg', img_scale=(max_scale, min_scale), keep_ratio=True),
-    dict(type='KIEFormatBundle'),
-    dict(
-        type='Collect',
-        keys=['img', 'relations', 'texts', 'gt_bboxes'],
-        meta_keys=('filename', 'ori_texts', 'img_norm_cfg', 'ori_filename',
-                   'img_shape'))
+optim_wrapper = dict(
+    type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001))
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# learning rate
+param_scheduler = [
+    dict(type='MultiStepLR', milestones=[40, 50], end=60),
 ]

-dataset_type = 'KIEDataset'
-data_root = 'data/wildreceipt'
+default_hooks = dict(logger=dict(type='LoggerHook', interval=100), )

-loader = dict(
-    type='HardDiskLoader',
-    repeat=1,
-    parser=dict(
-        type='LineJsonParser',
-        keys=['file_name', 'height', 'width', 'annotations']))

-train = dict(
-    type=dataset_type,
-    ann_file=f'{data_root}/train.txt',
-    pipeline=train_pipeline,
-    img_prefix=data_root,
-    loader=loader,
-    dict_file=f'{data_root}/dict.txt',
-    test_mode=False)
-test = dict(
-    type=dataset_type,
-    ann_file=f'{data_root}/test.txt',
-    pipeline=test_pipeline,
-    img_prefix=data_root,
-    loader=loader,
-    dict_file=f'{data_root}/dict.txt',
-    test_mode=True)

-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=1,
-    val_dataloader=dict(samples_per_gpu=1),
-    test_dataloader=dict(samples_per_gpu=1),
-    train=train,
-    val=test,
-    test=test)

-evaluation = dict(
-    interval=1,
-    metric='macro_f1',
-    metric_options=dict(
-        macro_f1=dict(
-            ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])))
+num_classes = 26

 model = dict(
     type='SDMGR',
-    backbone=dict(type='UNet', base_channels=16),
-    bbox_head=dict(
-        type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26),
-    visual_modality=False,
-    train_cfg=None,
-    test_cfg=None,
-    class_list=f'{data_root}/class_list.txt')
+    kie_head=dict(
+        type='SDMGRHead',
+        visual_dim=16,
+        num_classes=num_classes,
+        module_loss=dict(type='SDMGRModuleLoss'),
+        postprocessor=dict(type='SDMGRPostProcessor')),
+    dictionary=dict(
+        type='Dictionary',
+        dict_file='data/wildreceipt/dict.txt',
+        with_padding=True,
+        with_unknown=True,
+        unknown_token=None),
+)

-optimizer = dict(type='Adam', weight_decay=0.0001)
-optimizer_config = dict(grad_clip=None)
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=1,
-    warmup_ratio=1,
-    step=[40, 50])
-total_epochs = 60
+train_pipeline = [
+    dict(type='LoadKIEAnnotations'),
+    dict(type='Resize', scale=(1024, 512), keep_ratio=True),
+    dict(type='PackKIEInputs')
+]
+test_pipeline = [
+    dict(type='LoadKIEAnnotations'),
+    dict(type='Resize', scale=(1024, 512), keep_ratio=True),
+    dict(type='PackKIEInputs'),
+]

-checkpoint_config = dict(interval=1)
-log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
+dataset_type = 'WildReceiptDataset'
+data_root = 'data/wildreceipt/'

 find_unused_parameters = True
+train_dataset = dict(
+    type=dataset_type,
+    data_root=data_root,
+    metainfo=data_root + 'class_list.txt',
+    ann_file='train.txt',
+    pipeline=train_pipeline)

+test_dataset = dict(
+    type=dataset_type,
+    data_root=data_root,
+    metainfo=data_root + 'class_list.txt',
+    ann_file='test.txt',
+    test_mode=True,
+    pipeline=test_pipeline)

+train_dataloader = dict(
+    batch_size=4,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=train_dataset)
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=test_dataset)
+test_dataloader = val_dataloader

+val_evaluator = dict(
+    type='F1Metric',
+    mode='macro',
+    num_classes=num_classes,
+    ignored_classes=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])
+test_evaluator = val_evaluator
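For readers unfamiliar with the new-style (MMEngine) config added in this hunk, the following is a minimal sketch of how the closed-set, no-visual config could be loaded and trained. The config path, the work_dir, and the register_all_modules() call are assumptions about an MMOCR 1.x checkout, not something this commit specifies.

# Minimal sketch (assumed file path and work_dir, not taken from this commit).
from mmengine.config import Config
from mmengine.runner import Runner

from mmocr.utils import register_all_modules  # MMOCR 1.x registry helper (assumed available)

register_all_modules()  # makes WildReceiptDataset, SDMGR, F1Metric, etc. resolvable

cfg = Config.fromfile(
    'configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py')  # assumed path
cfg.work_dir = 'work_dirs/sdmgr_novisual'  # Runner requires a work_dir

runner = Runner.from_cfg(cfg)  # builds model, dataloaders and loops from the dict above
runner.train()                 # 60 epochs, LR dropped by MultiStepLR at epochs 40 and 50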
@@ -1,84 +1,101 @@
 _base_ = ['../../_base_/default_runtime.py']

+optim_wrapper = dict(
+    type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001))
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# learning rate
+param_scheduler = [
+    dict(type='MultiStepLR', milestones=[40, 50], end=60),
+]

+default_hooks = dict(logger=dict(type='LoggerHook', interval=100), )

+num_classes = 4
+key_node_idx = 1
+value_node_idx = 2

 model = dict(
     type='SDMGR',
-    backbone=dict(type='UNet', base_channels=16),
-    bbox_head=dict(
-        type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=4),
-    visual_modality=False,
-    train_cfg=None,
-    test_cfg=None,
-    class_list=None,
-    openset=True)

-optimizer = dict(type='Adam', weight_decay=0.0001)
-optimizer_config = dict(grad_clip=None)
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=1,
-    warmup_ratio=1,
-    step=[40, 50])
-total_epochs = 60
+    kie_head=dict(
+        type='SDMGRHead',
+        visual_dim=16,
+        num_classes=num_classes,
+        module_loss=dict(type='SDMGRModuleLoss'),
+        postprocessor=dict(
+            type='SDMGRPostProcessor',
+            link_type='one-to-many',
+            key_node_idx=key_node_idx,
+            value_node_idx=value_node_idx)),
+    dictionary=dict(
+        type='Dictionary',
+        dict_file='data/wildreceipt/dict.txt',
+        with_padding=True,
+        with_unknown=True,
+        unknown_token=None),
+)

 train_pipeline = [
-    dict(type='LoadAnnotations'),
-    dict(type='ResizeNoImg', img_scale=(1024, 512), keep_ratio=True),
-    dict(type='KIEFormatBundle'),
-    dict(
-        type='Collect',
-        keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'],
-        meta_keys=('filename', 'ori_filename', 'ori_texts'))
+    dict(type='LoadKIEAnnotations'),
+    dict(type='Resize', scale=(1024, 512), keep_ratio=True),
+    dict(type='PackKIEInputs')
 ]
 test_pipeline = [
-    dict(type='LoadAnnotations'),
-    dict(type='ResizeNoImg', img_scale=(1024, 512), keep_ratio=True),
-    dict(type='KIEFormatBundle'),
     dict(
-        type='Collect',
-        keys=['img', 'relations', 'texts', 'gt_bboxes'],
-        meta_keys=('filename', 'ori_filename', 'ori_texts', 'ori_bboxes',
-                   'img_norm_cfg', 'ori_filename', 'img_shape'))
+        type='LoadKIEAnnotations',
+        key_node_idx=key_node_idx,
+        value_node_idx=value_node_idx),  # Keep key->value edges for evaluation
+    dict(type='Resize', scale=(1024, 512), keep_ratio=True),
+    dict(type='PackKIEInputs'),
 ]

-dataset_type = 'OpensetKIEDataset'
-data_root = 'data/wildreceipt'
+dataset_type = 'WildReceiptDataset'
+data_root = 'data/wildreceipt/'

-loader = dict(
-    type='HardDiskLoader',
-    repeat=1,
-    parser=dict(
-        type='LineJsonParser',
-        keys=['file_name', 'height', 'width', 'annotations']))

-train = dict(
+train_dataset = dict(
     type=dataset_type,
-    ann_file=f'{data_root}/openset_train.txt',
-    pipeline=train_pipeline,
-    img_prefix=data_root,
-    link_type='one-to-many',
-    loader=loader,
-    dict_file=f'{data_root}/dict.txt',
-    test_mode=False)
-test = dict(
+    data_root=data_root,
+    metainfo=data_root + 'class_list.txt',
+    ann_file='openset_train.txt',
+    pipeline=train_pipeline)

+test_dataset = dict(
     type=dataset_type,
-    ann_file=f'{data_root}/openset_test.txt',
-    pipeline=test_pipeline,
-    img_prefix=data_root,
-    link_type='one-to-many',
-    loader=loader,
-    dict_file=f'{data_root}/dict.txt',
-    test_mode=True)
+    data_root=data_root,
+    metainfo=data_root + 'class_list.txt',
+    ann_file='openset_test.txt',
+    test_mode=True,
+    pipeline=test_pipeline)

-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=1,
-    val_dataloader=dict(samples_per_gpu=1),
-    test_dataloader=dict(samples_per_gpu=1),
-    train=train,
-    val=test,
-    test=test)
+train_dataloader = dict(
+    batch_size=4,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=train_dataset)
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=test_dataset)
+test_dataloader = val_dataloader

-evaluation = dict(interval=1, metric='openset_f1', metric_options=None)

 find_unused_parameters = True
+val_evaluator = [
+    dict(
+        type='F1Metric',
+        prefix='node',
+        key='labels',
+        mode=['micro', 'macro'],
+        num_classes=num_classes,
+        cared_classes=[key_node_idx, value_node_idx]),
+    dict(
+        type='F1Metric',
+        prefix='edge',
+        mode='micro',
+        key='edge_labels',
+        cared_classes=[1],  # binary f1 score
+        num_classes=2)
+]
+test_evaluator = val_evaluator
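The openset config above evaluates nodes and edges with two separate F1Metric entries; the edge entry computes micro F1 restricted to class 1 only, which reduces to the binary F1 of predicted key->value links. The snippet below is a standalone sketch of that reduction, not MMOCR's F1Metric implementation.

# Binary F1 over key->value edge labels (1 = linked, 0 = not linked).
def binary_edge_f1(pred_labels, gt_labels, positive=1):
    tp = sum(p == positive and g == positive for p, g in zip(pred_labels, gt_labels))
    fp = sum(p == positive and g != positive for p, g in zip(pred_labels, gt_labels))
    fn = sum(p != positive and g == positive for p, g in zip(pred_labels, gt_labels))
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    return 2 * precision * recall / (precision + recall) if precision + recall else 0.0

# Example: 3 predicted links, 2 of them correct, 1 ground-truth link missed.
print(binary_edge_f1([1, 1, 1, 0], [1, 1, 0, 1]))  # 0.666...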
@@ -1,105 +1,94 @@
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-max_scale, min_scale = 1024, 512
+_base_ = ['../../_base_/default_runtime.py']

-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='KIEFormatBundle'),
-    dict(
-        type='Collect',
-        keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'])
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations'),
-    dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='KIEFormatBundle'),
-    dict(
-        type='Collect',
-        keys=['img', 'relations', 'texts', 'gt_bboxes'],
-        meta_keys=[
-            'img_norm_cfg', 'img_shape', 'ori_filename', 'filename',
-            'ori_texts'
-        ])
+optim_wrapper = dict(
+    type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001))
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# learning rate
+param_scheduler = [
+    dict(type='MultiStepLR', milestones=[40, 50], end=60),
 ]

-dataset_type = 'KIEDataset'
-data_root = 'data/wildreceipt'
+default_hooks = dict(logger=dict(type='LoggerHook', interval=100), )

-loader = dict(
-    type='HardDiskLoader',
-    repeat=1,
-    parser=dict(
-        type='LineJsonParser',
-        keys=['file_name', 'height', 'width', 'annotations']))

-train = dict(
-    type=dataset_type,
-    ann_file=f'{data_root}/train.txt',
-    pipeline=train_pipeline,
-    img_prefix=data_root,
-    loader=loader,
-    dict_file=f'{data_root}/dict.txt',
-    test_mode=False)
-test = dict(
-    type=dataset_type,
-    ann_file=f'{data_root}/test.txt',
-    pipeline=test_pipeline,
-    img_prefix=data_root,
-    loader=loader,
-    dict_file=f'{data_root}/dict.txt',
-    test_mode=True)

-data = dict(
-    samples_per_gpu=4,
-    workers_per_gpu=4,
-    val_dataloader=dict(samples_per_gpu=1),
-    test_dataloader=dict(samples_per_gpu=1),
-    train=train,
-    val=test,
-    test=test)

-evaluation = dict(
-    interval=1,
-    metric='macro_f1',
-    metric_options=dict(
-        macro_f1=dict(
-            ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])))
+num_classes = 26

 model = dict(
     type='SDMGR',
     backbone=dict(type='UNet', base_channels=16),
-    bbox_head=dict(
-        type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26),
-    visual_modality=True,
-    train_cfg=None,
-    test_cfg=None,
-    class_list=f'{data_root}/class_list.txt')
+    roi_extractor=dict(
+        type='mmdet.SingleRoIExtractor',
+        roi_layer=dict(type='RoIAlign', output_size=7),
+        featmap_strides=[1]),
+    kie_head=dict(
+        type='SDMGRHead',
+        visual_dim=16,
+        num_classes=num_classes,
+        module_loss=dict(type='SDMGRModuleLoss'),
+        postprocessor=dict(type='SDMGRPostProcessor')),
+    dictionary=dict(
+        type='Dictionary',
+        dict_file='data/wildreceipt/dict.txt',
+        with_padding=True,
+        with_unknown=True,
+        unknown_token=None),
+    data_preprocessor=dict(
+        type='ImgDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32),
+)

-optimizer = dict(type='Adam', weight_decay=0.0001)
-optimizer_config = dict(grad_clip=None)
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=1,
-    warmup_ratio=1,
-    step=[40, 50])
-total_epochs = 60
+train_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadKIEAnnotations'),
+    dict(type='Resize', scale=(1024, 512), keep_ratio=True),
+    dict(type='PackKIEInputs')
+]
+test_pipeline = [
+    dict(type='LoadImageFromFile'),
+    dict(type='LoadKIEAnnotations'),
+    dict(type='Resize', scale=(1024, 512), keep_ratio=True),
+    dict(type='PackKIEInputs'),
+]

-checkpoint_config = dict(interval=1)
-log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
+dataset_type = 'WildReceiptDataset'
+data_root = 'data/wildreceipt/'

 find_unused_parameters = True
+train_dataset = dict(
+    type=dataset_type,
+    data_root=data_root,
+    metainfo=data_root + 'class_list.txt',
+    ann_file='train.txt',
+    pipeline=train_pipeline)

+test_dataset = dict(
+    type=dataset_type,
+    data_root=data_root,
+    metainfo=data_root + 'class_list.txt',
+    ann_file='test.txt',
+    test_mode=True,
+    pipeline=test_pipeline)

+train_dataloader = dict(
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=train_dataset)
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=test_dataset)
+test_dataloader = val_dataloader

+val_evaluator = dict(
+    type='F1Metric',
+    mode='macro',
+    num_classes=num_classes,
+    ignored_classes=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25])
+test_evaluator = val_evaluator
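The visual config above moves image normalisation and padding out of the pipeline (old Normalize/Pad transforms) and into the model's data_preprocessor. The following is a rough NumPy sketch of what that ImgDataPreprocessor configuration amounts to, using the mean/std and pad_size_divisor values from the config; it is an illustration, not the MMEngine implementation.

import numpy as np

MEAN = np.array([123.675, 116.28, 103.53], dtype=np.float32)
STD = np.array([58.395, 57.12, 57.375], dtype=np.float32)

def preprocess(img_bgr: np.ndarray, divisor: int = 32) -> np.ndarray:
    img = img_bgr[..., ::-1].astype(np.float32)  # bgr_to_rgb=True
    img = (img - MEAN) / STD                     # per-channel mean/std from the config
    h, w = img.shape[:2]
    pad_h = (divisor - h % divisor) % divisor    # pad_size_divisor=32
    pad_w = (divisor - w % divisor) % divisor
    return np.pad(img, ((0, pad_h), (0, pad_w), (0, 0)))  # zero-pad bottom/right

padded = preprocess(np.zeros((500, 700, 3), dtype=np.uint8))
print(padded.shape)  # (512, 704, 3)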