diff --git a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py index 220135a0..104a4a01 100644 --- a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py +++ b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt.py @@ -1,98 +1,81 @@ -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -max_scale, min_scale = 1024, 512 +_base_ = ['../../_base_/default_runtime.py'] -train_pipeline = [ - dict(type='LoadAnnotations'), - dict( - type='ResizeNoImg', img_scale=(max_scale, min_scale), keep_ratio=True), - dict(type='KIEFormatBundle'), - dict( - type='Collect', - keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'], - meta_keys=('filename', 'ori_texts')) -] -test_pipeline = [ - dict(type='LoadAnnotations'), - dict( - type='ResizeNoImg', img_scale=(max_scale, min_scale), keep_ratio=True), - dict(type='KIEFormatBundle'), - dict( - type='Collect', - keys=['img', 'relations', 'texts', 'gt_bboxes'], - meta_keys=('filename', 'ori_texts', 'img_norm_cfg', 'ori_filename', - 'img_shape')) +optim_wrapper = dict( + type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +# learning rate +param_scheduler = [ + dict(type='MultiStepLR', milestones=[40, 50], end=60), ] -dataset_type = 'KIEDataset' -data_root = 'data/wildreceipt' +default_hooks = dict(logger=dict(type='LoggerHook', interval=100), ) -loader = dict( - type='HardDiskLoader', - repeat=1, - parser=dict( - type='LineJsonParser', - keys=['file_name', 'height', 'width', 'annotations'])) - -train = dict( - type=dataset_type, - ann_file=f'{data_root}/train.txt', - pipeline=train_pipeline, - img_prefix=data_root, - loader=loader, - dict_file=f'{data_root}/dict.txt', - test_mode=False) -test = dict( - type=dataset_type, - ann_file=f'{data_root}/test.txt', - pipeline=test_pipeline, - img_prefix=data_root, - loader=loader, - dict_file=f'{data_root}/dict.txt', - test_mode=True) - -data = dict( - samples_per_gpu=4, - workers_per_gpu=1, - val_dataloader=dict(samples_per_gpu=1), - test_dataloader=dict(samples_per_gpu=1), - train=train, - val=test, - test=test) - -evaluation = dict( - interval=1, - metric='macro_f1', - metric_options=dict( - macro_f1=dict( - ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]))) +num_classes = 26 model = dict( type='SDMGR', - backbone=dict(type='UNet', base_channels=16), - bbox_head=dict( - type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26), - visual_modality=False, - train_cfg=None, - test_cfg=None, - class_list=f'{data_root}/class_list.txt') + kie_head=dict( + type='SDMGRHead', + visual_dim=16, + num_classes=num_classes, + module_loss=dict(type='SDMGRModuleLoss'), + postprocessor=dict(type='SDMGRPostProcessor')), + dictionary=dict( + type='Dictionary', + dict_file='data/wildreceipt/dict.txt', + with_padding=True, + with_unknown=True, + unknown_token=None), +) -optimizer = dict(type='Adam', weight_decay=0.0001) -optimizer_config = dict(grad_clip=None) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=1, - warmup_ratio=1, - step=[40, 50]) -total_epochs = 60 +train_pipeline = [ + dict(type='LoadKIEAnnotations'), + dict(type='Resize', scale=(1024, 512), keep_ratio=True), + dict(type='PackKIEInputs') +] +test_pipeline = [ + dict(type='LoadKIEAnnotations'), + dict(type='Resize', scale=(1024, 512), keep_ratio=True), + dict(type='PackKIEInputs'), +] -checkpoint_config = dict(interval=1) -log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) -dist_params = dict(backend='nccl') -log_level = 'INFO' -load_from = None -resume_from = None -workflow = [('train', 1)] +dataset_type = 'WildReceiptDataset' +data_root = 'data/wildreceipt/' -find_unused_parameters = True +train_dataset = dict( + type=dataset_type, + data_root=data_root, + metainfo=data_root + 'class_list.txt', + ann_file='train.txt', + pipeline=train_pipeline) + +test_dataset = dict( + type=dataset_type, + data_root=data_root, + metainfo=data_root + 'class_list.txt', + ann_file='test.txt', + test_mode=True, + pipeline=test_pipeline) + +train_dataloader = dict( + batch_size=4, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=train_dataset) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=test_dataset) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='F1Metric', + mode='macro', + num_classes=num_classes, + ignored_classes=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]) +test_evaluator = val_evaluator diff --git a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py index b295bd13..e3e2b1e6 100644 --- a/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py +++ b/configs/kie/sdmgr/sdmgr_novisual_60e_wildreceipt_openset.py @@ -1,84 +1,101 @@ _base_ = ['../../_base_/default_runtime.py'] +optim_wrapper = dict( + type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +# learning rate +param_scheduler = [ + dict(type='MultiStepLR', milestones=[40, 50], end=60), +] + +default_hooks = dict(logger=dict(type='LoggerHook', interval=100), ) + +num_classes = 4 +key_node_idx = 1 +value_node_idx = 2 + model = dict( type='SDMGR', - backbone=dict(type='UNet', base_channels=16), - bbox_head=dict( - type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=4), - visual_modality=False, - train_cfg=None, - test_cfg=None, - class_list=None, - openset=True) - -optimizer = dict(type='Adam', weight_decay=0.0001) -optimizer_config = dict(grad_clip=None) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=1, - warmup_ratio=1, - step=[40, 50]) -total_epochs = 60 + kie_head=dict( + type='SDMGRHead', + visual_dim=16, + num_classes=num_classes, + module_loss=dict(type='SDMGRModuleLoss'), + postprocessor=dict( + type='SDMGRPostProcessor', + link_type='one-to-many', + key_node_idx=key_node_idx, + value_node_idx=value_node_idx)), + dictionary=dict( + type='Dictionary', + dict_file='data/wildreceipt/dict.txt', + with_padding=True, + with_unknown=True, + unknown_token=None), +) train_pipeline = [ - dict(type='LoadAnnotations'), - dict(type='ResizeNoImg', img_scale=(1024, 512), keep_ratio=True), - dict(type='KIEFormatBundle'), - dict( - type='Collect', - keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels'], - meta_keys=('filename', 'ori_filename', 'ori_texts')) + dict(type='LoadKIEAnnotations'), + dict(type='Resize', scale=(1024, 512), keep_ratio=True), + dict(type='PackKIEInputs') ] test_pipeline = [ - dict(type='LoadAnnotations'), - dict(type='ResizeNoImg', img_scale=(1024, 512), keep_ratio=True), - dict(type='KIEFormatBundle'), dict( - type='Collect', - keys=['img', 'relations', 'texts', 'gt_bboxes'], - meta_keys=('filename', 'ori_filename', 'ori_texts', 'ori_bboxes', - 'img_norm_cfg', 'ori_filename', 'img_shape')) + type='LoadKIEAnnotations', + key_node_idx=key_node_idx, + value_node_idx=value_node_idx), # Keep key->value edges for evaluation + dict(type='Resize', scale=(1024, 512), keep_ratio=True), + dict(type='PackKIEInputs'), ] -dataset_type = 'OpensetKIEDataset' -data_root = 'data/wildreceipt' +dataset_type = 'WildReceiptDataset' +data_root = 'data/wildreceipt/' -loader = dict( - type='HardDiskLoader', - repeat=1, - parser=dict( - type='LineJsonParser', - keys=['file_name', 'height', 'width', 'annotations'])) - -train = dict( +train_dataset = dict( type=dataset_type, - ann_file=f'{data_root}/openset_train.txt', - pipeline=train_pipeline, - img_prefix=data_root, - link_type='one-to-many', - loader=loader, - dict_file=f'{data_root}/dict.txt', - test_mode=False) -test = dict( + data_root=data_root, + metainfo=data_root + 'class_list.txt', + ann_file='openset_train.txt', + pipeline=train_pipeline) + +test_dataset = dict( type=dataset_type, - ann_file=f'{data_root}/openset_test.txt', - pipeline=test_pipeline, - img_prefix=data_root, - link_type='one-to-many', - loader=loader, - dict_file=f'{data_root}/dict.txt', - test_mode=True) + data_root=data_root, + metainfo=data_root + 'class_list.txt', + ann_file='openset_test.txt', + test_mode=True, + pipeline=test_pipeline) -data = dict( - samples_per_gpu=4, - workers_per_gpu=1, - val_dataloader=dict(samples_per_gpu=1), - test_dataloader=dict(samples_per_gpu=1), - train=train, - val=test, - test=test) +train_dataloader = dict( + batch_size=4, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=train_dataset) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=test_dataset) +test_dataloader = val_dataloader -evaluation = dict(interval=1, metric='openset_f1', metric_options=None) - -find_unused_parameters = True +val_evaluator = [ + dict( + type='F1Metric', + prefix='node', + key='labels', + mode=['micro', 'macro'], + num_classes=num_classes, + cared_classes=[key_node_idx, value_node_idx]), + dict( + type='F1Metric', + prefix='edge', + mode='micro', + key='edge_labels', + cared_classes=[1], # binary f1 score + num_classes=2) +] +test_evaluator = val_evaluator diff --git a/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py index f073064a..21b2c785 100644 --- a/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py +++ b/configs/kie/sdmgr/sdmgr_unet16_60e_wildreceipt.py @@ -1,105 +1,94 @@ -img_norm_cfg = dict( - mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) -max_scale, min_scale = 1024, 512 +_base_ = ['../../_base_/default_runtime.py'] -train_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='KIEFormatBundle'), - dict( - type='Collect', - keys=['img', 'relations', 'texts', 'gt_bboxes', 'gt_labels']) -] -test_pipeline = [ - dict(type='LoadImageFromFile'), - dict(type='LoadAnnotations'), - dict(type='Resize', img_scale=(max_scale, min_scale), keep_ratio=True), - dict(type='RandomFlip', flip_ratio=0.), - dict(type='Normalize', **img_norm_cfg), - dict(type='Pad', size_divisor=32), - dict(type='KIEFormatBundle'), - dict( - type='Collect', - keys=['img', 'relations', 'texts', 'gt_bboxes'], - meta_keys=[ - 'img_norm_cfg', 'img_shape', 'ori_filename', 'filename', - 'ori_texts' - ]) +optim_wrapper = dict( + type='OptimWrapper', optimizer=dict(type='Adam', weight_decay=0.0001)) +train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=60, val_interval=1) +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') +# learning rate +param_scheduler = [ + dict(type='MultiStepLR', milestones=[40, 50], end=60), ] -dataset_type = 'KIEDataset' -data_root = 'data/wildreceipt' +default_hooks = dict(logger=dict(type='LoggerHook', interval=100), ) -loader = dict( - type='HardDiskLoader', - repeat=1, - parser=dict( - type='LineJsonParser', - keys=['file_name', 'height', 'width', 'annotations'])) - -train = dict( - type=dataset_type, - ann_file=f'{data_root}/train.txt', - pipeline=train_pipeline, - img_prefix=data_root, - loader=loader, - dict_file=f'{data_root}/dict.txt', - test_mode=False) -test = dict( - type=dataset_type, - ann_file=f'{data_root}/test.txt', - pipeline=test_pipeline, - img_prefix=data_root, - loader=loader, - dict_file=f'{data_root}/dict.txt', - test_mode=True) - -data = dict( - samples_per_gpu=4, - workers_per_gpu=4, - val_dataloader=dict(samples_per_gpu=1), - test_dataloader=dict(samples_per_gpu=1), - train=train, - val=test, - test=test) - -evaluation = dict( - interval=1, - metric='macro_f1', - metric_options=dict( - macro_f1=dict( - ignores=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]))) +num_classes = 26 model = dict( type='SDMGR', backbone=dict(type='UNet', base_channels=16), - bbox_head=dict( - type='SDMGRHead', visual_dim=16, num_chars=92, num_classes=26), - visual_modality=True, - train_cfg=None, - test_cfg=None, - class_list=f'{data_root}/class_list.txt') + roi_extractor=dict( + type='mmdet.SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', output_size=7), + featmap_strides=[1]), + kie_head=dict( + type='SDMGRHead', + visual_dim=16, + num_classes=num_classes, + module_loss=dict(type='SDMGRModuleLoss'), + postprocessor=dict(type='SDMGRPostProcessor')), + dictionary=dict( + type='Dictionary', + dict_file='data/wildreceipt/dict.txt', + with_padding=True, + with_unknown=True, + unknown_token=None), + data_preprocessor=dict( + type='ImgDataPreprocessor', + mean=[123.675, 116.28, 103.53], + std=[58.395, 57.12, 57.375], + bgr_to_rgb=True, + pad_size_divisor=32), +) -optimizer = dict(type='Adam', weight_decay=0.0001) -optimizer_config = dict(grad_clip=None) -lr_config = dict( - policy='step', - warmup='linear', - warmup_iters=1, - warmup_ratio=1, - step=[40, 50]) -total_epochs = 60 +train_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadKIEAnnotations'), + dict(type='Resize', scale=(1024, 512), keep_ratio=True), + dict(type='PackKIEInputs') +] +test_pipeline = [ + dict(type='LoadImageFromFile'), + dict(type='LoadKIEAnnotations'), + dict(type='Resize', scale=(1024, 512), keep_ratio=True), + dict(type='PackKIEInputs'), +] -checkpoint_config = dict(interval=1) -log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')]) -dist_params = dict(backend='nccl') -log_level = 'INFO' -load_from = None -resume_from = None -workflow = [('train', 1)] +dataset_type = 'WildReceiptDataset' +data_root = 'data/wildreceipt/' -find_unused_parameters = True +train_dataset = dict( + type=dataset_type, + data_root=data_root, + metainfo=data_root + 'class_list.txt', + ann_file='train.txt', + pipeline=train_pipeline) + +test_dataset = dict( + type=dataset_type, + data_root=data_root, + metainfo=data_root + 'class_list.txt', + ann_file='test.txt', + test_mode=True, + pipeline=test_pipeline) + +train_dataloader = dict( + batch_size=4, + num_workers=4, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=train_dataset) +val_dataloader = dict( + batch_size=1, + num_workers=1, + persistent_workers=True, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=test_dataset) +test_dataloader = val_dataloader + +val_evaluator = dict( + type='F1Metric', + mode='macro', + num_classes=num_classes, + ignored_classes=[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 25]) +test_evaluator = val_evaluator