implementation of datasets and dataloader (#2)
* implementation of datasets and dataloader
* add testing file
* fix MergeDataset flag bug and QueryAwareDataset one-shot setting bug
* set all flags to 0 in the three dataset wrappers
* fix NwayKshotDataloader sampler bug and address some review comments
* add pytest file and VOC test data for pytest
* finish test files for few shot custom dataset, merge dataset and nwaykshot dataset
* cover more corner cases in both datasets and add test files for query aware dataset and nwaykshot dataset
* finish test file of dataloader and fix all random seeds in test files
* remove config
* avoid ann info change
* fix voc comments
* Lyq dataset dataloader (#4): fix voc comments; fix comments and refactor FewShotCustomDataset and FewShotVOCDataset; add coco dataset and test file
* Lyq dataset dataloader (#6): fix comments
* Lyq dataset dataloader (#7): fix comments
@@ -1,49 +0,0 @@
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='bbox')
@@ -0,0 +1,116 @@
ALL_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor', 'bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor', 'aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor', 'boat', 'cat', 'motorbike', 'sheep', 'sofa')
}

NOVEL_CLASSES = {
    1: ('bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}

BASE_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor')
}

# dataset settings
data_root = 'data/VOCdevkit/'

split = 1
all_classes = ALL_CLASSES[split]
base_classes = BASE_CLASSES[split]
novel_classes = NOVEL_CLASSES[split]
num_base_shot = 1
num_novel_shot = 1
# load few shot data:
# each ann file corresponds to one class
# all files should use the same image prefix
ann_file_root = 'data/few_shot_voc_split/'
ann_file_per_class = []  # file path
img_prefix_per_class = []  # image prefix
ann_shot_filter_per_class = []  # ann filter for each ann file

for class_name in base_classes:
    ann_file_per_class.append(
        ann_file_root +
        f'{num_base_shot}shot/box_{num_base_shot}shot_{class_name}_train.txt')
    img_prefix_per_class.append(data_root)
    ann_shot_filter_per_class.append({class_name: num_base_shot})

for class_name in novel_classes:
    ann_file_per_class.append(
        ann_file_root +
        f'{num_novel_shot}shot/box_{num_novel_shot}shot_{class_name}_train.txt'
    )
    img_prefix_per_class.append(data_root)
    ann_shot_filter_per_class.append({class_name: num_novel_shot})

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=[(1333, 480), (1333, 800)], keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type='FewShotVOCDataset',
            ann_file=ann_file_per_class,
            img_prefix=img_prefix_per_class,
            ann_masks=ann_shot_filter_per_class,
            pipeline=train_pipeline,
            classes=all_classes,
            merge_dataset=True)),
    val=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=novel_classes),
    test=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=novel_classes))
evaluation = dict(interval=1, metric='mAP')
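The `ann_shot_filter_per_class` entries built above each map one class name to its shot budget, one filter per annotation file. A minimal sketch of what such a filter could do, assuming annotations carry a class name; the function and field names here are hypothetical and only illustrate the idea, the real filtering lives inside `FewShotVOCDataset`:

def apply_ann_shot_filter(anns, ann_shot_filter):
    # anns: list of dicts like {'class': 'bird', 'bbox': [...]}.
    # ann_shot_filter: dict mapping class name -> max instances to keep.
    kept, counts = [], {}
    for ann in anns:
        cls = ann['class']
        quota = ann_shot_filter.get(cls)
        if quota is None:
            continue  # classes outside the filter are dropped
        if counts.get(cls, 0) < quota:
            kept.append(ann)
            counts[cls] = counts.get(cls, 0) + 1
    return kept


# e.g. a 1-shot filter for 'bird' keeps only the first bird instance
print(apply_ann_shot_filter(
    [{'class': 'bird'}, {'class': 'bird'}, {'class': 'dog'}], {'bird': 1}))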
@@ -0,0 +1,92 @@
ALL_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor', 'bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor', 'aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor', 'boat', 'cat', 'motorbike', 'sheep', 'sofa')
}

NOVEL_CLASSES = {
    1: ('bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}

BASE_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor')
}

# dataset settings
data_root = 'data/VOCdevkit/'

# few shot setting
split = 1
base_classes = BASE_CLASSES[split]

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=[(1333, 480), (1333, 800)], keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type='VOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            pipeline=train_pipeline,
            classes=base_classes)),
    val=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=base_classes),
    test=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=base_classes))
evaluation = dict(interval=1, metric='mAP')
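These config files are plain Python executed by mmcv, so they can be loaded and inspected directly. A hedged sketch of doing so; the file path is hypothetical:

from mmcv import Config

cfg = Config.fromfile(
    'configs/detection/voc_base_training_split1.py')  # hypothetical path
print(cfg.data.train.type)                   # 'RepeatDataset'
print(cfg.data.train.dataset.type)           # 'VOCDataset'
print(len(cfg.data.train.dataset.ann_file))  # 2 (VOC2007 + VOC2012)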
@@ -0,0 +1,128 @@
ALL_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor', 'bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor', 'aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor', 'boat', 'cat', 'motorbike', 'sheep', 'sofa')
}

NOVEL_CLASSES = {
    1: ('bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}

BASE_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor')
}

# dataset settings
data_root = 'data/VOCdevkit/'

split = 1
all_classes = ALL_CLASSES[split]
base_classes = BASE_CLASSES[split]
novel_classes = NOVEL_CLASSES[split]
num_base_shot = 1
num_novel_shot = 1
# load few shot data:
# each ann file corresponds to one class
# all files should use the same image prefix
ann_file_root = 'data/few_shot_voc_split/'
ann_file_per_class = []  # file path
img_prefix_per_class = []  # image prefix
ann_shot_filter_per_class = []  # ann filter for each ann file

for class_name in base_classes:
    ann_file_per_class.append(
        ann_file_root +
        f'{num_base_shot}shot/box_{num_base_shot}shot_{class_name}_train.txt')
    img_prefix_per_class.append(data_root)
    ann_shot_filter_per_class.append({class_name: num_base_shot})

for class_name in novel_classes:
    ann_file_per_class.append(
        ann_file_root +
        f'{num_novel_shot}shot/box_{num_novel_shot}shot_{class_name}_train.txt'
    )
    img_prefix_per_class.append(data_root)
    ann_shot_filter_per_class.append({class_name: num_novel_shot})

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = dict(
    query=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ],
    support=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ])
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='NwayKshotDataset',
        support_way=20,
        support_shot=1,
        dataset=dict(
            type='FewShotVOCDataset',
            ann_file=ann_file_per_class,
            img_prefix=img_prefix_per_class,
            ann_masks=ann_shot_filter_per_class,
            pipeline=train_pipeline,
            classes=all_classes,
            merge_dataset=True)),
    val=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=novel_classes),
    test=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=novel_classes))
evaluation = dict(interval=10, metric='mAP')
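An N-way K-shot episode here pairs each query batch with a support batch of `support_way` classes times `support_shot` instances. A minimal sketch of that sampling, illustrative only; the real logic lives in `NwayKshotDataset`, and `data_infos_by_class` is an assumed map from class name to dataset indices holding that class:

import random


def sample_support_batch(data_infos_by_class, support_way, support_shot):
    # pick N classes, then K instance indices per class
    classes = random.sample(list(data_infos_by_class), support_way)
    batch = []
    for cls in classes:
        batch.extend(random.sample(data_infos_by_class[cls], support_shot))
    return batch


# Example: 2-way 1-shot from a toy index map.
toy = {'bird': [0, 3], 'bus': [1], 'cow': [2, 4]}
print(sample_support_batch(toy, support_way=2, support_shot=1))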
@@ -0,0 +1,106 @@
ALL_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor', 'bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor', 'aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor', 'boat', 'cat', 'motorbike', 'sheep', 'sofa')
}

NOVEL_CLASSES = {
    1: ('bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}

BASE_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor')
}

# dataset settings
data_root = 'data/VOCdevkit/'

# few shot setting
split = 1
base_classes = BASE_CLASSES[split]

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = dict(
    query=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ],
    support=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ])
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='NwayKshotDataset',
        support_way=15,
        support_shot=1,
        dataset=dict(
            type='FewShotVOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root, data_root],
            pipeline=train_pipeline,
            classes=base_classes,
            merge_dataset=True)),
    val=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=base_classes),
    test=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=base_classes))
evaluation = dict(interval=1, metric='mAP')
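Note that `support_way=15` in this base-training config matches the size of the split-1 base set, so every base class appears in each support batch:

BASE_SPLIT1 = ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat',
               'chair', 'diningtable', 'dog', 'horse', 'person',
               'pottedplant', 'sheep', 'train', 'tvmonitor')
assert len(BASE_SPLIT1) == 15  # support_way covers all base classes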
@@ -0,0 +1,128 @@
ALL_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor', 'bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor', 'aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor', 'boat', 'cat', 'motorbike', 'sheep', 'sofa')
}

NOVEL_CLASSES = {
    1: ('bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}

BASE_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor')
}

# dataset settings
data_root = 'data/VOCdevkit/'

split = 1
all_classes = ALL_CLASSES[split]
base_classes = BASE_CLASSES[split]
novel_classes = NOVEL_CLASSES[split]
num_base_shot = 1
num_novel_shot = 1
# load few shot data:
# each ann file corresponds to one class
# all files should use the same image prefix
ann_file_root = 'data/few_shot_voc_split/'
ann_file_per_class = []  # file path
img_prefix_per_class = []  # image prefix
ann_shot_filter_per_class = []  # ann filter for each ann file

for class_name in base_classes:
    ann_file_per_class.append(
        ann_file_root +
        f'{num_base_shot}shot/box_{num_base_shot}shot_{class_name}_train.txt')
    img_prefix_per_class.append(data_root)
    ann_shot_filter_per_class.append({class_name: num_base_shot})

for class_name in novel_classes:
    ann_file_per_class.append(
        ann_file_root +
        f'{num_novel_shot}shot/box_{num_novel_shot}shot_{class_name}_train.txt'
    )
    img_prefix_per_class.append(data_root)
    ann_shot_filter_per_class.append({class_name: num_novel_shot})

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = dict(
    query=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ],
    support=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ])
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='QueryAwareDataset',
        support_way=2,
        support_shot=1,
        dataset=dict(
            type='FewShotVOCDataset',
            ann_file=ann_file_per_class,
            img_prefix=img_prefix_per_class,
            ann_masks=ann_shot_filter_per_class,
            pipeline=train_pipeline,
            classes=all_classes,
            merge_dataset=True)),
    val=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=novel_classes),
    test=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=novel_classes))
evaluation = dict(interval=1, metric='mAP')
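Unlike the N-way K-shot wrapper, a query-aware dataset conditions the support set on each query image. An illustrative sketch of that sampling, assuming an index map from class name to dataset indices; the real logic lives in `QueryAwareDataset` and these names are hypothetical:

import random


def sample_query_aware_support(query_classes, indices_by_class,
                               support_way, support_shot):
    # at least one support class is present in the query image,
    # the remaining ways are sampled negatives
    pos = random.choice(query_classes)
    negatives = [c for c in indices_by_class if c not in query_classes]
    ways = [pos] + random.sample(negatives, support_way - 1)
    return {c: random.sample(indices_by_class[c], support_shot) for c in ways}


# Example: 2-way 1-shot support for a query containing 'bird'.
toy = {'bird': [0, 3], 'bus': [1], 'cow': [2, 4]}
print(sample_query_aware_support(['bird'], toy, support_way=2, support_shot=1))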
@@ -0,0 +1,164 @@
VOC_FEW_SHOT_SPLIT_ALL_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor', 'bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor', 'aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor', 'boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}
VOC_FEW_SHOT_SPLIT_NOVEL_CLASSES = {
    1: ('bird', 'bus', 'cow', 'motorbike', 'sofa'),
    2: ('aeroplane', 'bottle', 'cow', 'horse', 'sofa'),
    3: ('boat', 'cat', 'motorbike', 'sheep', 'sofa'),
}
VOC_FEW_SHOT_SPLIT_BASE_CLASSES = {
    1: ('aeroplane', 'bicycle', 'boat', 'bottle', 'car', 'cat', 'chair',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'sheep',
        'train', 'tvmonitor'),
    2: ('bicycle', 'bird', 'boat', 'bus', 'car', 'cat', 'chair', 'diningtable',
        'dog', 'motorbike', 'person', 'pottedplant', 'sheep', 'train',
        'tvmonitor'),
    3: ('aeroplane', 'bicycle', 'bird', 'bottle', 'bus', 'car', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'person', 'pottedplant', 'train',
        'tvmonitor'),
}

# dataset settings
data_root = 'data/VOCdevkit/'

# few shot setting
split = 1
base_classes = VOC_FEW_SHOT_SPLIT_BASE_CLASSES[split]

img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = dict(
    query=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ],
    support=[
        dict(type='LoadImageFromFile'),
        dict(type='LoadAnnotations', with_bbox=True),
        dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
        dict(type='RandomFlip', flip_ratio=0.5),
        dict(type='Normalize', **img_norm_cfg),
        dict(type='Pad', size_divisor=32),
        dict(type='DefaultFormatBundle'),
        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
    ])
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='QueryAwareDataset',
        support_way=2,
        support_shot=5,
        dataset=dict(
            type='FewShotVOCDataset',
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root, data_root],
            pipeline=train_pipeline,
            classes=base_classes,
            merge_dataset=True)),
    val=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=base_classes),
    test=dict(
        type='VOCDataset',
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline,
        classes=base_classes))
evaluation = dict(interval=1, metric='mAP')
@@ -1,55 +0,0 @@
# dataset settings
dataset_type = 'VOCDataset'
data_root = 'data/VOCdevkit/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1000, 600),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type='RepeatDataset',
        times=3,
        dataset=dict(
            type=dataset_type,
            ann_file=[
                data_root + 'VOC2007/ImageSets/Main/trainval.txt',
                data_root + 'VOC2012/ImageSets/Main/trainval.txt'
            ],
            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],
            pipeline=train_pipeline)),
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',
        img_prefix=data_root + 'VOC2007/',
        pipeline=test_pipeline))
evaluation = dict(interval=1, metric='mAP')
@@ -3,7 +3,8 @@ import random
 import numpy as np
 import torch
 from mmcls.apis.train import train_model as train_classifier
-from mmdet.apis.train import train_detector
+from mmfewshot.detection.apis.train import train_detector


 def set_random_seed(seed, deterministic=False):
@@ -1,7 +1,11 @@
+# this file is only for unittests
 from mmcls.datasets.builder import build_dataloader as build_cls_dataloader
 from mmcls.datasets.builder import build_dataset as build_cls_dataset
-from mmdet.datasets.builder import build_dataloader as build_det_dataloader
-from mmdet.datasets.builder import build_dataset as build_det_dataset
+from mmfewshot.detection.datasets.builder import \
+    build_dataloader as build_det_dataloader
+from mmfewshot.detection.datasets.builder import \
+    build_dataset as build_det_dataset


 def build_dataloader(dataset=None, task_type='mmdet', round_up=True, **kwargs):
@@ -1,3 +1,4 @@
+# this file is only for unittests
 from mmcls.models.builder import build_classifier as build_cls_model
 from mmdet.models.builder import build_detector as build_det_model
@@ -0,0 +1,3 @@
from .train import get_root_logger, set_random_seed, train_detector

__all__ = ['get_root_logger', 'set_random_seed', 'train_detector']
@@ -0,0 +1,170 @@
import random
import warnings

import numpy as np
import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
from mmcv.runner import (HOOKS, DistSamplerSeedHook, EpochBasedRunner,
                         Fp16OptimizerHook, OptimizerHook, build_optimizer,
                         build_runner)
from mmcv.utils import build_from_cfg
from mmdet.core import DistEvalHook, EvalHook
from mmdet.datasets import replace_ImageToTensor
from mmdet.utils import get_root_logger

from mmfewshot.detection.datasets import build_dataloader, build_dataset


def set_random_seed(seed, deterministic=False):
    """Set random seed.

    Args:
        seed (int): Seed to be used.
        deterministic (bool): Whether to set the deterministic option for
            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`
            to True and `torch.backends.cudnn.benchmark` to False.
            Default: False.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    if deterministic:
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False


def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    if 'imgs_per_gpu' in cfg.data:
        logger.warning('"imgs_per_gpu" is deprecated in MMDet V2.0. '
                       'Please use "samples_per_gpu" instead')
        if 'samples_per_gpu' in cfg.data:
            logger.warning(
                f'Got "imgs_per_gpu"={cfg.data.imgs_per_gpu} and '
                f'"samples_per_gpu"={cfg.data.samples_per_gpu}, "imgs_per_gpu"'
                f'={cfg.data.imgs_per_gpu} is used in this experiment')
        else:
            logger.warning(
                'Automatically set "samples_per_gpu"="imgs_per_gpu"='
                f'{cfg.data.imgs_per_gpu} in this experiment')
            cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu

    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)

    if 'runner' not in cfg:
        cfg.runner = {
            'type': 'EpochBasedRunner',
            'max_epochs': cfg.total_epochs
        }
        warnings.warn(
            'config is now expected to have a `runner` section, '
            'please set `runner` in your config.', UserWarning)
    else:
        if 'total_epochs' in cfg:
            assert cfg.total_epochs == cfg.runner.max_epochs

    runner = build_runner(
        cfg.runner,
        default_args=dict(
            model=model,
            optimizer=optimizer,
            work_dir=cfg.work_dir,
            logger=logger,
            meta=meta))

    # an ugly workaround to make .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = OptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        if isinstance(runner, EpochBasedRunner):
            runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        # Support batch_size > 1 in validation
        val_samples_per_gpu = cfg.data.val.pop('samples_per_gpu', 1)
        if val_samples_per_gpu > 1:
            # Replace 'ImageToTensor' with 'DefaultFormatBundle'
            cfg.data.val.pipeline = replace_ImageToTensor(
                cfg.data.val.pipeline)
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=val_samples_per_gpu,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # user-defined hooks
    if cfg.get('custom_hooks', None):
        custom_hooks = cfg.custom_hooks
        assert isinstance(custom_hooks, list), \
            f'custom_hooks expect list type, but got {type(custom_hooks)}'
        for hook_cfg in cfg.custom_hooks:
            assert isinstance(hook_cfg, dict), \
                'Each item in custom_hooks expects dict type, but got ' \
                f'{type(hook_cfg)}'
            hook_cfg = hook_cfg.copy()
            priority = hook_cfg.pop('priority', 'NORMAL')
            hook = build_from_cfg(hook_cfg, HOOKS)
            runner.register_hook(hook, priority=priority)

    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow)
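A hedged end-to-end sketch of driving `train_detector` from one of the configs above. The config path is hypothetical, and the config is assumed to define the usual mmdet fields (`model`, `optimizer`, `optimizer_config`, `lr_config`, `log_config`, `checkpoint_config`, `log_level`, `workflow`, `resume_from`, `load_from`):

from mmcv import Config
from mmdet.models import build_detector

from mmfewshot.detection.apis import set_random_seed, train_detector
from mmfewshot.detection.datasets import build_dataset

cfg = Config.fromfile(
    'configs/detection/nway_kshot_voc_split1.py')  # hypothetical path
cfg.work_dir = './work_dirs/demo'
cfg.gpu_ids = [0]
cfg.seed = 42
set_random_seed(cfg.seed, deterministic=True)

datasets = [build_dataset(cfg.data.train)]
model = build_detector(cfg.model)
train_detector(model, datasets, cfg, distributed=False, validate=True)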
@@ -1,3 +1,18 @@
-from .base_meta_learning_dataset import BaseMetaLearingDataset
+from .builder import build_dataloader, build_dataset
+from .dataloader_wrappers import NwayKshotDataloader
+from .dataset_wrappers import MergeDataset, NwayKshotDataset, QueryAwareDataset
+from .few_shot_custom import FewShotCustomDataset
+from .utils import query_support_collate_fn
+from .voc import FewShotVOCDataset

-__all__ = ['BaseMetaLearingDataset']
+__all__ = [
+    'build_dataloader',
+    'build_dataset',
+    'MergeDataset',
+    'QueryAwareDataset',
+    'NwayKshotDataset',
+    'NwayKshotDataloader',
+    'query_support_collate_fn',
+    'FewShotCustomDataset',
+    'FewShotVOCDataset',
+]
@@ -1,8 +0,0 @@
# just an example
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset


@DATASETS.register_module()
class BaseMetaLearingDataset(CustomDataset):
    pass
@@ -0,0 +1,226 @@
import copy
from functools import partial

from mmcv.parallel import collate
from mmcv.runner import get_dist_info
from mmcv.utils import build_from_cfg
from mmdet.datasets.builder import DATASETS, worker_init_fn
from mmdet.datasets.dataset_wrappers import (ClassBalancedDataset,
                                             ConcatDataset, RepeatDataset)
from mmdet.datasets.samplers import (DistributedGroupSampler,
                                     DistributedSampler, GroupSampler)
from torch.utils.data import DataLoader

from .dataset_wrappers import MergeDataset, NwayKshotDataset, QueryAwareDataset


def _concat_dataset(cfg, default_args=None):
    ann_files = cfg['ann_file']
    img_prefixes = cfg.get('img_prefix', None)
    seg_prefixes = cfg.get('seg_prefix', None)
    proposal_files = cfg.get('proposal_file', None)
    separate_eval = cfg.get('separate_eval', True)
    merge_dataset = cfg.get('merge_dataset', False)
    ann_shot_filter = cfg.get('ann_shot_filter', None)

    if ann_shot_filter is not None:
        assert merge_dataset, 'when using an ann shot filter to load ann ' \
            'files in FewShotDataset, merge_dataset should be set to True.'

    datasets = []
    num_dset = len(ann_files)
    for i in range(num_dset):
        data_cfg = copy.deepcopy(cfg)
        # pop 'separate_eval' since it is not a valid key for common datasets.
        if 'separate_eval' in data_cfg:
            data_cfg.pop('separate_eval')
        if 'merge_dataset' in data_cfg:
            data_cfg.pop('merge_dataset')
        data_cfg['ann_file'] = ann_files[i]
        if isinstance(img_prefixes, (list, tuple)):
            data_cfg['img_prefix'] = img_prefixes[i]
        if isinstance(seg_prefixes, (list, tuple)):
            data_cfg['seg_prefix'] = seg_prefixes[i]
        if isinstance(proposal_files, (list, tuple)):
            data_cfg['proposal_file'] = proposal_files[i]
        if isinstance(ann_shot_filter, (list, tuple)):
            data_cfg['ann_shot_filter'] = ann_shot_filter[i]

        datasets.append(build_dataset(data_cfg, default_args))
    if merge_dataset:
        return MergeDataset(datasets)
    else:
        return ConcatDataset(datasets, separate_eval)


def build_dataset(cfg, default_args=None):
    if isinstance(cfg, (list, tuple)):
        dataset = ConcatDataset([build_dataset(c, default_args) for c in cfg])
    elif cfg['type'] == 'ConcatDataset':
        dataset = ConcatDataset(
            [build_dataset(c, default_args) for c in cfg['datasets']],
            cfg.get('separate_eval', True))
    elif cfg['type'] == 'RepeatDataset':
        dataset = RepeatDataset(
            build_dataset(cfg['dataset'], default_args), cfg['times'])
    elif cfg['type'] == 'ClassBalancedDataset':
        dataset = ClassBalancedDataset(
            build_dataset(cfg['dataset'], default_args), cfg['oversample_thr'])
    elif cfg['type'] == 'QueryAwareDataset':
        dataset = QueryAwareDataset(
            build_dataset(cfg['dataset'], default_args), cfg['support_way'],
            cfg['support_shot'])
    elif cfg['type'] == 'NwayKshotDataset':
        dataset = NwayKshotDataset(
            build_dataset(cfg['dataset'], default_args), cfg['support_way'],
            cfg['support_shot'])
    elif isinstance(cfg.get('ann_file'), (list, tuple)):
        dataset = _concat_dataset(cfg, default_args)
    else:
        dataset = build_from_cfg(cfg, DATASETS, default_args)

    return dataset


def build_dataloader(dataset,
                     samples_per_gpu,
                     workers_per_gpu,
                     num_gpus=1,
                     dist=True,
                     shuffle=True,
                     seed=None,
                     **kwargs):
    """Build PyTorch DataLoader.

    In distributed training, each GPU/process has a dataloader.
    In non-distributed training, there is only one dataloader for all GPUs.

    Args:
        dataset (Dataset): A PyTorch dataset.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        dist (bool): Distributed training/test or not. Default: True.
        shuffle (bool): Whether to shuffle the data at every epoch.
            Default: True.
        seed (int): Random seed.
        kwargs: any keyword argument to be used to initialize DataLoader

    Returns:
        DataLoader: A PyTorch dataloader.
    """
    rank, world_size = get_dist_info()
    sampler, batch_size, num_workers = build_sampler(
        dist=dist,
        shuffle=shuffle,
        dataset=dataset,
        num_gpus=num_gpus,
        samples_per_gpu=samples_per_gpu,
        workers_per_gpu=workers_per_gpu,
        seed=seed)
    init_fn = partial(
        worker_init_fn, num_workers=num_workers, rank=rank,
        seed=seed) if seed is not None else None
    if isinstance(dataset, QueryAwareDataset):
        from .utils import query_support_collate_fn
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            sampler=sampler,
            num_workers=num_workers,
            collate_fn=partial(
                query_support_collate_fn, samples_per_gpu=samples_per_gpu),
            pin_memory=False,
            worker_init_fn=init_fn,
            **kwargs)
    elif isinstance(dataset, NwayKshotDataset):
        from .dataloader_wrappers import NwayKshotDataloader
        from .utils import query_support_collate_fn

        # init query dataloader
        query_data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            sampler=sampler,
            num_workers=num_workers,
            collate_fn=partial(
                query_support_collate_fn, samples_per_gpu=samples_per_gpu),
            pin_memory=False,
            worker_init_fn=init_fn,
            **kwargs)
        # create the support dataset from the query dataset and pre-sample
        # batch indices with the same length as the query dataloader
        support_dataset = copy.deepcopy(dataset)
        support_dataset.convert_query_to_support(len(query_data_loader))

        support_sampler, _, _ = build_sampler(
            dist=dist,
            shuffle=shuffle,
            dataset=support_dataset,
            num_gpus=num_gpus,
            samples_per_gpu=1,
            workers_per_gpu=workers_per_gpu,
            seed=seed)

        data_loader = NwayKshotDataloader(
            query_data_loader=query_data_loader,
            support_dataset=support_dataset,
            support_sampler=support_sampler,
            num_workers=num_workers,
            support_collate_fn=partial(
                query_support_collate_fn, samples_per_gpu=1),
            pin_memory=False,
            worker_init_fn=init_fn,
            **kwargs)
    else:
        data_loader = DataLoader(
            dataset,
            batch_size=batch_size,
            sampler=sampler,
            num_workers=num_workers,
            collate_fn=partial(collate, samples_per_gpu=samples_per_gpu),
            pin_memory=False,
            worker_init_fn=init_fn,
            **kwargs)

    return data_loader


def build_sampler(dist, shuffle, dataset, num_gpus, samples_per_gpu,
                  workers_per_gpu, seed):
    """Build a PyTorch sampler for DataLoader.

    Args:
        dist (bool): Distributed training/test or not.
        shuffle (bool): Whether to shuffle the data at every epoch.
        dataset (Dataset): A PyTorch dataset.
        num_gpus (int): Number of GPUs. Only used in non-distributed training.
        samples_per_gpu (int): Number of training samples on each GPU, i.e.,
            batch size of each GPU.
        workers_per_gpu (int): How many subprocesses to use for data loading
            for each GPU.
        seed (int): Random seed.

    Returns:
        tuple: (sampler, batch_size, num_workers) for the DataLoader.
    """
    rank, world_size = get_dist_info()
    if dist:
        # DistributedGroupSampler will definitely shuffle the data to satisfy
        # that images on each GPU are in the same group
        if shuffle:
            sampler = DistributedGroupSampler(
                dataset, samples_per_gpu, world_size, rank, seed=seed)
        else:
            sampler = DistributedSampler(
                dataset, world_size, rank, shuffle=False, seed=seed)
        batch_size = samples_per_gpu
        num_workers = workers_per_gpu
    else:
        sampler = GroupSampler(dataset, samples_per_gpu) if shuffle else None
        batch_size = num_gpus * samples_per_gpu
        num_workers = num_gpus * workers_per_gpu

    return sampler, batch_size, num_workers
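For reference, a hedged sketch of how the two builders compose for the query-aware setting, reusing the variables from the split-1 config above (`ann_file_per_class`, `img_prefix_per_class`, `ann_shot_filter_per_class`, `train_pipeline` and `all_classes` are assumed to be in scope):

train_cfg = dict(
    type='QueryAwareDataset',
    support_way=2,
    support_shot=1,
    dataset=dict(
        type='FewShotVOCDataset',
        ann_file=ann_file_per_class,
        img_prefix=img_prefix_per_class,
        ann_masks=ann_shot_filter_per_class,
        pipeline=train_pipeline,
        classes=all_classes,
        merge_dataset=True))
# build_dataset routes the per-class ann_file list through _concat_dataset,
# merges the pieces into a MergeDataset, and wraps the result in
# QueryAwareDataset; build_dataloader then picks query_support_collate_fn.
dataset = build_dataset(train_cfg)
data_loader = build_dataloader(
    dataset, samples_per_gpu=2, workers_per_gpu=2, num_gpus=1, dist=False)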
@ -0,0 +1,536 @@
import itertools
import logging
import os.path as osp
import tempfile
import warnings
from collections import OrderedDict

import mmcv
import numpy as np
from mmcv.utils import print_log
from mmdet.core import eval_recalls
from mmdet.datasets.api_wrappers import COCO, COCOeval
from mmdet.datasets.builder import DATASETS
from terminaltables import AsciiTable

from .few_shot_custom import FewShotCustomDataset


@DATASETS.register_module()
class FewShotCocoDataset(FewShotCustomDataset):

    def __init__(self, **kwargs):
        assert self.CLASSES or kwargs.get('classes', None), \
            'CLASSES in `FewShotCocoDataset` can not be None.'
        super(FewShotCocoDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Load annotation from COCO style annotation file.

        Args:
            ann_file (str): Path of annotation file.

        Returns:
            list[dict]: Annotation info from COCO api.
        """
        self.coco = COCO(ann_file)
        self.cat_ids = []
        self.cat2label = {}
        # keep the label order equal to the order in CLASSES
        for i, class_name in enumerate(self.CLASSES):
            cat_id = self.coco.get_cat_ids(cat_names=[class_name])[0]
            self.cat_ids.append(cat_id)
            self.cat2label[cat_id] = i
        self.img_ids = self.coco.get_img_ids()

        data_infos = []
        total_ann_ids = []
        for i in self.img_ids:
            info = self.coco.load_imgs([i])[0]
            info['filename'] = info['file_name']
            info['ann'] = self._get_ann_info(info)
            data_infos.append(info)
            ann_ids = self.coco.get_ann_ids(img_ids=[i])
            total_ann_ids.extend(ann_ids)
        assert len(set(total_ann_ids)) == len(
            total_ann_ids), f"Annotation ids in '{ann_file}' are not unique!"
        return data_infos
    def _get_ann_info(self, data_info):
        """Get COCO annotation of one image.

        Args:
            data_info (dict): Data info.

        Returns:
            dict: Annotation info of the specified image.
        """

        img_id = data_info['id']
        ann_ids = self.coco.get_ann_ids(img_ids=[img_id])
        ann_info = self.coco.load_anns(ann_ids)
        return self._parse_ann_info(data_info, ann_info)

    def _filter_imgs(self, min_size=32):
        """Filter out images that are too small or have no ground truths."""
        valid_inds = []
        # obtain images that contain annotation
        ids_with_ann = set(_['image_id'] for _ in self.coco.anns.values())
        # obtain images that contain annotations of the required categories
        ids_in_cat = set()
        for i, class_id in enumerate(self.cat_ids):
            ids_in_cat |= set(self.coco.cat_img_map[class_id])
        # merge the image id sets of the two conditions and use the merged set
        # to filter out images if self.filter_empty_gt=True
        ids_in_cat &= ids_with_ann

        valid_img_ids = []
        for i, img_info in enumerate(self.data_infos):
            img_id = self.img_ids[i]
            if self.filter_empty_gt and img_id not in ids_in_cat:
                continue
            if min(img_info['width'], img_info['height']) >= min_size:
                valid_inds.append(i)
                valid_img_ids.append(img_id)
        self.img_ids = valid_img_ids
        return valid_inds

    def _parse_ann_info(self, img_info, ann_info):
        """Parse bbox and mask annotations.

        Args:
            img_info (dict): Image info.
            ann_info (list[dict]): Annotation info of an image.

        Returns:
            dict: A dict containing the following keys: bboxes,
                bboxes_ignore, labels, masks, seg_map. "masks" are raw
                annotations and not decoded into binary masks.
        """
        gt_bboxes = []
        gt_labels = []
        gt_bboxes_ignore = []
        gt_masks_ann = []
        for i, ann in enumerate(ann_info):
            if ann.get('ignore', False):
                continue
            x1, y1, w, h = ann['bbox']
            inter_w = max(0, min(x1 + w, img_info['width']) - max(x1, 0))
            inter_h = max(0, min(y1 + h, img_info['height']) - max(y1, 0))
            if inter_w * inter_h == 0:
                continue
            if ann['area'] <= 0 or w < 1 or h < 1:
                continue
            if ann['category_id'] not in self.cat_ids:
                continue
            bbox = [x1, y1, x1 + w, y1 + h]
            if ann.get('iscrowd', False):
                gt_bboxes_ignore.append(bbox)
            else:
                gt_bboxes.append(bbox)
                gt_labels.append(self.cat2label[ann['category_id']])
                gt_masks_ann.append(ann.get('segmentation', None))

        if gt_bboxes:
            gt_bboxes = np.array(gt_bboxes, dtype=np.float32)
            gt_labels = np.array(gt_labels, dtype=np.int64)
        else:
            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
            gt_labels = np.array([], dtype=np.int64)

        if gt_bboxes_ignore:
            gt_bboxes_ignore = np.array(gt_bboxes_ignore, dtype=np.float32)
        else:
            gt_bboxes_ignore = np.zeros((0, 4), dtype=np.float32)

        seg_map = img_info['filename'].replace('jpg', 'png')

        ann = dict(
            bboxes=gt_bboxes,
            labels=gt_labels,
            bboxes_ignore=gt_bboxes_ignore,
            masks=gt_masks_ann,
            seg_map=seg_map)

        return ann

    def xyxy2xywh(self, bbox):
        """Convert ``xyxy`` style bounding boxes to ``xywh`` style for COCO
        evaluation.

        Args:
            bbox (numpy.ndarray): The bounding boxes, shape (4, ), in
                ``xyxy`` order.

        Returns:
            list[float]: The converted bounding boxes, in ``xywh`` order.
        """

        _bbox = bbox.tolist()
        return [
            _bbox[0],
            _bbox[1],
            _bbox[2] - _bbox[0],
            _bbox[3] - _bbox[1],
        ]
    def _proposal2json(self, results):
        """Convert proposal results to COCO json style."""
        json_results = []
        for idx in range(len(self)):
            img_id = self.img_ids[idx]
            bboxes = results[idx]
            for i in range(bboxes.shape[0]):
                data = dict()
                data['image_id'] = img_id
                data['bbox'] = self.xyxy2xywh(bboxes[i])
                data['score'] = float(bboxes[i][4])
                data['category_id'] = 1
                json_results.append(data)
        return json_results

    def _det2json(self, results):
        """Convert detection results to COCO json style."""
        json_results = []
        for idx in range(len(self)):
            img_id = self.img_ids[idx]
            result = results[idx]
            for label in range(len(result)):
                bboxes = result[label]
                for i in range(bboxes.shape[0]):
                    data = dict()
                    data['image_id'] = img_id
                    data['bbox'] = self.xyxy2xywh(bboxes[i])
                    data['score'] = float(bboxes[i][4])
                    data['category_id'] = self.cat_ids[label]
                    json_results.append(data)
        return json_results

    def _segm2json(self, results):
        """Convert instance segmentation results to COCO json style."""
        bbox_json_results = []
        segm_json_results = []
        for idx in range(len(self)):
            img_id = self.img_ids[idx]
            det, seg = results[idx]
            for label in range(len(det)):
                # bbox results
                bboxes = det[label]
                for i in range(bboxes.shape[0]):
                    data = dict()
                    data['image_id'] = img_id
                    data['bbox'] = self.xyxy2xywh(bboxes[i])
                    data['score'] = float(bboxes[i][4])
                    data['category_id'] = self.cat_ids[label]
                    bbox_json_results.append(data)

                # segm results
                # some detectors use different scores for bbox and mask
                if isinstance(seg, tuple):
                    segms = seg[0][label]
                    mask_score = seg[1][label]
                else:
                    segms = seg[label]
                    mask_score = [bbox[4] for bbox in bboxes]
                for i in range(bboxes.shape[0]):
                    data = dict()
                    data['image_id'] = img_id
                    data['bbox'] = self.xyxy2xywh(bboxes[i])
                    data['score'] = float(mask_score[i])
                    data['category_id'] = self.cat_ids[label]
                    if isinstance(segms[i]['counts'], bytes):
                        segms[i]['counts'] = segms[i]['counts'].decode()
                    data['segmentation'] = segms[i]
                    segm_json_results.append(data)
        return bbox_json_results, segm_json_results

    def results2json(self, results, outfile_prefix):
        """Dump the detection results to a COCO style json file.

        There are 3 types of results: proposals, bbox predictions, mask
        predictions, and they have different data types. This method will
        automatically recognize the type, and dump them to json files.

        Args:
            results (list[list | tuple | ndarray]): Testing results of the
                dataset.
            outfile_prefix (str): The filename prefix of the json files. If
                the prefix is "somepath/xxx", the json files will be named
                "somepath/xxx.bbox.json", "somepath/xxx.segm.json",
                "somepath/xxx.proposal.json".

        Returns:
            dict[str: str]: Possible keys are "bbox", "segm", "proposal",
                and values are corresponding filenames.
        """
        result_files = dict()
        if isinstance(results[0], list):
            json_results = self._det2json(results)
            result_files['bbox'] = f'{outfile_prefix}.bbox.json'
            result_files['proposal'] = f'{outfile_prefix}.bbox.json'
            mmcv.dump(json_results, result_files['bbox'])
        elif isinstance(results[0], tuple):
            json_results = self._segm2json(results)
            result_files['bbox'] = f'{outfile_prefix}.bbox.json'
            result_files['proposal'] = f'{outfile_prefix}.bbox.json'
            result_files['segm'] = f'{outfile_prefix}.segm.json'
            mmcv.dump(json_results[0], result_files['bbox'])
            mmcv.dump(json_results[1], result_files['segm'])
        elif isinstance(results[0], np.ndarray):
            json_results = self._proposal2json(results)
            result_files['proposal'] = f'{outfile_prefix}.proposal.json'
            mmcv.dump(json_results, result_files['proposal'])
        else:
            raise TypeError('invalid type of results')
        return result_files
    def fast_eval_recall(self, results, proposal_nums, iou_thrs, logger=None):
        gt_bboxes = []
        for i in range(len(self.img_ids)):
            ann_ids = self.coco.get_ann_ids(img_ids=self.img_ids[i])
            ann_info = self.coco.load_anns(ann_ids)
            if len(ann_info) == 0:
                gt_bboxes.append(np.zeros((0, 4)))
                continue
            bboxes = []
            for ann in ann_info:
                if ann.get('ignore', False) or ann['iscrowd']:
                    continue
                x1, y1, w, h = ann['bbox']
                bboxes.append([x1, y1, x1 + w, y1 + h])
            bboxes = np.array(bboxes, dtype=np.float32)
            if bboxes.shape[0] == 0:
                bboxes = np.zeros((0, 4))
            gt_bboxes.append(bboxes)

        recalls = eval_recalls(
            gt_bboxes, results, proposal_nums, iou_thrs, logger=logger)
        ar = recalls.mean(axis=1)
        return ar

    def format_results(self, results, jsonfile_prefix=None, **kwargs):
        """Format the results to json (standard format for COCO evaluation).

        Args:
            results (list[tuple | numpy.ndarray]): Testing results of the
                dataset.
            jsonfile_prefix (str | None): The prefix of json files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Default: None.

        Returns:
            tuple: (result_files, tmp_dir), result_files is a dict containing
                the json filepaths, tmp_dir is the temporary directory
                created for saving json files when jsonfile_prefix is not
                specified.
        """
        assert isinstance(results, list), 'results must be a list'
        assert len(results) == len(self), (
            'The length of results is not equal to the dataset len: {} != {}'.
            format(len(results), len(self)))

        if jsonfile_prefix is None:
            tmp_dir = tempfile.TemporaryDirectory()
            jsonfile_prefix = osp.join(tmp_dir.name, 'results')
        else:
            tmp_dir = None
        result_files = self.results2json(results, jsonfile_prefix)
        return result_files, tmp_dir

    def evaluate(self,
                 results,
                 metric='bbox',
                 logger=None,
                 jsonfile_prefix=None,
                 classwise=False,
                 proposal_nums=(100, 300, 1000),
                 iou_thrs=None,
                 metric_items=None):
        """Evaluation in COCO protocol.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            metric (str | list[str]): Metrics to be evaluated. Options are
                'bbox', 'segm', 'proposal', 'proposal_fast'.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.
            jsonfile_prefix (str | None): The prefix of json files. It
                includes the file path and the prefix of filename, e.g.,
                "a/b/prefix". If not specified, a temp file will be created.
                Default: None.
            classwise (bool): Whether to evaluate the AP for each class.
            proposal_nums (Sequence[int]): Proposal number used for
                evaluating recalls, such as recall@100, recall@1000.
                Default: (100, 300, 1000).
            iou_thrs (Sequence[float], optional): IoU threshold used for
                evaluating recalls/mAPs. If set to a list, the average of all
                IoUs will also be computed. If not specified, [0.50, 0.55,
                0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95] will be used.
                Default: None.
            metric_items (list[str] | str, optional): Metric items that will
                be returned. If not specified, ``['AR@100', 'AR@300',
                'AR@1000', 'AR_s@1000', 'AR_m@1000', 'AR_l@1000']`` will be
                used when ``metric=='proposal'``, ``['mAP', 'mAP_50',
                'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l']`` will be used when
                ``metric=='bbox' or metric=='segm'``.

        Returns:
            dict[str, float]: COCO style evaluation metric.
        """

        metrics = metric if isinstance(metric, list) else [metric]
        allowed_metrics = ['bbox', 'segm', 'proposal', 'proposal_fast']
        for metric in metrics:
            if metric not in allowed_metrics:
                raise KeyError(f'metric {metric} is not supported')
        if iou_thrs is None:
            iou_thrs = np.linspace(
                .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)
        if metric_items is not None:
            if not isinstance(metric_items, list):
                metric_items = [metric_items]

        result_files, tmp_dir = self.format_results(results, jsonfile_prefix)

        eval_results = OrderedDict()
        cocoGt = self.coco
        for metric in metrics:
            msg = f'Evaluating {metric}...'
            if logger is None:
                msg = '\n' + msg
            print_log(msg, logger=logger)

            if metric == 'proposal_fast':
                ar = self.fast_eval_recall(
                    results, proposal_nums, iou_thrs, logger='silent')
                log_msg = []
                for i, num in enumerate(proposal_nums):
                    eval_results[f'AR@{num}'] = ar[i]
                    log_msg.append(f'\nAR@{num}\t{ar[i]:.4f}')
                log_msg = ''.join(log_msg)
                print_log(log_msg, logger=logger)
                continue

            iou_type = 'bbox' if metric == 'proposal' else metric
            if metric not in result_files:
                raise KeyError(f'{metric} is not in results')
            try:
                predictions = mmcv.load(result_files[metric])
                if iou_type == 'segm':
                    # Refer to https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/coco.py#L331  # noqa
                    # When evaluating mask AP, if the results contain bbox,
                    # cocoapi will use the box area instead of the mask area
                    # for calculating the instance area. Though the overall AP
                    # is not affected, this leads to different
                    # small/medium/large mask AP results.
                    for x in predictions:
                        x.pop('bbox')
                    warnings.simplefilter('once')
                    warnings.warn(
                        'The key "bbox" is deleted for more accurate mask AP '
                        'of small/medium/large instances since v2.12.0. This '
                        'does not change the overall mAP calculation.',
                        UserWarning)
                cocoDt = cocoGt.loadRes(predictions)
            except IndexError:
                print_log(
                    'The testing results of the whole dataset are empty.',
                    logger=logger,
                    level=logging.ERROR)
                break

            cocoEval = COCOeval(cocoGt, cocoDt, iou_type)
            cocoEval.params.catIds = self.cat_ids
            cocoEval.params.imgIds = self.img_ids
            cocoEval.params.maxDets = list(proposal_nums)
            cocoEval.params.iouThrs = iou_thrs
            # mapping of cocoEval.stats
            coco_metric_names = {
                'mAP': 0,
                'mAP_50': 1,
                'mAP_75': 2,
                'mAP_s': 3,
                'mAP_m': 4,
                'mAP_l': 5,
                'AR@100': 6,
                'AR@300': 7,
                'AR@1000': 8,
                'AR_s@1000': 9,
                'AR_m@1000': 10,
                'AR_l@1000': 11
            }
            if metric_items is not None:
                for metric_item in metric_items:
                    if metric_item not in coco_metric_names:
                        raise KeyError(
                            f'metric item {metric_item} is not supported')

            if metric == 'proposal':
                cocoEval.params.useCats = 0
                cocoEval.evaluate()
                cocoEval.accumulate()
                cocoEval.summarize()
                if metric_items is None:
                    metric_items = [
                        'AR@100', 'AR@300', 'AR@1000', 'AR_s@1000',
                        'AR_m@1000', 'AR_l@1000'
                    ]

                for item in metric_items:
                    val = float(
                        f'{cocoEval.stats[coco_metric_names[item]]:.3f}')
                    eval_results[item] = val
            else:
                cocoEval.evaluate()
                cocoEval.accumulate()
                cocoEval.summarize()
                if classwise:  # Compute per-category AP
                    # from https://github.com/facebookresearch/detectron2/
                    precisions = cocoEval.eval['precision']
                    # precision: (iou, recall, cls, area range, max dets)
                    assert len(self.cat_ids) == precisions.shape[2]

                    results_per_category = []
                    for idx, catId in enumerate(self.cat_ids):
                        # area range index 0: all area ranges
                        # max dets index -1: typically 100 per image
                        nm = self.coco.loadCats(catId)[0]
                        precision = precisions[:, :, idx, 0, -1]
                        precision = precision[precision > -1]
                        if precision.size:
                            ap = np.mean(precision)
                        else:
                            ap = float('nan')
                        results_per_category.append(
                            (f'{nm["name"]}', f'{float(ap):0.3f}'))

                    num_columns = min(6, len(results_per_category) * 2)
                    results_flatten = list(
                        itertools.chain(*results_per_category))
                    headers = ['category', 'AP'] * (num_columns // 2)
                    results_2d = itertools.zip_longest(*[
                        results_flatten[i::num_columns]
                        for i in range(num_columns)
                    ])
                    table_data = [headers]
                    table_data += [result for result in results_2d]
                    table = AsciiTable(table_data)
                    print_log('\n' + table.table, logger=logger)

                if metric_items is None:
                    metric_items = [
                        'mAP', 'mAP_50', 'mAP_75', 'mAP_s', 'mAP_m', 'mAP_l'
                    ]

                for metric_item in metric_items:
                    key = f'{metric}_{metric_item}'
                    val = float(
                        f'{cocoEval.stats[coco_metric_names[metric_item]]:.3f}'
                    )
                    eval_results[key] = val
                ap = cocoEval.stats[:6]
                eval_results[f'{metric}_mAP_copypaste'] = (
                    f'{ap[0]:.3f} {ap[1]:.3f} {ap[2]:.3f} {ap[3]:.3f} '
                    f'{ap[4]:.3f} {ap[5]:.3f}')
        if tmp_dir is not None:
            tmp_dir.cleanup()
        return eval_results
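A hedged sketch of the evaluation entry point above (`few_shot_coco_dataset` and `detector_results` are placeholder names; the per-image result layout follows the usual mmdet convention):

# detector_results: list with one entry per image; for 'bbox' evaluation each
# entry is a list of per-class (k, 5) arrays in (x1, y1, x2, y2, score) order.
eval_results = few_shot_coco_dataset.evaluate(
    detector_results, metric='bbox', classwise=True)
print(eval_results['bbox_mAP'], eval_results['bbox_mAP_50'])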
@ -0,0 +1,64 @@
from torch.utils.data import DataLoader


class NwayKshotDataloader(object):
    """A dataloader wrapper for NwayKshotDataset.

    It creates an iterator that yields a query batch and a support batch
    simultaneously: each step returns a batch of query data (batch_size
    samples) and a batch of support data (support_way * support_shot
    samples).

    Args:
        query_data_loader (DataLoader): DataLoader of the query dataset.
        support_dataset (:obj:`NwayKshotDataset`): Support dataset with
            pre-sampled batch indexes.
        support_sampler (Sampler): Sampler for the support dataloader only.
        num_workers (int): Num workers for both support and query dataloader.
        support_collate_fn (callable): Collate function for the support
            dataloader.
        pin_memory (bool): Pin memory for both support and query dataloader.
        worker_init_fn (callable): Worker init function for both
            support and query dataloader.
        kwargs: Any keyword argument to be used to initialize DataLoader.
    """

    def __init__(self, query_data_loader, support_dataset, support_sampler,
                 num_workers, support_collate_fn, pin_memory, worker_init_fn,
                 **kwargs):
        self.dataset = query_data_loader.dataset
        self.query_data_loader = query_data_loader
        self.support_dataset = support_dataset
        self.support_sampler = support_sampler
        self.num_workers = num_workers
        self.support_collate_fn = support_collate_fn
        self.pin_memory = pin_memory
        self.worker_init_fn = worker_init_fn
        self.kwargs = kwargs

    def __iter__(self):
        # generate different support batch indexes for each epoch
        self.support_dataset.shuffle_support()
        # init the support dataloader with batch_size 1: each batch is
        # pre-sampled in the dataset, and the collate function turns it
        # into a batch of support_way * support_shot samples
        self.support_data_loader = DataLoader(
            self.support_dataset,
            batch_size=1,
            sampler=self.support_sampler,
            num_workers=self.num_workers,
            collate_fn=self.support_collate_fn,
            pin_memory=self.pin_memory,
            worker_init_fn=self.worker_init_fn,
            **self.kwargs)

        # init iterators for query and support
        self.query_iter = iter(self.query_data_loader)
        self.support_iter = iter(self.support_data_loader)
        return self

    def __next__(self):
        # step the query and support iterators together
        query_data = next(self.query_iter)
        support_data = next(self.support_iter)
        return {'query_data': query_data, 'support_data': support_data}

    def __len__(self):
        return len(self.query_data_loader)
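A minimal sketch of driving the wrapper in a training loop (assuming the query dataloader, support dataset, sampler, and collate function are built elsewhere; all names are placeholders):

nway_kshot_loader = NwayKshotDataloader(
    query_data_loader=query_data_loader,
    support_dataset=support_dataset,
    support_sampler=support_sampler,
    num_workers=2,
    support_collate_fn=support_collate_fn,
    pin_memory=False,
    worker_init_fn=None)

for step, data_batch in enumerate(nway_kshot_loader):
    query_data = data_batch['query_data']      # batch_size query samples
    support_data = data_batch['support_data']  # support_way * support_shot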
@ -0,0 +1,453 @@
import copy
import warnings

import numpy as np
from mmdet.datasets.builder import DATASETS


@DATASETS.register_module()
class MergeDataset(object):
    """A wrapper that merges datasets built from multiple annotation files.

    This dataset wrapper is used when multiple annotation files are passed
    to NwayKshotDataset, QueryAwareDataset, or FewShotCustomDataset. It
    merges the data infos of the input datasets, because different
    annotations of the same image may be spread across different datasets.

    Args:
        datasets (list[:obj:`Dataset`]): A list of datasets.
    """

    def __init__(self, datasets):
        self.dataset = copy.deepcopy(datasets[0])
        self.CLASSES = self.dataset.CLASSES
        for dataset in datasets:
            assert dataset.img_prefix == self.dataset.img_prefix, \
                'when using MergeDataset all img_prefix should be the same'

        self.img_prefix = self.dataset.img_prefix

        # merge the data infos of all datasets
        concat_data_infos = sum([dataset.data_infos for dataset in datasets],
                                [])
        merge_data_dict = {}
        for i, data_info in enumerate(concat_data_infos):
            if merge_data_dict.get(data_info['id'], None) is None:
                merge_data_dict[data_info['id']] = data_info
            else:
                merge_data_dict[data_info['id']]['ann'] = \
                    self.merge_ann(merge_data_dict[data_info['id']]['ann'],
                                   data_info['ann'])

        self.dataset.data_infos = [
            merge_data_dict[key] for key in merge_data_dict.keys()
        ]

        # Disable the group sampler, because in the few shot setting
        # one group may only have two or three images.
        if hasattr(datasets[0], 'flag'):
            self.flag = np.zeros(len(self.dataset), dtype=np.uint8)

    def get_cat_ids(self, idx):
        """Get category ids of the merged dataset by index.

        Args:
            idx (int): Index of data.

        Returns:
            list[int]: All categories in the image of specified index.
        """
        return self.dataset.get_cat_ids(idx)

    def prepare_train_img(self, idx, pipeline_key=None, gt_idx=None):
        """Get training data and annotations after pipeline.

        Args:
            idx (int): Index of data.
            pipeline_key (str): Name of pipeline.
            gt_idx (list[int]): Indexes of used annotations.

        Returns:
            dict: Training data and annotation after pipeline with new keys
                introduced by pipeline.
        """
        return self.dataset.prepare_train_img(idx, pipeline_key, gt_idx)

    def get_ann_info(self, idx):
        """Get annotation by index.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Annotation info of specified index.
        """
        return self.dataset.get_ann_info(idx)

    def __getitem__(self, idx):
        return self.dataset[idx]

    def __len__(self):
        """Dataset length after merge."""
        return len(self.dataset)

    def __repr__(self):
        return self.dataset.__repr__()

    def evaluate(self, results, logger=None, **kwargs):
        """Evaluate the results.

        Args:
            results (list[list | tuple]): Testing results of the dataset.
            logger (logging.Logger | str | None): Logger used for printing
                related information during evaluation. Default: None.

        Returns:
            dict[str: float]: AP results of the total dataset or each
                separate dataset if `self.separate_eval=True`.
        """
        eval_results = self.dataset.evaluate(results, logger=logger, **kwargs)
        return eval_results

    @staticmethod
    def merge_ann(ann_a, ann_b):
        """Merge two annotations.

        Args:
            ann_a (dict): Dict of annotation.
            ann_b (dict): Dict of annotation.

        Returns:
            dict: Merged annotation.
        """
        assert sorted(ann_a.keys()) == sorted(ann_b.keys()), \
            'can not merge different types of annotations'
        return {
            'bboxes': np.concatenate((ann_a['bboxes'], ann_b['bboxes'])),
            'labels': np.concatenate((ann_a['labels'], ann_b['labels'])),
            'bboxes_ignore': ann_a['bboxes_ignore'],
            'labels_ignore': ann_a['labels_ignore']
        }

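To make the merge rule concrete, here is a small worked example of `merge_ann` with illustrative values: bboxes and labels are concatenated, while the ignore fields are kept from the first annotation.

import numpy as np

ann_a = dict(
    bboxes=np.array([[0., 0., 10., 10.]], dtype=np.float32),
    labels=np.array([1], dtype=np.int64),
    bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
    labels_ignore=np.array([], dtype=np.int64))
ann_b = dict(
    bboxes=np.array([[5., 5., 20., 20.]], dtype=np.float32),
    labels=np.array([3], dtype=np.int64),
    bboxes_ignore=np.zeros((0, 4), dtype=np.float32),
    labels_ignore=np.array([], dtype=np.int64))
merged = MergeDataset.merge_ann(ann_a, ann_b)
# merged['bboxes'].shape == (2, 4) and merged['labels'].tolist() == [1, 3]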
@DATASETS.register_module()
class QueryAwareDataset(object):
    """A wrapper of query aware dataset.

    For each item in the dataset, there will be one query image and
    (num_support_way * num_support_shot) support images. The support images
    are sampled according to the selected query image, and include one
    positive class (a random class in the query image) and negative classes
    (classes that do not appear in the query image).

    Args:
        dataset (:obj:`FewShotDataset` | :obj:`MergeDataset`):
            The dataset to be wrapped.
        num_support_way (int): The number of classes for support data;
            the first one is always the positive class.
        num_support_shot (int): The number of shots for each support class.
    """

    def __init__(self, dataset, num_support_way, num_support_shot):
        self.dataset = dataset
        self.num_support_way = num_support_way
        self.num_support_shot = num_support_shot
        self.CLASSES = dataset.CLASSES
        assert self.num_support_way <= len(self.CLASSES), \
            'Please set the num_support_way smaller than the ' \
            'number of classes.'
        # build data index (idx, gt_idx) by class.
        self.data_infos_by_class = {i: [] for i in range(len(self.CLASSES))}
        # count the max number of anns of one class in a single image,
        # which decides whether repeated instances need to be sampled.
        self.max_anns_per_image_by_class = [
            0 for _ in range(len(self.CLASSES))
        ]
        # count images for each class; when a novel class only has one
        # image, the positive support is allowed to be sampled from the
        # query image itself.
        self.num_image_by_class = [0 for _ in range(len(self.CLASSES))]

        for idx in range(len(self.dataset)):
            labels = self.dataset.get_ann_info(idx)['labels']
            class_count = [0 for _ in range(len(self.CLASSES))]
            for gt_idx, gt in enumerate(labels):
                self.data_infos_by_class[gt].append((idx, gt_idx))
                class_count[gt] += 1
            for i in range(len(self.CLASSES)):
                # number of images for each class
                if class_count[i] > 0:
                    self.num_image_by_class[i] += 1
                # max number of one class annotations in one image
                if class_count[i] > self.max_anns_per_image_by_class[i]:
                    self.max_anns_per_image_by_class[i] = class_count[i]

        for i in range(len(self.CLASSES)):
            assert len(self.data_infos_by_class[i]) > 0, \
                f'Class {self.CLASSES[i]} has zero annotation'
            if len(self.data_infos_by_class[i]) <= self.num_support_shot - \
                    self.max_anns_per_image_by_class[i]:
                warnings.warn(
                    f'During training, the number of instances of class '
                    f'{self.CLASSES[i]} may be smaller than the number of '
                    f'support shots, which causes some instances to be '
                    f'sampled multiple times.')
            if self.num_image_by_class[i] == 1:
                warnings.warn(f'Class {self.CLASSES[i]} only has one '
                              f'image; query and support will be sampled '
                              f'from instances of the same image.')

        # Disable the group sampler, because in the few shot setting
        # one group may only have two or three images.
        if hasattr(dataset, 'flag'):
            self.flag = np.zeros(len(self.dataset), dtype=np.uint8)

    def __getitem__(self, idx):
        # sample query data
        try_time = 0
        while True:
            try_time += 1
            cat_ids = self.dataset.get_cat_ids(idx)
            # if the query image has too many classes, not enough
            # negative support classes can be found.
            if len(self.CLASSES) - len(cat_ids) >= self.num_support_way - 1:
                break
            else:
                idx = self._rand_another(idx)
            assert try_time < 100, \
                'Not enough negative support classes for query image,' \
                ' please try a smaller support way.'

        query_class = np.random.choice(cat_ids)
        query_gt_idx = [
            i for i in range(len(cat_ids)) if cat_ids[i] == query_class
        ]
        query_data = self.dataset.prepare_train_img(idx, 'query', query_gt_idx)
        query_data['query_class'] = [query_class]

        # sample negative support classes, which do not appear in the
        # query image
        support_class = [
            i for i in range(len(self.CLASSES)) if i not in cat_ids
        ]
        support_class = np.random.choice(
            support_class,
            min(self.num_support_way - 1, len(support_class)),
            replace=False)
        support_idxes = self.generate_support(idx, query_class, support_class)
        support_data = [
            self.dataset.prepare_train_img(idx, 'support', [gt_idx])
            for (idx, gt_idx) in support_idxes
        ]
        return {'query_data': query_data, 'support_data': support_data}

    def __len__(self):
        """Length after repetition."""
        return len(self.dataset)

    def _rand_another(self, idx):
        """Get another random index from the same group as the given index."""
        pool = np.where(self.flag == self.flag[idx])[0]
        return np.random.choice(pool)

    def generate_support(self, idx, query_class, support_classes):
        """Generate support indexes for a query image.

        Args:
            idx (int): Index of query data.
            query_class (int): Query class.
            support_classes (list[int]): Classes of support data.

        Returns:
            list[(int, int)]: A batch (num_support_way * num_support_shot)
                of support data (idx, gt_idx).
        """
        support_idxes = []
        if self.num_image_by_class[query_class] == 1:
            # only one image, so instances are sampled from the same image
            pos_support_idxes = self.sample_support_shots(
                idx, query_class, allow_same_image=True)
        else:
            # instances are sampled from images other than the query image
            pos_support_idxes = self.sample_support_shots(idx, query_class)
        support_idxes.extend(pos_support_idxes)
        for support_class in support_classes:
            neg_support_idxes = self.sample_support_shots(idx, support_class)
            support_idxes.extend(neg_support_idxes)
        return support_idxes

    def sample_support_shots(self, idx, class_id, allow_same_image=False):
        """Generate support indexes of one class.

        Args:
            idx (int): Index of query data.
            class_id (int): Support class.
            allow_same_image (bool): Allow instances to be sampled from the
                same image as the query image. Default: False.

        Returns:
            list[(int, int)]: Support data (num_support_shot)
                of the specified class.
        """
        support_idxes = []
        num_total_shot = len(self.data_infos_by_class[class_id])
        num_ignore_shot = self.count_class_id(idx, class_id)
        # set num_sample_shots for each round of sampling

        if num_total_shot - num_ignore_shot < self.num_support_shot:
            # not enough support data, so repeated instances are allowed
            num_sample_shots = num_total_shot
            allow_repeat = True
        else:
            # enough support data, so repeated instances are not allowed
            num_sample_shots = self.num_support_shot
            allow_repeat = False
        while len(support_idxes) < self.num_support_shot:
            selected_gt_idxes = np.random.choice(
                num_total_shot, num_sample_shots, replace=False)

            selected_gts = [
                self.data_infos_by_class[class_id][selected_gt_idx]
                for selected_gt_idx in selected_gt_idxes
            ]
            for selected_gt in selected_gts:
                # filter out query annotations
                if selected_gt[0] == idx:
                    if not allow_same_image:
                        continue
                if allow_repeat:
                    support_idxes.append(selected_gt)
                elif selected_gt not in support_idxes:
                    support_idxes.append(selected_gt)
                if len(support_idxes) == self.num_support_shot:
                    break
            # update the number of shots to sample in the next round
            num_sample_shots = min(self.num_support_shot - len(support_idxes),
                                   num_sample_shots)
        return support_idxes

    def count_class_id(self, idx, class_id):
        """Count instances of a specific class in the image of given index."""
        cat_ids = self.dataset.get_cat_ids(idx)
        cat_ids_of_class = [
            i for i in range(len(cat_ids)) if cat_ids[i] == class_id
        ]
        return len(cat_ids_of_class)

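A hedged usage sketch of the wrapper (`few_shot_dataset` is a placeholder for any dataset exposing `get_ann_info`, `get_cat_ids` and `prepare_train_img` as above):

query_aware_dataset = QueryAwareDataset(
    few_shot_dataset, num_support_way=2, num_support_shot=5)
item = query_aware_dataset[0]
# item['query_data'] holds one query image with a single positive class;
# item['support_data'] holds 2 * 5 = 10 single-instance support samples.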
@DATASETS.register_module()
class NwayKshotDataset(object):
    """A dataset wrapper for N-way K-shot sampling.

    Based on the incoming dataset, the query dataset samples batch data as
    a regular dataset, while the support dataset pre-samples batch data
    indexes. Each batch index contains (num_support_way * num_support_shot)
    samples. The default mode of NwayKshotDataset is the query dataset; it
    is converted into a support dataset by calling
    convert_query_to_support().

    Args:
        dataset (:obj:`FewShotDataset` | :obj:`MergeDataset`):
            The dataset to be wrapped.
        num_support_way (int):
            The number of classes in a support data batch.
        num_support_shot (int):
            The number of shots for each class in a support data batch.
    """

    def __init__(self, dataset, num_support_way, num_support_shot):
        self.dataset = dataset
        self.CLASSES = dataset.CLASSES
        # data_type determines the behavior of fetching data.
        # The default data_type is 'query', which behaves the same as a
        # regular dataset. To convert the dataset into a 'support' dataset,
        # simply call convert_query_to_support().
        self.data_type = 'query'
        self.num_support_way = num_support_way
        assert num_support_way <= len(self.CLASSES), \
            'support way can not be larger than the number of classes'
        self.num_support_shot = num_support_shot
        self.batch_index = []
        self.data_infos_by_class = {i: [] for i in range(len(self.CLASSES))}

        # Disable the group sampler, because in the few shot setting
        # one group may only have two or three images.
        if hasattr(dataset, 'flag'):
            self.flag = np.zeros(len(self.dataset), dtype=np.uint8)

    def __getitem__(self, idx):
        if self.data_type == 'query':
            # load one data item with the query pipeline
            return self.dataset.prepare_train_img(idx, 'query')
        elif self.data_type == 'support':
            # load one batch of data with the support pipeline
            b_idx = self.batch_index[idx]
            batch_data = [
                self.dataset.prepare_train_img(idx, 'support', [gt_idx])
                for (idx, gt_idx) in b_idx
            ]
            return batch_data
        else:
            raise ValueError(f'unsupported data type {self.data_type}')

    def __len__(self):
        """Length of dataset."""
        if self.data_type == 'query':
            return len(self.dataset)
        elif self.data_type == 'support':
            return len(self.batch_index)
        else:
            raise ValueError(f'unsupported data type {self.data_type}')

    def shuffle_support(self):
        """Generate new batch indexes."""
        if self.data_type == 'query':
            raise ValueError('query dataset can not shuffle support batches')
        self.batch_index = self.generate_batch_index(len(self.batch_index))

    def convert_query_to_support(self, support_dataset_len):
        """Convert the query dataset into a support dataset.

        Args:
            support_dataset_len (int): Length of pre-sampled batch indexes.
        """
        # create a lookup table of annotations for each class
        for idx in range(len(self.dataset)):
            labels = self.dataset.get_ann_info(idx)['labels']
            for gt_idx, gt in enumerate(labels):
                self.data_infos_by_class[gt].append((idx, gt_idx))
        # make sure all class index lists have enough
        # instances (length > num_support_shot)
        for i in range(len(self.CLASSES)):
            num_gts = len(self.data_infos_by_class[i])
            if num_gts < self.num_support_shot:
                self.data_infos_by_class[i] = self.data_infos_by_class[i] * \
                    (self.num_support_shot // num_gts + 1)
        self.batch_index = self.generate_batch_index(support_dataset_len)
        self.data_type = 'support'
        if hasattr(self, 'flag'):
            self.flag = np.zeros(support_dataset_len, dtype=np.uint8)

    def generate_batch_index(self, dataset_len):
        """Generate batch indexes of shape
        [dataset_len * [support_way * support_shot]].

        Args:
            dataset_len (int): Number of pre-sampled batch indexes.

        Returns:
            list[list[(data_idx, gt_idx)]]: Pre-sampled batch indexes.
        """
        total_batch_index = []
        for _ in range(dataset_len):
            batch_index = []
            selected_classes = np.random.choice(
                len(self.CLASSES), self.num_support_way, replace=False)
            for cls in selected_classes:
                num_gts = len(self.data_infos_by_class[cls])
                selected_gts_idx = np.random.choice(
                    num_gts, self.num_support_shot, replace=False)
                selected_gts = [
                    self.data_infos_by_class[cls][gt_idx]
                    for gt_idx in selected_gts_idx
                ]
                batch_index.extend(selected_gts)
            total_batch_index.append(batch_index)
        return total_batch_index
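A sketch of the intended query/support split, end to end (illustrative; `few_shot_dataset` is a placeholder, and deriving the support copy via deepcopy is one possible way to share the underlying data):

import copy

query_dataset = NwayKshotDataset(
    few_shot_dataset, num_support_way=3, num_support_shot=2)
support_dataset = copy.deepcopy(query_dataset)
# pre-sample as many support batches as there are query iterations
support_dataset.convert_query_to_support(len(query_dataset))
batch = support_dataset[0]  # list of 3 * 2 single-instance samples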
@ -0,0 +1,264 @@
import copy
import os.path as osp
import warnings

import numpy as np
from mmdet.datasets.builder import DATASETS
from mmdet.datasets.custom import CustomDataset
from mmdet.datasets.pipelines import Compose


@DATASETS.register_module()
class FewShotCustomDataset(CustomDataset):
    """Custom dataset for few shot detection.

    It allows a single pipeline (normal dataset of the fully supervised
    setting) or two pipelines (query-support fashion) for data processing.
    When the annotation shot filter is used, it makes sure the accessible
    annotations meet the few shot setting with an exact number of instances.

    The annotation format is shown as follows. The `ann` field
    is optional for testing.

    .. code-block:: none

        [
            {
                'id': '0000001',
                'filename': 'a.jpg',
                'width': 1280,
                'height': 720,
                'ann': {
                    'bboxes': <np.ndarray> (n, 4) in (x1, y1, x2, y2) order.
                    'labels': <np.ndarray> (n, ),
                    'bboxes_ignore': <np.ndarray> (k, 4), (optional field)
                    'labels_ignore': <np.ndarray> (k, ) (optional field)
                }
            },
            ...
        ]

    Args:
        ann_file (str): Annotation file path.
        pipeline (list[dict] | dict): Processing pipeline.
            If it is list[dict], all data will pass through this pipeline.
            If it is dict, query and support data will be processed with
            two different pipelines and the dict should contain two keys:

            - 'query': list[dict]
            - 'support': list[dict]
        classes (str | Sequence[str]): Classes for model training, providing
            a fixed label for each class.
        data_root (str, optional): Data root for ``ann_file``,
            ``img_prefix``, ``seg_prefix``, ``proposal_file`` if specified.
        test_mode (bool, optional): If set True, annotations will not be
            loaded.
        filter_empty_gt (bool, optional): If set True, images without
            bounding boxes of the dataset's classes will be filtered out.
            This option only works when `test_mode=False`, i.e., we never
            filter images during tests.
        ann_shot_filter (dict, optional): If set to None, all annotations
            from the ann file will be loaded. If not None, the annotation
            shot filter specifies which classes to load and the maximum
            number of instances of each class, for example:
            {'dog': 10, 'person': 5}. Default: None.
    """

    CLASSES = None

    def __init__(
        self,
        ann_file,
        pipeline,
        classes,
        data_root=None,
        img_prefix='',
        seg_prefix=None,
        proposal_file=None,
        test_mode=False,
        filter_empty_gt=True,
        ann_shot_filter=None,
    ):
        self.ann_file = ann_file
        self.data_root = data_root
        self.img_prefix = img_prefix
        self.seg_prefix = seg_prefix
        self.proposal_file = proposal_file
        self.test_mode = test_mode
        self.filter_empty_gt = filter_empty_gt
        self.CLASSES = self.get_classes(classes)

        self.ann_shot_filter = ann_shot_filter
        if self.ann_shot_filter is not None:
            for class_name in list(self.ann_shot_filter.keys()):
                assert class_name in self.CLASSES, \
                    f'class {class_name} from ' \
                    f'ann_shot_filter not in CLASSES.'

        # join paths if data_root is specified
        if self.data_root is not None:
            if not osp.isabs(self.ann_file):
                self.ann_file = osp.join(self.data_root, self.ann_file)
            if not (self.img_prefix is None or osp.isabs(self.img_prefix)):
                self.img_prefix = osp.join(self.data_root, self.img_prefix)
            if not (self.seg_prefix is None or osp.isabs(self.seg_prefix)):
                self.seg_prefix = osp.join(self.data_root, self.seg_prefix)
            if not (self.proposal_file is None
                    or osp.isabs(self.proposal_file)):
                self.proposal_file = osp.join(self.data_root,
                                              self.proposal_file)
        # load annotations (and proposals)
        self.data_infos = self.load_annotations(self.ann_file)

        # filter annotations according to ann_shot_filter
        if self.ann_shot_filter is not None:
            self.data_infos = self._filter_annotations(self.data_infos,
                                                       self.ann_shot_filter)

        if self.proposal_file is not None:
            self.proposals = self.load_proposals(self.proposal_file)
        else:
            self.proposals = None

        # filter images that are too small or contain no annotations
        if not test_mode:
            valid_inds = self._filter_imgs()
            self.data_infos = [self.data_infos[i] for i in valid_inds]
            if self.proposals is not None:
                self.proposals = [self.proposals[i] for i in valid_inds]
            # set group flag for the sampler
            self._set_group_flag()

        # processing pipeline; if there are two pipelines, the pipeline
        # will be selected by the key name, 'query' or 'support'
        if isinstance(pipeline, dict):
            self.pipeline = {}
            for key in pipeline.keys():
                self.pipeline[key] = Compose(pipeline[key])
        else:
            self.pipeline = Compose(pipeline)
    def get_ann_info(self, idx):
        """Get annotation by index.

        When overriding this function, please make sure the same annotations
        are used during the whole training.

        Args:
            idx (int): Index of data.

        Returns:
            dict: Annotation info of specified index.
        """

        return copy.deepcopy(self.data_infos[idx]['ann'])

    def prepare_train_img(self, idx, pipeline_key=None, gt_idx=None):
        """Get training data and annotations after pipeline.

        Args:
            idx (int): Index of data.
            pipeline_key (str): Name of pipeline.
            gt_idx (list[int]): Indexes of used annotations.

        Returns:
            dict: Training data and annotation after pipeline with new keys
                introduced by pipeline.
        """

        img_info = self.data_infos[idx]
        ann_info = self.get_ann_info(idx)

        # annotation filter
        if gt_idx is not None:
            selected_ann_info = {
                'bboxes': ann_info['bboxes'][gt_idx],
                'labels': ann_info['labels'][gt_idx]
            }
            # keep pace with the new annotations
            new_img_info = copy.deepcopy(img_info)
            new_img_info['ann'] = selected_ann_info
            results = dict(img_info=new_img_info, ann_info=selected_ann_info)
        else:
            results = dict(img_info=copy.deepcopy(img_info), ann_info=ann_info)

        if self.proposals is not None:
            results['proposals'] = self.proposals[idx]

        self.pre_pipeline(results)
        if pipeline_key is None:
            return self.pipeline(results)
        else:
            return self.pipeline[pipeline_key](results)

    def _filter_annotations(self, data_infos, ann_shot_filter):
        """Filter out annotations whose classes are not in ann_shot_filter
        and excess annotations of the specified classes, so that each
        specified class keeps at most the predefined number of instances.

        Args:
            data_infos (list[dict]): Annotation infos.
            ann_shot_filter (dict): Specifies which classes and how many
                instances of each class to load from the annotation file.
                For example: {'dog': 10, 'cat': 10, 'person': 5}.
                Default: None.

        Returns:
            list[dict]: Annotation infos where the number of shots of each
                specified class is less than or equal to the predefined
                number.
        """
        # build instance indexes of (img_id, gt_idx)
        total_instance_dict = {key: [] for key in ann_shot_filter.keys()}

        for data_info in data_infos:
            img_id = data_info['id']
            ann = data_info['ann']
            for i in range(ann['labels'].shape[0]):
                instance_class_name = self.CLASSES[ann['labels'][i]]
                if instance_class_name in ann_shot_filter.keys():
                    total_instance_dict[instance_class_name].append(
                        (img_id, i))

        total_instance_indexes = []
        for class_name in ann_shot_filter.keys():
            num_shot = ann_shot_filter[class_name]
            instance_indexes = total_instance_dict[class_name]
            # randomly sample from all instances to get the exact
            # number of instances
            if len(instance_indexes) > num_shot:
                random_select = np.random.choice(
                    len(instance_indexes), num_shot, replace=False)
                total_instance_indexes += \
                    [instance_indexes[i] for i in random_select]
            # the number of shots is less than the predefined number,
            # which may cause performance degradation
            elif len(instance_indexes) < num_shot:
                warning = f'number of {class_name} instances ' \
                          f'is {len(instance_indexes)}, which is ' \
                          f'less than predefined shots {num_shot}.'
                warnings.warn(warning)
                total_instance_indexes += instance_indexes
            else:
                total_instance_indexes += instance_indexes

        new_data_infos = []
        for data_info in data_infos:
            img_id = data_info['id']
            selected_instance_index = \
                sorted([instance[1] for instance in total_instance_indexes
                        if instance[0] == img_id])
            ann = data_info['ann']
            if len(selected_instance_index) == 0:
                continue
            selected_ann = dict(
                bboxes=ann['bboxes'][selected_instance_index],
                labels=ann['labels'][selected_instance_index],
            )
            if ann.get('bboxes_ignore') is not None:
                selected_ann['bboxes_ignore'] = ann['bboxes_ignore']
            if ann.get('labels_ignore') is not None:
                selected_ann['labels_ignore'] = ann['labels_ignore']
            new_data_infos.append(
                dict(
                    id=img_id,
                    filename=data_info['filename'],
                    width=data_info['width'],
                    height=data_info['height'],
                    ann=selected_ann))
        return new_data_infos
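A short sketch of configuring the shot filter described above (paths, pipelines, and class names are placeholders):

few_shot_dataset = FewShotCustomDataset(
    ann_file='data/few_shot_ann.json',
    pipeline={'query': query_pipeline, 'support': support_pipeline},
    classes=('dog', 'person'),
    ann_shot_filter={'dog': 10, 'person': 5})
# after loading, the dataset keeps at most 10 'dog' and 5 'person' instances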
@ -0,0 +1,92 @@
# Copyright (c) Open-MMLab. All rights reserved.
from collections.abc import Mapping, Sequence

import torch
import torch.nn.functional as F
from mmcv.parallel.data_container import DataContainer
from torch.utils.data.dataloader import default_collate


def query_support_collate_fn(batch, samples_per_gpu=1):
    """Put each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases:

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes
    """

    if not isinstance(batch, Sequence):
        raise TypeError(f'{type(batch)} is not supported.')

    # flatten a support batch of type list[list[DataContainer]]
    # into a single list of DataContainer
    if isinstance(batch[0], Sequence):
        samples_per_gpu = len(batch[0]) * samples_per_gpu
        batch = sum(batch, [])

    if isinstance(batch[0], DataContainer):
        stacked = []
        if batch[0].cpu_only:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        elif batch[0].stack:
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)

                if batch[i].pad_dims is not None:
                    ndim = batch[i].dim()
                    assert ndim > batch[i].pad_dims
                    max_shape = [0 for _ in range(batch[i].pad_dims)]
                    for dim in range(1, batch[i].pad_dims + 1):
                        max_shape[dim - 1] = batch[i].size(-dim)
                    for sample in batch[i:i + samples_per_gpu]:
                        for dim in range(0, ndim - batch[i].pad_dims):
                            assert batch[i].size(dim) == sample.size(dim)
                        for dim in range(1, batch[i].pad_dims + 1):
                            max_shape[dim - 1] = max(max_shape[dim - 1],
                                                     sample.size(-dim))
                    padded_samples = []
                    for sample in batch[i:i + samples_per_gpu]:
                        pad = [0 for _ in range(batch[i].pad_dims * 2)]
                        for dim in range(1, batch[i].pad_dims + 1):
                            pad[2 * dim -
                                1] = max_shape[dim - 1] - sample.size(-dim)
                        padded_samples.append(
                            F.pad(
                                sample.data, pad, value=sample.padding_value))
                    stacked.append(default_collate(padded_samples))
                elif batch[i].pad_dims is None:
                    stacked.append(
                        default_collate([
                            sample.data
                            for sample in batch[i:i + samples_per_gpu]
                        ]))
                else:
                    raise ValueError(
                        'pad_dims should be either None or integers (1-3)')

        else:
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], Sequence):
        transposed = zip(*batch)
        return [
            query_support_collate_fn(samples, samples_per_gpu)
            for samples in transposed
        ]
    elif isinstance(batch[0], Mapping):
        return {
            key: query_support_collate_fn([d[key] for d in batch],
                                          samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
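A sketch of where this collate function plugs in, mirroring the support branch of NwayKshotDataloader above (names are placeholders):

from functools import partial

from torch.utils.data import DataLoader

support_loader = DataLoader(
    support_dataset,
    batch_size=1,
    sampler=support_sampler,
    # each fetched item is already a list of support samples, so the
    # collate function flattens list[list[DataContainer]] into one batch
    collate_fn=partial(query_support_collate_fn, samples_per_gpu=1))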
@ -0,0 +1,183 @@
import os.path as osp
import xml.etree.ElementTree as ET

import mmcv
import numpy as np
from mmdet.datasets.builder import DATASETS

from .few_shot_custom import FewShotCustomDataset


@DATASETS.register_module()
class FewShotVOCDataset(FewShotCustomDataset):
    """VOC dataset for few shot detection.

    FewShotVOCDataset supports masking out annotations while they are loaded.
    The annotations can be loaded from image ids or image paths. For example:

    .. code-block:: none

        ann_image_id.txt:
        000001
        000002

        ann_image_path.txt:
        VOC2007/JPEGImages/000001.jpg
        VOC2007/JPEGImages/000002.jpg

    Args:
        min_size (int | float, optional): The minimum size of bounding
            boxes in the images. If the size of a bounding box is less than
            ``min_size``, it would be added to the ignored field.
            Default: None.
    """

    def __init__(self, min_size=None, **kwargs):
        assert self.CLASSES or kwargs.get(
            'classes', None), 'CLASSES in `XMLDataset` can not be None.'
        self.min_size = min_size
        super(FewShotVOCDataset, self).__init__(**kwargs)

    def load_annotations(self, ann_file):
        """Load annotations from an XML style ann_file.

        Args:
            ann_file (str): Path of XML file.

        Returns:
            list[dict]: Annotation info from XML file.
        """
        self.cat2label = {cat: i for i, cat in enumerate(self.CLASSES)}
        data_infos = []
        img_names = mmcv.list_from_file(ann_file)
        for img_name in img_names:
            # ann file in image path format
            if 'VOC2007' in img_name:
                dataset_year = 'VOC2007'
                img_id = img_name.split('/')[-1].split('.')[0]
                filename = img_name
            # ann file in image path format
            elif 'VOC2012' in img_name:
                dataset_year = 'VOC2012'
                img_id = img_name.split('/')[-1].split('.')[0]
                filename = img_name
            # ann file in image id format
            elif 'VOC2007' in ann_file:
                dataset_year = 'VOC2007'
                img_id = img_name
                filename = f'VOC2007/JPEGImages/{img_name}.jpg'
            # ann file in image id format
            elif 'VOC2012' in ann_file:
                dataset_year = 'VOC2012'
                img_id = img_name
                filename = f'VOC2012/JPEGImages/{img_name}.jpg'
            else:
                raise ValueError('Cannot infer dataset year from img_prefix')

            xml_path = osp.join(self.img_prefix, dataset_year, 'Annotations',
                                f'{img_id}.xml')
            tree = ET.parse(xml_path)
            root = tree.getroot()
            size = root.find('size')
            if size is not None:
                width = int(size.find('width').text)
                height = int(size.find('height').text)
            else:
                img_path = osp.join(self.img_prefix, dataset_year,
                                    'JPEGImages', f'{img_id}.jpg')
                # mmcv.imread returns a numpy array, so the spatial size
                # comes from its shape
                img = mmcv.imread(img_path)
                height, width = img.shape[:2]
            # save annotation infos into data infos, because not all the
            # annotations will be used for training and the used annotations
            # should stay the same anytime during training.
            ann_info = self._get_ann_info(dataset_year, img_id)
            data_infos.append(
                dict(
                    id=img_id,
                    filename=filename,
                    width=width,
                    height=height,
                    ann=ann_info))
        return data_infos

    def _get_ann_info(self, dataset_year, img_id):
        """Get annotation from XML file by img_id.

        Args:
            dataset_year (str): Year of voc dataset. Options are
                'VOC2007', 'VOC2012'.
            img_id (str): Id of image.

        Returns:
            dict: Annotation info of specified id with specified class.
        """

        bboxes = []
        labels = []
        bboxes_ignore = []
        labels_ignore = []

        xml_path = osp.join(self.img_prefix, dataset_year, 'Annotations',
                            f'{img_id}.xml')
        tree = ET.parse(xml_path)
        root = tree.getroot()
        for obj in root.findall('object'):
            name = obj.find('name').text
            if name not in self.CLASSES:
                continue
            label = self.cat2label[name]
            difficult = obj.find('difficult')
            difficult = 0 if difficult is None else int(difficult.text)
            bnd_box = obj.find('bndbox')
            # TODO: check whether it is necessary to use int
            # Coordinates may be float type
            bbox = [
                int(float(bnd_box.find('xmin').text)),
                int(float(bnd_box.find('ymin').text)),
                int(float(bnd_box.find('xmax').text)),
                int(float(bnd_box.find('ymax').text))
            ]
            ignore = False
            if self.min_size:
                assert not self.test_mode
                w = bbox[2] - bbox[0]
                h = bbox[3] - bbox[1]
                if w < self.min_size or h < self.min_size:
                    ignore = True
            if difficult or ignore:
                bboxes_ignore.append(bbox)
                labels_ignore.append(label)
            else:
                bboxes.append(bbox)
                labels.append(label)
        if not bboxes:
            bboxes = np.zeros((0, 4))
            labels = np.zeros((0, ))
        else:
            bboxes = np.array(bboxes, ndmin=2) - 1
            labels = np.array(labels)
        if not bboxes_ignore:
            bboxes_ignore = np.zeros((0, 4))
            labels_ignore = np.zeros((0, ))
        else:
            bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1
            labels_ignore = np.array(labels_ignore)
        ann_info = dict(
            bboxes=bboxes.astype(np.float32),
            labels=labels.astype(np.int64),
            bboxes_ignore=bboxes_ignore.astype(np.float32),
            labels_ignore=labels_ignore.astype(np.int64))
        return ann_info

    def _filter_imgs(self, min_size=32):
        """Filter images too small or without annotation."""
        valid_inds = []
        for i, img_info in enumerate(self.data_infos):
            if min(img_info['width'], img_info['height']) < min_size:
                continue
            if self.filter_empty_gt:
                cat_ids = img_info['ann']['labels'].astype(np.int64).tolist()
                if len(cat_ids) > 0:
                    valid_inds.append(i)
            else:
                valid_inds.append(i)
        return valid_inds
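For reference, a sketch of how the two ann_file formats above resolve to the same annotation path (illustrative only; the prefix and ids here are assumptions, not from the PR):

    img_prefix = 'data/VOCdevkit/'
    for img_name in ['000001', 'VOC2007/JPEGImages/000001.jpg']:
        if 'VOC2007' in img_name:  # image path format
            img_id = img_name.split('/')[-1].split('.')[0]
        else:  # image id format (year inferred from the ann_file path)
            img_id = img_name
        print(img_prefix + 'VOC2007/Annotations/' + img_id + '.xml')
        # -> data/VOCdevkit/VOC2007/Annotations/000001.xml in both cases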
@ -3,7 +3,8 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = setuptools
 known_first_party = mmfewshot
-known_third_party = mmcls,mmcv,mmdet,numpy,pytest,torch
+known_third_party = mmcls,mmcv,mmdet,numpy,pytest,terminaltables,torch
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
@ -0,0 +1,44 @@
<annotation>
    <folder>VOC2007</folder>
    <filename>000001.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>341012865</flickrid>
    </source>
    <owner>
        <flickrid>Fried Camels</flickrid>
        <name>Jinky the Fruit Bat</name>
    </owner>
    <size>
        <width>353</width>
        <height>500</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>dog</name>
        <pose>Left</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>48</xmin>
            <ymin>240</ymin>
            <xmax>195</xmax>
            <ymax>371</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Left</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>8</xmin>
            <ymin>12</ymin>
            <xmax>352</xmax>
            <ymax>498</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,32 @@
<annotation>
    <folder>VOC2007</folder>
    <filename>000002.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>329145082</flickrid>
    </source>
    <owner>
        <flickrid>hiromori2</flickrid>
        <name>Hiroyuki Mori</name>
    </owner>
    <size>
        <width>335</width>
        <height>500</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>train</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>139</xmin>
            <ymin>200</ymin>
            <xmax>207</xmax>
            <ymax>301</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,44 @@
<annotation>
    <folder>VOC2007</folder>
    <filename>000003.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>138563409</flickrid>
    </source>
    <owner>
        <flickrid>RandomEvent101</flickrid>
        <name>?</name>
    </owner>
    <size>
        <width>500</width>
        <height>375</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>sofa</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>123</xmin>
            <ymin>155</ymin>
            <xmax>215</xmax>
            <ymax>195</ymax>
        </bndbox>
    </object>
    <object>
        <name>chair</name>
        <pose>Left</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>239</xmin>
            <ymin>156</ymin>
            <xmax>307</xmax>
            <ymax>205</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,104 @@
<annotation>
    <folder>VOC2007</folder>
    <filename>000004.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>322032655</flickrid>
    </source>
    <owner>
        <flickrid>paytonc</flickrid>
        <name>Payton Chung</name>
    </owner>
    <size>
        <width>500</width>
        <height>406</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>car</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>13</xmin>
            <ymin>311</ymin>
            <xmax>84</xmax>
            <ymax>362</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>362</xmin>
            <ymin>330</ymin>
            <xmax>500</xmax>
            <ymax>389</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>235</xmin>
            <ymin>328</ymin>
            <xmax>334</xmax>
            <ymax>375</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>175</xmin>
            <ymin>327</ymin>
            <xmax>252</xmax>
            <ymax>364</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>139</xmin>
            <ymin>320</ymin>
            <xmax>189</xmax>
            <ymax>359</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>108</xmin>
            <ymin>325</ymin>
            <xmax>150</xmax>
            <ymax>353</ymax>
        </bndbox>
    </object>
    <object>
        <name>car</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>84</xmin>
            <ymin>323</ymin>
            <xmax>121</xmax>
            <ymax>350</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,80 @@
<annotation>
    <folder>VOC2007</folder>
    <filename>000005.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
        <flickrid>325991873</flickrid>
    </source>
    <owner>
        <flickrid>archintent louisville</flickrid>
        <name>?</name>
    </owner>
    <size>
        <width>500</width>
        <height>375</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>chair</name>
        <pose>Rear</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>263</xmin>
            <ymin>211</ymin>
            <xmax>324</xmax>
            <ymax>339</ymax>
        </bndbox>
    </object>
    <object>
        <name>chair</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>165</xmin>
            <ymin>264</ymin>
            <xmax>253</xmax>
            <ymax>372</ymax>
        </bndbox>
    </object>
    <object>
        <name>chair</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>1</difficult>
        <bndbox>
            <xmin>5</xmin>
            <ymin>244</ymin>
            <xmax>67</xmax>
            <ymax>374</ymax>
        </bndbox>
    </object>
    <object>
        <name>chair</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>241</xmin>
            <ymin>194</ymin>
            <xmax>295</xmax>
            <ymax>299</ymax>
        </bndbox>
    </object>
    <object>
        <name>chair</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>1</difficult>
        <bndbox>
            <xmin>277</xmin>
            <ymin>186</ymin>
            <xmax>312</xmax>
            <ymax>220</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,5 @@
000001
000002
000003
000004
000005
@ -0,0 +1,5 @@
000001
000002
000003
000004
000005
(binary diff: five JPEG test images added, 10 KiB - 120 KiB)
@ -0,0 +1,63 @@
<annotation>
    <folder>VOC2012</folder>
    <filename>2007_000027.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>
        <width>486</width>
        <height>500</height>
        <depth>3</depth>
    </size>
    <segmented>0</segmented>
    <object>
        <name>person</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>174</xmin>
            <ymin>101</ymin>
            <xmax>349</xmax>
            <ymax>351</ymax>
        </bndbox>
        <part>
            <name>head</name>
            <bndbox>
                <xmin>169</xmin>
                <ymin>104</ymin>
                <xmax>209</xmax>
                <ymax>146</ymax>
            </bndbox>
        </part>
        <part>
            <name>hand</name>
            <bndbox>
                <xmin>278</xmin>
                <ymin>210</ymin>
                <xmax>297</xmax>
                <ymax>233</ymax>
            </bndbox>
        </part>
        <part>
            <name>foot</name>
            <bndbox>
                <xmin>273</xmin>
                <ymin>333</ymin>
                <xmax>297</xmax>
                <ymax>354</ymax>
            </bndbox>
        </part>
        <part>
            <name>foot</name>
            <bndbox>
                <xmin>319</xmin>
                <ymin>307</ymin>
                <xmax>340</xmax>
                <ymax>326</ymax>
            </bndbox>
        </part>
    </object>
</annotation>
@ -0,0 +1,63 @@
<annotation>
    <folder>VOC2012</folder>
    <filename>2007_000032.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>
        <width>500</width>
        <height>281</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>
    <object>
        <name>aeroplane</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>104</xmin>
            <ymin>78</ymin>
            <xmax>375</xmax>
            <ymax>183</ymax>
        </bndbox>
    </object>
    <object>
        <name>aeroplane</name>
        <pose>Left</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>133</xmin>
            <ymin>88</ymin>
            <xmax>197</xmax>
            <ymax>123</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Rear</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>195</xmin>
            <ymin>180</ymin>
            <xmax>213</xmax>
            <ymax>229</ymax>
        </bndbox>
    </object>
    <object>
        <name>person</name>
        <pose>Rear</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>26</xmin>
            <ymin>189</ymin>
            <xmax>44</xmax>
            <ymax>238</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,51 @@
<annotation>
    <folder>VOC2012</folder>
    <filename>2007_000033.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>
        <width>500</width>
        <height>366</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>
    <object>
        <name>aeroplane</name>
        <pose>Unspecified</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>9</xmin>
            <ymin>107</ymin>
            <xmax>499</xmax>
            <ymax>263</ymax>
        </bndbox>
    </object>
    <object>
        <name>aeroplane</name>
        <pose>Left</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>421</xmin>
            <ymin>200</ymin>
            <xmax>482</xmax>
            <ymax>226</ymax>
        </bndbox>
    </object>
    <object>
        <name>aeroplane</name>
        <pose>Left</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>325</xmin>
            <ymin>188</ymin>
            <xmax>411</xmax>
            <ymax>223</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,27 @@
<annotation>
    <folder>VOC2012</folder>
    <filename>2007_000039.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>
        <width>500</width>
        <height>375</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>
    <object>
        <name>tvmonitor</name>
        <pose>Frontal</pose>
        <truncated>0</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>156</xmin>
            <ymin>89</ymin>
            <xmax>344</xmax>
            <ymax>279</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,39 @@
<annotation>
    <folder>VOC2012</folder>
    <filename>2007_000042.jpg</filename>
    <source>
        <database>The VOC2007 Database</database>
        <annotation>PASCAL VOC2007</annotation>
        <image>flickr</image>
    </source>
    <size>
        <width>500</width>
        <height>335</height>
        <depth>3</depth>
    </size>
    <segmented>1</segmented>
    <object>
        <name>train</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>263</xmin>
            <ymin>32</ymin>
            <xmax>500</xmax>
            <ymax>295</ymax>
        </bndbox>
    </object>
    <object>
        <name>train</name>
        <pose>Unspecified</pose>
        <truncated>1</truncated>
        <difficult>0</difficult>
        <bndbox>
            <xmin>1</xmin>
            <ymin>36</ymin>
            <xmax>235</xmax>
            <ymax>299</ymax>
        </bndbox>
    </object>
</annotation>
@ -0,0 +1,5 @@
2007_000027
2007_000032
2007_000033
2007_000039
2007_000042
@ -0,0 +1,5 @@
2007_000027
2007_000032
2007_000033
2007_000039
2007_000042
(binary diff: five JPEG test images added, 54 KiB - 142 KiB)
@ -0,0 +1,77 @@
{
    "images": [
        {
            "file_name": "fake1.jpg",
            "height": 800,
            "width": 800,
            "id": 0
        },
        {
            "file_name": "fake2.jpg",
            "height": 800,
            "width": 800,
            "id": 1
        },
        {
            "file_name": "fake3.jpg",
            "height": 800,
            "width": 800,
            "id": 2
        }
    ],
    "annotations": [
        {
            "bbox": [
                0,
                0,
                20,
                20
            ],
            "area": 400.00,
            "score": 1.0,
            "category_id": 1,
            "id": 1,
            "image_id": 0
        },
        {
            "bbox": [
                0,
                0,
                20,
                20
            ],
            "area": 400.00,
            "score": 1.0,
            "category_id": 2,
            "id": 2,
            "image_id": 0
        },
        {
            "bbox": [
                0,
                0,
                20,
                20
            ],
            "area": 400.00,
            "score": 1.0,
            "category_id": 1,
            "id": 3,
            "image_id": 1
        }
    ],
    "categories": [
        {
            "id": 1,
            "name": "bus",
            "supercategory": "none"
        },
        {
            "id": 2,
            "name": "car",
            "supercategory": "none"
        }
    ],
    "licenses": [],
    "info": null
}
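As used by the tests later in this PR, an ann_shot_filter caps how many annotations of a class survive loading. A standalone sketch of that filtering idea over a COCO-style annotation list like the one above (hypothetical helper, not the PR's implementation):

    def filter_shots(annotations, cat_name_by_id, shots):
        """Keep at most shots[class_name] annotations per class."""
        kept, seen = [], {}
        for ann in annotations:
            name = cat_name_by_id[ann['category_id']]
            if name not in shots:
                continue  # classes outside the filter are dropped
            if seen.get(name, 0) < shots[name]:
                kept.append(ann)
                seen[name] = seen.get(name, 0) + 1
        return kept

    # e.g. filter_shots(anns, {1: 'bus', 2: 'car'}, {'bus': 5})
    # keeps at most five 'bus' annotations and no 'car' annotations.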
(binary diff: one JPEG test image added, 35 KiB)
@ -0,0 +1,10 @@
VOC2007/JPEGImages/000001.jpg
VOC2007/JPEGImages/000002.jpg
VOC2007/JPEGImages/000003.jpg
VOC2007/JPEGImages/000004.jpg
VOC2007/JPEGImages/000005.jpg
VOC2012/JPEGImages/2007_000027.jpg
VOC2012/JPEGImages/2007_000032.jpg
VOC2012/JPEGImages/2007_000033.jpg
VOC2012/JPEGImages/2007_000039.jpg
VOC2012/JPEGImages/2007_000042.jpg
@ -0,0 +1,10 @@
VOC2007/JPEGImages/000001.jpg
VOC2007/JPEGImages/000002.jpg
VOC2007/JPEGImages/000003.jpg
VOC2007/JPEGImages/000004.jpg
VOC2007/JPEGImages/000005.jpg
VOC2012/JPEGImages/2007_000027.jpg
VOC2012/JPEGImages/2007_000032.jpg
VOC2012/JPEGImages/2007_000033.jpg
VOC2012/JPEGImages/2007_000039.jpg
VOC2012/JPEGImages/2007_000042.jpg
@ -0,0 +1,10 @@
VOC2007/JPEGImages/000001.jpg
VOC2007/JPEGImages/000002.jpg
VOC2007/JPEGImages/000003.jpg
VOC2007/JPEGImages/000004.jpg
VOC2007/JPEGImages/000005.jpg
VOC2012/JPEGImages/2007_000027.jpg
VOC2012/JPEGImages/2007_000032.jpg
VOC2012/JPEGImages/2007_000033.jpg
VOC2012/JPEGImages/2007_000039.jpg
VOC2012/JPEGImages/2007_000042.jpg
@ -0,0 +1,10 @@
VOC2007/JPEGImages/000001.jpg
VOC2007/JPEGImages/000002.jpg
VOC2007/JPEGImages/000003.jpg
VOC2007/JPEGImages/000004.jpg
VOC2007/JPEGImages/000005.jpg
VOC2012/JPEGImages/2007_000027.jpg
VOC2012/JPEGImages/2007_000032.jpg
VOC2012/JPEGImages/2007_000033.jpg
VOC2012/JPEGImages/2007_000039.jpg
VOC2012/JPEGImages/2007_000042.jpg
@ -0,0 +1,10 @@
VOC2007/JPEGImages/000001.jpg
VOC2007/JPEGImages/000002.jpg
VOC2007/JPEGImages/000003.jpg
VOC2007/JPEGImages/000004.jpg
VOC2007/JPEGImages/000005.jpg
VOC2012/JPEGImages/2007_000027.jpg
VOC2012/JPEGImages/2007_000032.jpg
VOC2012/JPEGImages/2007_000033.jpg
VOC2012/JPEGImages/2007_000039.jpg
VOC2012/JPEGImages/2007_000042.jpg
(binary diff: one JPEG test image added, 38 KiB)
@ -0,0 +1,247 @@
import torch

from mmfewshot.apis.train import set_random_seed
from mmfewshot.detection.datasets.builder import (build_dataloader,
                                                  build_dataset)


def test_dataloader():
    set_random_seed(2021)

    # test regular and few shot annotations
    dataconfigs = [{
        'type': 'NwayKshotDataset',
        'support_way': 5,
        'support_shot': 1,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
                'tests/data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'pipeline': {
                'query': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ],
                'support': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ]
            },
            'classes': ('person', 'dog', 'chair', 'car', 'aeroplane', 'train'),
            'merge_dataset':
            True
        }
    }, {
        'type': 'NwayKshotDataset',
        'support_way': 5,
        'support_shot': 1,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/few_shot_voc_split/1.txt',
                'tests/data/few_shot_voc_split/2.txt',
                'tests/data/few_shot_voc_split/3.txt',
                'tests/data/few_shot_voc_split/4.txt',
                'tests/data/few_shot_voc_split/5.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'ann_shot_filter': [{
                'person': 2
            }, {
                'dog': 2
            }, {
                'chair': 3
            }, {
                'car': 3
            }, {
                'aeroplane': 3
            }],
            'pipeline': {
                'query': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ],
                'support': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ]
            },
            'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
            'merge_dataset':
            True
        }
    }]

    for dataconfig in dataconfigs:

        nway_kshot_dataset = build_dataset(cfg=dataconfig)
        nway_kshot_dataloader = build_dataloader(
            nway_kshot_dataset,
            samples_per_gpu=2,
            workers_per_gpu=0,
            num_gpus=1,
            dist=False,
            shuffle=True,
            seed=2021)

        for i, data_batch in enumerate(nway_kshot_dataloader):
            assert len(data_batch['query_data']['img_metas'].data[0]) == 2
            assert len(nway_kshot_dataloader.query_data_loader) == \
                len(nway_kshot_dataloader.support_data_loader)
            support_labels = data_batch['support_data']['gt_labels'].data[0]
            assert len(set(torch.cat(
                support_labels).tolist())) == dataconfig['support_way']
            assert len(torch.cat(support_labels).tolist()) == \
                dataconfig['support_way'] * dataconfig['support_shot']

    dataconfigs = [{
        'type': 'QueryAwareDataset',
        'support_way': 3,
        'support_shot': 5,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
                'tests/data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'pipeline': {
                'query': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ],
                'support': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ]
            },
            'classes': ('dog', 'chair', 'car'),
            'merge_dataset':
            True
        }
    }, {
        'type': 'QueryAwareDataset',
        'support_way': 3,
        'support_shot': 2,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/few_shot_voc_split/1.txt',
                'tests/data/few_shot_voc_split/2.txt',
                'tests/data/few_shot_voc_split/3.txt',
                'tests/data/few_shot_voc_split/4.txt',
                'tests/data/few_shot_voc_split/5.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'ann_shot_filter': [{
                'person': 1
            }, {
                'dog': 1
            }, {
                'chair': 2
            }, {
                'car': 2
            }, {
                'aeroplane': 2
            }],
            'pipeline': {
                'query': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ],
                'support': [
                    dict(type='LoadImageFromFile'),
                    dict(type='LoadAnnotations', with_bbox=True),
                    dict(type='RandomFlip', flip_ratio=0.5),
                    dict(type='Pad', size_divisor=32),
                    dict(type='DefaultFormatBundle'),
                    dict(
                        type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
                ]
            },
            'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
            'merge_dataset':
            True
        }
    }]

    for dataconfig in dataconfigs:
        query_aware_dataset = build_dataset(cfg=dataconfig)
        query_aware_dataloader = build_dataloader(
            query_aware_dataset,
            samples_per_gpu=2,
            workers_per_gpu=0,
            num_gpus=1,
            dist=False,
            shuffle=True,
            seed=2021)

        for i, data_batch in enumerate(query_aware_dataloader):
            assert len(data_batch['query_data']['img_metas'].data[0]) == 2
            assert len(data_batch['query_data']['query_class'].tolist()) == 2
            support_labels = data_batch['support_data']['gt_labels'].data[0]
            half_batch = len(support_labels) // 2
            assert len(set(torch.cat(support_labels[:half_batch]).tolist())) \
                == dataconfig['support_way']
            assert len(set(torch.cat(support_labels[half_batch:]).tolist())) \
                == dataconfig['support_way']
@ -0,0 +1,49 @@
from mmfewshot.apis.train import set_random_seed
from mmfewshot.detection.datasets.coco import FewShotCocoDataset


def test_few_shot_coco_dataset():
    set_random_seed(2021)
    # test regular annotation loading
    dataconfig = {
        'ann_file': 'tests/data/coco_sample.json',
        'img_prefix': '',
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('bus', 'car')
    }
    few_shot_custom_dataset = FewShotCocoDataset(**dataconfig)

    # images without labels are filtered out
    assert len(few_shot_custom_dataset.data_infos) == 2
    assert few_shot_custom_dataset.CLASSES == ('bus', 'car')
    # test loading annotations of a specific class
    dataconfig = {
        'ann_file': 'tests/data/few_shot_coco_split/bus.json',
        'img_prefix': '',
        'ann_shot_filter': {
            'bus': 5
        },
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('bus', 'dog', 'car'),
    }
    few_shot_custom_dataset = FewShotCocoDataset(**dataconfig)
    count = 0
    for datainfo in few_shot_custom_dataset.data_infos:
        count += len(datainfo['ann']['labels'])
        for i in range(len(datainfo['ann']['labels'])):
            assert datainfo['ann']['labels'][i] == 0
    assert count == 5
@ -0,0 +1,112 @@
import copy
from unittest.mock import MagicMock, patch

import numpy as np

from mmfewshot.detection.datasets import FewShotCustomDataset

data_infos = [
    {
        'id': '1',
        'filename': 'tests/data/VOCdevkit/VOC2007/JPEGImages/000001.jpg',
        'width': 800,
        'height': 720,
        'ann': {
            'bboxes': np.array([[10, 10, 100, 100], [20, 20, 200, 200]]),
            'labels': np.array([0, 1])
        }
    },
    {
        'id': '2',
        'filename': 'tests/data/VOCdevkit/VOC2007/JPEGImages/000002.jpg',
        'width': 800,
        'height': 720,
        'ann': {
            'bboxes': np.array([[11, 11, 100, 100], [20, 20, 200, 200]]),
            'labels': np.array([1, 1])
        }
    },
    {
        'id': '3',
        'filename': 'tests/data/VOCdevkit/VOC2007/JPEGImages/000003.jpg',
        'width': 800,
        'height': 720,
        'ann': {
            'bboxes':
            np.array([[11, 11, 100, 100], [20, 20, 200, 200],
                      [20, 20, 200, 200]]),
            'labels':
            np.array([2, 3, 3, 4])
        }
    },
    {
        'id': '4',
        'filename': 'tests/data/VOCdevkit/VOC2007/JPEGImages/000004.jpg',
        'width': 800,
        'height': 720,
        'ann': {
            'bboxes':
            np.array([[11, 11, 100, 100], [20, 20, 200, 200],
                      [20, 20, 200, 200], [20, 20, 200, 200]]),
            'labels':
            np.array([2, 2, 4, 4])
        }
    },
]


@patch('mmfewshot.detection.datasets.FewShotCustomDataset.load_annotations',
       MagicMock(return_value=data_infos))
def test_few_shot_custom_dataset():
    dataconfig = {
        'ann_file': '',
        'img_prefix': '',
        'ann_shot_filter': {
            'cat': 10,
            'dog': 10,
            'person': 2,
            'car': 2,
        },
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('cat', 'dog', 'person', 'car', 'bird')
    }

    few_shot_custom_dataset = FewShotCustomDataset(**dataconfig)
    original_data_infos = copy.deepcopy(few_shot_custom_dataset.data_infos)

    # test prepare_train_img()
    data = few_shot_custom_dataset.prepare_train_img(0, 'query')
    assert (data['img_info']['ann']['bboxes'] == np.array(
        [[10, 10, 100, 100], [20, 20, 200, 200]])).all()
    assert (data['img_info']['ann']['labels'] == np.array([0, 1])).all()

    data = few_shot_custom_dataset.prepare_train_img(1, 'support')
    assert (data['img_info']['ann']['bboxes'] == np.array(
        [[11, 11, 100, 100], [20, 20, 200, 200]])).all()
    assert (data['img_info']['ann']['labels'] == np.array([1, 1])).all()

    data = few_shot_custom_dataset.prepare_train_img(0, 'query', [0])
    assert (data['img_info']['ann']['bboxes'] == np.array(
        [[10, 10, 100, 100]])).all()
    assert (data['img_info']['ann']['labels'] == np.array([0])).all()

    data = few_shot_custom_dataset.prepare_train_img(0, 'support', [1])
    assert (data['img_info']['ann']['bboxes'] == np.array(
        [[20, 20, 200, 200]])).all()
    assert (data['img_info']['ann']['labels'] == np.array([1])).all()

    # test whether data_infos have been accidentally changed or not
    for i in range(len(few_shot_custom_dataset)):
        assert (original_data_infos[i]['ann']['bboxes'] ==
                few_shot_custom_dataset.data_infos[i]['ann']['bboxes']).all()
        assert (original_data_infos[i]['ann']['labels'] ==
                few_shot_custom_dataset.data_infos[i]['ann']['labels']).all()
@ -0,0 +1,70 @@
from mmfewshot.apis.train import set_random_seed
from mmfewshot.detection.datasets.voc import FewShotVOCDataset


def test_few_shot_voc_dataset():
    set_random_seed(2021)
    # test regular annotation loading
    dataconfig = {
        'ann_file': 'tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
        'img_prefix': 'tests/data/VOCdevkit/',
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('car', 'dog', 'chair')
    }
    few_shot_custom_dataset = FewShotVOCDataset(**dataconfig)

    # images without labels are filtered out
    assert len(few_shot_custom_dataset.data_infos) == 4
    assert few_shot_custom_dataset.CLASSES == ('car', 'dog', 'chair')
    # test loading annotations of a specific class
    dataconfig = {
        'ann_file': 'tests/data/few_shot_voc_split/1.txt',
        'img_prefix': 'tests/data/VOCdevkit/',
        'ann_shot_filter': {
            'aeroplane': 10
        },
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('car', 'dog', 'chair', 'aeroplane'),
    }
    few_shot_custom_dataset = FewShotVOCDataset(**dataconfig)
    count = 0
    for datainfo in few_shot_custom_dataset.data_infos:
        count += len(datainfo['ann']['bboxes'])
    assert count == 5

    # test loading annotations of a specific class with a specific shot
    dataconfig = {
        'ann_file': 'tests/data/few_shot_voc_split/1.txt',
        'img_prefix': 'tests/data/VOCdevkit/',
        'ann_shot_filter': {
            'aeroplane': 2
        },
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('car', 'dog', 'chair', 'aeroplane'),
    }
    few_shot_custom_dataset = FewShotVOCDataset(**dataconfig)
    count = 0
    for datainfo in few_shot_custom_dataset.data_infos:
        count += len(datainfo['ann']['bboxes'])
    assert count == 2
@ -0,0 +1,137 @@
import numpy as np

from mmfewshot.apis.train import set_random_seed
from mmfewshot.detection.datasets.builder import build_dataset


def test_merge_dataset():
    set_random_seed(2023)
    # test merge dataset loading regular annotations
    dataconfig = {
        'type':
        'FewShotVOCDataset',
        'ann_file': [
            'tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
            'tests/data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt'
        ],
        'img_prefix': [
            'tests/data/VOCdevkit/',
            'tests/data/VOCdevkit/',
        ],
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
        'merge_dataset':
        True
    }
    merge_dataset = build_dataset(cfg=dataconfig)
    count = [0 for _ in range(5)]
    for data_info in merge_dataset.dataset.data_infos:
        # test label merge
        if data_info['id'] == '000001':
            assert (np.sort(data_info['ann']['labels']) ==
                    np.array([0, 1])).all()
        for label in data_info['ann']['labels']:
            count[label] += 1
    assert count == [4, 1, 4, 7, 5]

    # test merge dataset loading annotations by class
    dataconfig = {
        'type':
        'FewShotVOCDataset',
        'ann_file': [
            'tests/data/few_shot_voc_split/1.txt',
            'tests/data/few_shot_voc_split/2.txt',
            'tests/data/few_shot_voc_split/3.txt',
            'tests/data/few_shot_voc_split/4.txt',
            'tests/data/few_shot_voc_split/5.txt'
        ],
        'img_prefix': [
            'tests/data/VOCdevkit/',
            'tests/data/VOCdevkit/',
            'tests/data/VOCdevkit/',
            'tests/data/VOCdevkit/',
            'tests/data/VOCdevkit/',
        ],
        'ann_shot_filter': [{
            'person': 2
        }, {
            'dog': 2
        }, {
            'chair': 3
        }, {
            'car': 3
        }, {
            'aeroplane': 3
        }],
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
        'merge_dataset':
        True
    }
    merge_dataset = build_dataset(cfg=dataconfig)
    count = [0 for _ in range(5)]
    for data_info in merge_dataset.dataset.data_infos:
        # test label merge
        if data_info['id'] == '000001':
            assert (np.sort(data_info['ann']['labels']) ==
                    np.array([0, 1])).all()
        for label in data_info['ann']['labels']:
            count[label] += 1
    assert count == [2, 1, 3, 3, 3]

    # test loading annotations of a specific class with a specific shot
    dataconfig = {
        'type':
        'FewShotCocoDataset',
        'ann_file': [
            'tests/data/few_shot_coco_split/bus.json',
            'tests/data/few_shot_coco_split/car.json',
            'tests/data/few_shot_coco_split/cat.json',
            'tests/data/few_shot_coco_split/dog.json',
            'tests/data/few_shot_coco_split/person.json',
        ],
        'img_prefix': ['', '', '', '', ''],
        'ann_shot_filter': [{
            'bus': 2
        }, {
            'car': 2
        }, {
            'cat': 3
        }, {
            'dog': 3
        }, {
            'person': 3
        }],
        'pipeline': {
            'query': [{
                'type': 'LoadImageFromFile'
            }],
            'support': [{
                'type': 'LoadImageFromFile'
            }]
        },
        'classes': ('bus', 'car', 'cat', 'dog', 'person'),
        'merge_dataset':
        True
    }
    merge_dataset = build_dataset(cfg=dataconfig)
    count = [0 for _ in range(5)]
    for data_info in merge_dataset.dataset.data_infos:
        # test label merge
        for label in data_info['ann']['labels']:
            count[label] += 1
    assert count == [2, 2, 3, 3, 3]
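The "test label merge" checks above rely on each per-class split being remapped into the shared classes tuple when datasets are merged. A standalone sketch of that remapping idea (hypothetical, not the PR's implementation):

    classes = ('person', 'dog', 'chair', 'car', 'aeroplane')
    # each single-class sub-dataset labels its own class as 0
    sub_labels = {'person': [0, 0], 'dog': [0], 'chair': [0, 0, 0]}
    merged = {
        name: [classes.index(name) for _ in labels]
        for name, labels in sub_labels.items()
    }
    assert merged['chair'] == [2, 2, 2]  # remapped into the joint label space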
@ -0,0 +1,145 @@
import numpy as np

from mmfewshot.apis.train import set_random_seed
from mmfewshot.detection.datasets.builder import build_dataset


def test_nway_kshot_dataset():
    set_random_seed(2021)
    # test regular and few shot annotations
    dataconfigs = [{
        'type': 'NwayKshotDataset',
        'support_way': 5,
        'support_shot': 1,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
                'tests/data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'pipeline': {
                'query': [{
                    'type': 'LoadImageFromFile'
                }],
                'support': [{
                    'type': 'LoadImageFromFile'
                }]
            },
            'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
            'merge_dataset':
            True
        }
    }, {
        'type': 'NwayKshotDataset',
        'support_way': 5,
        'support_shot': 1,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/few_shot_voc_split/1.txt',
                'tests/data/few_shot_voc_split/2.txt',
                'tests/data/few_shot_voc_split/3.txt',
                'tests/data/few_shot_voc_split/4.txt',
                'tests/data/few_shot_voc_split/5.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'ann_shot_filter': [{
                'person': 2
            }, {
                'dog': 2
            }, {
                'chair': 3
            }, {
                'car': 3
            }, {
                'aeroplane': 3
            }],
            'pipeline': {
                'query': [{
                    'type': 'LoadImageFromFile'
                }],
                'support': [{
                    'type': 'LoadImageFromFile'
                }]
            },
            'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
            'merge_dataset':
            True
        }
    }]
    for dataconfig in dataconfigs:
        # test query dataset with 5 way 1 shot
        nway_kshot_dataset = build_dataset(cfg=dataconfig)
        assert nway_kshot_dataset.data_type == 'query'
        assert np.sum(nway_kshot_dataset.flag) == 0
        assert isinstance(nway_kshot_dataset[0], dict)
        # test support dataset with 5 way 1 shot
        nway_kshot_dataset.convert_query_to_support(support_dataset_len=2)
        batch_index = nway_kshot_dataset.batch_index
        assert nway_kshot_dataset.data_type == 'support'
        assert nway_kshot_dataset.flag.shape[0] == 2
        assert len(batch_index) == 2
        assert len(batch_index[0]) == 5
        assert len(batch_index[0][0]) == 2
        # test batch of support dataset with 5 way 1 shot
        support_batch = nway_kshot_dataset[0]
        assert isinstance(support_batch, list)
        count_classes = [0 for _ in range(5)]
        for item in support_batch:
            count_classes[item['ann_info']['labels'][0]] += 1
        for count in count_classes:
            assert count == 1
        # test support dataset with 4 way 2 shot
        dataconfig['support_way'] = 4
        dataconfig['support_shot'] = 2
        nway_kshot_dataset = build_dataset(cfg=dataconfig)
        assert nway_kshot_dataset.data_type == 'query'
        assert np.sum(nway_kshot_dataset.flag) == 0
        assert isinstance(nway_kshot_dataset[0], dict)
        # test support dataset with 4 way 2 shot
        nway_kshot_dataset.convert_query_to_support(support_dataset_len=3)
        batch_index = nway_kshot_dataset.batch_index
        assert nway_kshot_dataset.data_type == 'support'
        assert nway_kshot_dataset.flag.shape[0] == 3
        assert len(batch_index) == 3
        assert len(batch_index[0]) == 4 * 2
        assert len(batch_index[0][0]) == 2
        for i in range(len(nway_kshot_dataset.CLASSES)):
            assert len(nway_kshot_dataset.data_infos_by_class[i]) >= 2
        # test batch of support dataset with 4 way 2 shot
        for idx in range(3):
            support_batch = nway_kshot_dataset[idx]
            assert isinstance(support_batch, list)
            count_classes = [0 for _ in range(5)]
            dog_ann = None
            for item in support_batch:
                label = item['ann_info']['labels'][0]
                count_classes[label] += 1
                # test whether the dog annotation is repeated or not
                # (there is only one dog instance)
                if label == 1:
                    if dog_ann is None:
                        dog_ann = item['ann_info']['bboxes']
                    else:
                        assert (dog_ann == item['ann_info']['bboxes']).all()
            # test the number of classes sampled:
            # 4 classes have 2 shots each, 1 class has 0 shots
            is_skip = False
            for count in count_classes:
                if count == 0:
                    assert not is_skip
                    is_skip = True
                else:
                    assert count == 2
@ -0,0 +1,136 @@
import numpy as np

from mmfewshot.apis.train import set_random_seed
from mmfewshot.detection.datasets.builder import build_dataset


def test_query_aware_dataset():
    set_random_seed(2023)
    # test regular annotations
    dataconfig = {
        'type': 'QueryAwareDataset',
        'support_way': 3,
        'support_shot': 5,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt',
                'tests/data/VOCdevkit/VOC2012/ImageSets/Main/trainval.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'pipeline': {
                'query': [{
                    'type': 'LoadImageFromFile'
                }],
                'support': [{
                    'type': 'LoadImageFromFile'
                }]
            },
            'classes': ('dog', 'chair', 'car'),
            'merge_dataset':
            True
        }
    }
    # test query-aware dataset with 3 way 5 shot
    query_aware_dataset = build_dataset(cfg=dataconfig)

    assert np.sum(query_aware_dataset.flag) == 0
    # print(query_aware_dataset.data_infos_by_class)
    # self.data_infos_by_class = {
    #     0: [(0, 0)],
    #     1: [(1, 0), (3, 0), (3, 1), (3, 2)],
    #     2: [(2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (2, 5), (2, 6)]
    # }
    assert query_aware_dataset.sample_support_shots(0, 0, True) == \
        [(0, 0), (0, 0), (0, 0), (0, 0), (0, 0)]
    support = query_aware_dataset.sample_support_shots(0, 1, False)
    assert len(set(support)) == 4
    support = query_aware_dataset.sample_support_shots(1, 1, False)
    assert len(set(support)) == 3
    support = query_aware_dataset.sample_support_shots(3, 1, False)
    assert len(set(support)) == 1
    support = query_aware_dataset.sample_support_shots(3, 2)
    assert len(set(support)) == 5
    support = query_aware_dataset.sample_support_shots(3, 0)
    assert len(set(support)) == 1

    dataconfig = {
        'type': 'QueryAwareDataset',
        'support_way': 3,
        'support_shot': 2,
        'dataset': {
            'type':
            'FewShotVOCDataset',
            'ann_file': [
                'tests/data/few_shot_voc_split/1.txt',
                'tests/data/few_shot_voc_split/2.txt',
                'tests/data/few_shot_voc_split/3.txt',
                'tests/data/few_shot_voc_split/4.txt',
                'tests/data/few_shot_voc_split/5.txt'
            ],
            'img_prefix': [
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
                'tests/data/VOCdevkit/',
            ],
            'ann_shot_filter': [{
                'person': 1
            }, {
                'dog': 1
            }, {
                'chair': 2
            }, {
                'car': 2
            }, {
                'aeroplane': 2
            }],
            'pipeline': {
                'query': [{
                    'type': 'LoadImageFromFile'
                }],
                'support': [{
                    'type': 'LoadImageFromFile'
                }]
            },
            'classes': ('person', 'dog', 'chair', 'car', 'aeroplane'),
            'merge_dataset':
            True
        }
    }

    query_aware_dataset = build_dataset(cfg=dataconfig)

    assert np.sum(query_aware_dataset.flag) == 0
    # print(query_aware_dataset.data_infos_by_class)
    # self.data_infos_by_class = {
    #     0: [(0, 0)],
    #     1: [(1, 0)],
    #     2: [(2, 0), (2, 1)],
    #     3: [(3, 0), (3, 1)],
    #     4: [(4, 0), (5, 0)]}
    assert query_aware_dataset.sample_support_shots(0, 0, True) == \
        [(0, 0), (0, 0)]
    support = query_aware_dataset.sample_support_shots(0, 1, False)
    assert len(set(support)) == 1
    support = query_aware_dataset.sample_support_shots(3, 0)
    assert len(set(support)) == 1
    assert len(support) == 2
    support = query_aware_dataset.sample_support_shots(3, 2)
    assert len(set(support)) == 2

    batch = query_aware_dataset[0]
    assert len(batch['support_data']) == 6
    assert batch['query_data']['ann_info']['labels'][0] == \
        batch['support_data'][0]['ann_info']['labels'][0]
    assert batch['query_data']['ann_info']['labels'][0] == \
        batch['support_data'][1]['ann_info']['labels'][0]
    assert batch['support_data'][2]['ann_info']['labels'][0] == \
        batch['support_data'][3]['ann_info']['labels'][0]
    assert batch['support_data'][4]['ann_info']['labels'][0] == \
        batch['support_data'][5]['ann_info']['labels'][0]
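The pairing asserted at the end follows from the QueryAwareDataset batch layout: with support_way=3 and support_shot=2, support_data holds way * shot = 6 items grouped shot-by-shot per class, and the first group shares the query's class. A standalone sketch of that layout check (the labels are made up, not taken from the fixture data):

    support_way, support_shot = 3, 2
    support_labels = [0, 0, 2, 2, 4, 4]  # one group of `shot` items per way
    groups = [
        support_labels[i * support_shot:(i + 1) * support_shot]
        for i in range(support_way)
    ]
    assert all(len(set(g)) == 1 for g in groups)  # one class per group
    assert groups[0][0] == 0  # first group matches the query class (here 0)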