From bddbc085fcd23895bb2dac31cf6ad95cce0b4853 Mon Sep 17 00:00:00 2001 From: Yuan Liu <30762564+YuanLiuuuuuu@users.noreply.github.com> Date: Tue, 6 Jun 2023 12:50:42 +0800 Subject: [PATCH] [Feature]: Add image_only param (#1613) * [Feature]: Add image_only param * [Feature]: Use image_only --- mmpretrain/datasets/scienceqa.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mmpretrain/datasets/scienceqa.py b/mmpretrain/datasets/scienceqa.py index f0205a2c..8e442491 100644 --- a/mmpretrain/datasets/scienceqa.py +++ b/mmpretrain/datasets/scienceqa.py @@ -23,6 +23,8 @@ class ScienceQA(BaseDataset): split_file (str): The split file of dataset, which contains the ids of data samples in the split. ann_file (str): Annotation file path. + image_only (bool): Whether only to load data with image. Defaults to + False. data_prefix (dict): Prefix for data field. Defaults to ``dict(img_path='')``. pipeline (Sequence): Processing pipeline. Defaults to an empty tuple. @@ -34,15 +36,16 @@ class ScienceQA(BaseDataset): split: str, split_file: str, ann_file: str, + image_only: bool = False, data_prefix: dict = dict(img_path=''), pipeline: Sequence[Callable] = (), **kwargs): - assert split in [ 'train', 'val', 'test', 'trainval', 'minival', 'minitest' ], f'Invalid split {split}' self.split = split self.split_file = os.path.join(data_root, split_file) + self.image_only = image_only super().__init__( data_root=data_root, @@ -62,6 +65,8 @@ class ScienceQA(BaseDataset): data_list = [] for data_id in current_data_split: ann = annotations[data_id] + if self.image_only and ann['image'] is None: + continue data_info = { 'image_id': data_id,