mmclassification/mmpretrain/datasets/visual_genome.py

# Copyright (c) OpenMMLab. All rights reserved.
import re
from itertools import chain
from typing import List

import mmengine
from mmengine.dataset import BaseDataset

from mmpretrain.registry import DATASETS


@DATASETS.register_module()
class VisualGenomeQA(BaseDataset):
    """Visual Genome Question Answering dataset.

    dataset structure: ::

        data_root
        ├── image
        │   ├── 1.jpg
        │   ├── 2.jpg
        │   └── ...
        └── question_answers.json

    Args:
        data_root (str): The root directory for ``data_prefix`` and
            ``ann_file``.
        data_prefix (str): The directory of images. Defaults to ``"image"``.
        ann_file (str, optional): Annotation file path for training and
            validation. Defaults to ``"question_answers.json"``.
        **kwarg: Other keyword arguments in :class:`BaseDataset`.
    """

    def __init__(self,
                 data_root: str,
                 data_prefix: str = 'image',
                 ann_file: str = 'question_answers.json',
                 **kwarg):
        # ``BaseDataset`` receives the image directory under the
        # ``img_path`` key of ``data_prefix``; ``_create_image_index`` reads
        # it back from ``self.data_prefix['img_path']``.
        super().__init__(
            data_root=data_root,
            data_prefix=dict(img_path=data_prefix),
            ann_file=ann_file,
            **kwarg,
        )

    def _create_image_index(self) -> dict:
        """Build a mapping from integer image id to image file path.

        The image id is taken from the last run of digits found in each file
        name listed under ``self.data_prefix['img_path']``. Files whose name
        contains no digits are skipped. If several files yield the same id,
        the one listed last wins.

        Returns:
            dict: ``{image_id (int): image path (str)}``.
        """
        img_prefix = self.data_prefix['img_path']

        files = mmengine.list_dir_or_file(img_prefix, list_dir=False)
        image_index = {}
        for file in files:
            digit_runs = re.findall(r'\d+', file)
            if digit_runs:
                image_id = int(digit_runs[-1])
                image_index[image_id] = mmengine.join_path(img_prefix, file)

        return image_index

    def load_data_list(self) -> List[dict]:
        """Load the annotation file and flatten it to one item per QA pair.

        Returns:
            List[dict]: One dict per question-answer pair with the keys
            ``img_path``, ``question``, ``question_id``, ``image_id`` and
            ``gt_answer`` (a single-element list holding the answer).

        Raises:
            KeyError: If a QA pair refers to an ``image_id`` for which no
                image file was indexed, or lacks one of the expected fields.
        """
        annotations = mmengine.load(self.ann_file)

        # The original Visual Genome annotation file includes only image ids
        # but no image file paths, so resolve the paths from the image folder.
        self.image_index = self._create_image_index()

        # ann example
        # {
        #     'id': 1,
        #     'qas': [
        #         {
        #             'a_objects': [],
        #             'question': 'What color is the clock?',
        #             'image_id': 1,
        #             'qa_id': 986768,
        #             'answer': 'Two.',
        #             'q_objects': [],
        #         }
        #         ...
        #     ]
        # }
        data_list = []
        for qas in chain.from_iterable(ann['qas'] for ann in annotations):
            data_info = {
                'img_path': self.image_index[qas['image_id']],
                # The annotation keys are ``question`` and ``qa_id`` (see the
                # example above); the output uses ``question_id`` for the id.
                'question': qas['question'],
                'question_id': qas['qa_id'],
                'image_id': qas['image_id'],
                'gt_answer': [qas['answer']],
            }

            data_list.append(data_info)

        return data_list
-												[Feature] Support multiple multi-modal algorithms and inferencers. (#1561)

* [Feat] Migrate blip caption to mmpretrain. (#50)

* Migrate blip caption to mmpretrain

* minor fix

* support train

* [Feature] Support OFA caption task. (#51)

* [Feature] Support OFA caption task.

* Remove duplicated files.

* [Feature] Support OFA vqa task. (#58)

* [Feature] Support OFA vqa task.

* Fix lint.

* [Feat] Add BLIP retrieval to mmpretrain. (#55)

* init

* minor fix for train

* fix according to comments

* refactor

* Update Blip retrieval. (#62)

* [Feature] Support OFA visual grounding task. (#59)

* [Feature] Support OFA visual grounding task.

* minor add TODO

---------

Co-authored-by: yingfhu <yingfhu@gmail.com>

* [Feat] Add flamingos coco caption and vqa. (#60)

* first init

* init flamingo coco

* add vqa

* minor fix

* remove unnecessary modules

* Update config

* Use `ApplyToList`.

---------

Co-authored-by: mzr1996 <mzr1996@163.com>

* [Feature]: BLIP2 coco retrieval  (#53)

* [Feature]: Add blip2 retriever

* [Feature]: Add blip2 all modules

* [Feature]: Refine model

* [Feature]: x1

* [Feature]: Runnable coco ret

* [Feature]: Runnable version

* [Feature]: Fix lint

* [Fix]: Fix lint

* [Feature]: Use 364 img size

* [Feature]: Refactor blip2

* [Fix]: Fix lint

* refactor files

* minor fix

* minor fix

---------

Co-authored-by: yingfhu <yingfhu@gmail.com>

* Remove

* fix blip caption inputs (#68)

* [Feat] Add BLIP NLVR support. (#67)

* first init

* init flamingo coco

* add vqa

* add nlvr

* refactor nlvr

* minor fix

* minor fix

* Update dataset

---------

Co-authored-by: mzr1996 <mzr1996@163.com>

* [Feature]: BLIP2 Caption (#70)

* [Feature]: Add language model

* [Feature]: blip2 caption forward

* [Feature]: Reproduce the results

* [Feature]: Refactor caption

* refine config

---------

Co-authored-by: yingfhu <yingfhu@gmail.com>

* [Feat] Migrate BLIP VQA to mmpretrain (#69)

* reformat

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* change

* refactor code

---------

Co-authored-by: yingfhu <yingfhu@gmail.com>

* Update RefCOCO dataset

* [Fix] fix lint

* [Feature] Implement inference APIs for multi-modal tasks. (#65)

* [Feature] Implement inference APIs for multi-modal tasks.

* [Project] Add gradio demo.

* [Improve] Update requirements

* Update flamingo

* Update blip

* Add NLVR inferencer

* Update flamingo

* Update hugging face model register

* Update ofa vqa

* Update BLIP-vqa (#71)

* Update blip-vqa docstring (#72)

* Refine flamingo docstring (#73)

* [Feature]: BLIP2 VQA (#61)

* [Feature]: VQA forward

* [Feature]: Reproduce accuracy

* [Fix]: Fix lint

* [Fix]: Add blank line

* minor fix

---------

Co-authored-by: yingfhu <yingfhu@gmail.com>

* [Feature]: BLIP2 docstring (#74)

* [Feature]: Add caption docstring

* [Feature]: Add docstring to blip2 vqa

* [Feature]: Add docstring to retrieval

* Update BLIP-2 metafile and README (#75)

* [Feature]: Add readme and docstring

* Update blip2 results

---------

Co-authored-by: mzr1996 <mzr1996@163.com>

* [Feature] BLIP Visual Grounding on MMPretrain Branch (#66)

* blip grounding merge with mmpretrain

* remove commit

* blip grounding test and inference api

* refcoco dataset

* refcoco dataset refine config

* rebasing

* gitignore

* rebasing

* minor edit

* minor edit

* Update blip-vqa docstring (#72)

* rebasing

* Revert "minor edit"

This reverts commit 639cec757c215e654625ed0979319e60f0be9044.

* blip grounding final

* precommit

* refine config

* refine config

* Update blip visual grounding

---------

Co-authored-by: Yiqin Wang 王逸钦 <wyq1217@outlook.com>
Co-authored-by: mzr1996 <mzr1996@163.com>

* Update visual grounding metric

* Update OFA docstring, README and metafiles. (#76)

* [Docs] Update installation docs and gradio demo docs. (#77)

* Update OFA name

* Update Visual Grounding Visualizer

* Integrate accelerate support

* Fix imports.

* Fix timm backbone

* Update imports

* Update README

* Update circle ci

* Update flamingo config

* Add gradio demo README

* [Feature]: Add scienceqa (#1571)

* [Feature]: Add scienceqa

* [Feature]: Change param name

* Update docs

* Update video

---------

Co-authored-by: Hubert <42952108+yingfhu@users.noreply.github.com>
Co-authored-by: yingfhu <yingfhu@gmail.com>
Co-authored-by: Yuan Liu <30762564+YuanLiuuuuuu@users.noreply.github.com>
Co-authored-by: Yiqin Wang 王逸钦 <wyq1217@outlook.com>
Co-authored-by: Rongjie Li <limo97@163.com>
											
										
										
											2023-05-19 16:50:04 +08:00
+								# Copyright (c) OpenMMLab. All rights reserved.
 								import re
 								from itertools import chain
 								from typing import List
 								import mmengine
 								from mmengine.dataset import BaseDataset
 								from mmpretrain.registry import DATASETS
 								@DATASETS.register_module()
 								class VisualGenomeQA(BaseDataset):
 								    """Visual Genome Question Answering dataset.

 								    dataset structure: ::

 								        data_root
 								        ├── image
 								        │   ├── 1.jpg
 								        │   ├── 2.jpg
 								        │   └── ...
 								        └── question_answers.json

 								    Args:
 								        data_root (str): The root directory for ``data_prefix`` and
 								            ``ann_file``.
 								        data_prefix (str): The directory of images. Defaults to ``"image"``.
 								        ann_file (str, optional): Annotation file path for training and
 								            validation. Defaults to ``"question_answers.json"``.
 								        **kwarg: Other keyword arguments in :class:`BaseDataset`.
 								    """

 								    def __init__(self,
 								                 data_root: str,
 								                 data_prefix: str = 'image',
 								                 ann_file: str = 'question_answers.json',
 								                 **kwarg):
 								        # The image directory is stored under the ``img_path`` key of
 								        # ``data_prefix`` and read back by ``_create_image_index``.
 								        super().__init__(
 								            data_root=data_root,
 								            data_prefix=dict(img_path=data_prefix),
 								            ann_file=ann_file,
 								            **kwarg,
 								        )

 								    def _create_image_index(self) -> dict:
 								        """Build a mapping from integer image id to image file path.

 								        The image id is the last run of digits in each listed file name;
 								        files without digits are skipped.
 								        """
 								        img_prefix = self.data_prefix['img_path']
 								        files = mmengine.list_dir_or_file(img_prefix, list_dir=False)
 								        image_index = {}
 								        for file in files:
 								            digit_runs = re.findall(r'\d+', file)
 								            if digit_runs:
 								                image_id = int(digit_runs[-1])
 								                image_index[image_id] = mmengine.join_path(img_prefix, file)
 								        return image_index

 								    def load_data_list(self) -> List[dict]:
 								        """Load the annotation file and flatten it to one item per QA pair."""
 								        annotations = mmengine.load(self.ann_file)
 								        # The original Visual Genome annotation file includes only image ids
 								        # but no image file paths.
 								        self.image_index = self._create_image_index()
 								        # ann example
 								        # {
 								        #     'id': 1,
 								        #     'qas': [
 								        #         {
 								        #             'a_objects': [],
 								        #             'question': 'What color is the clock?',
 								        #             'image_id': 1,
 								        #             'qa_id': 986768,
 								        #             'answer': 'Two.',
 								        #             'q_objects': [],
 								        #         }
 								        #         ...
 								        #     ]
 								        # }
 								        data_list = []
 								        for qas in chain.from_iterable(ann['qas'] for ann in annotations):
 								            data_info = {
 								                'img_path': self.image_index[qas['image_id']],
 								                # Annotation keys are ``question`` and ``qa_id`` (see the
 								                # example above).
 								                'question': qas['question'],
 								                'question_id': qas['qa_id'],
 								                'image_id': qas['image_id'],
 								                'gt_answer': [qas['answer']],
 								            }
 								            data_list.append(data_info)
 								        return data_list