mmclassification/tools/dataset_converters/convert_flickr30k_ann.py
Yiqin Wang 王逸钦 6d7fe91a98
[Feature] Support Flickr30k Retrieval dataset (#1625)
* format

* remove abs path

* init add flickr30k caption

* remove abs dir

* update blip readme

* add convert scripts

* minor

* minor
2023-06-19 15:15:03 +08:00

57 lines
1.7 KiB
Python

# Copyright (c) OpenMMLab. All rights reserved.
"""Create COCO-Style GT annotations based on raw annotation of Flickr30k.
The GT annotations are used for evaluation in the image captioning task.
"""
import json
def _build_split_gt(images, split):
    """Collect the COCO-style GT entries of a single dataset split.

    Args:
        images (list[dict]): Image records of the raw annotation file.
            Only the ``split``, ``imgid`` and ``sentences`` keys are read.
            Example record::

                {
                    "sentids": [0, 1, 2],
                    "imgid": 0,
                    "sentences": [
                        {"raw": "Two friends enjoy time spent together.",
                         "imgid": 0, "sentid": 2}
                    ],
                    "split": "train",
                    "filename": "1000092795.jpg"
                }
        split (str): Name of the split whose records are kept.

    Returns:
        dict: ``{'annotations': [...], 'images': [...]}`` where every
        annotation has the keys ``image_id``, ``id`` and ``caption`` and
        every image has the key ``id``.
    """
    # Fresh lists on every call, so entries of one split can never leak
    # into the output of another split.
    img_list = []
    ann_list = []
    for img in images:
        if img['split'] != split:
            continue

        img_list.append({'id': img['imgid']})
        ann_list.extend({
            'image_id': img['imgid'],
            'id': sentence['sentid'],
            'caption': sentence['raw']
        } for sentence in img['sentences'])

    return {'annotations': ann_list, 'images': img_list}


def main():
    """Convert ``dataset_flickr30k.json`` into per-split GT files.

    Reads ``dataset_flickr30k.json`` from the current working directory and
    writes ``flickr30k_train_gt.json``, ``flickr30k_val_gt.json`` and
    ``flickr30k_test_gt.json`` next to it. Each output file contains only
    the images and captions whose ``split`` field matches the file's split.
    """
    with open('dataset_flickr30k.json', 'r', encoding='utf-8') as f:
        annotations = json.load(f)

    for split in ('train', 'val', 'test'):
        json_file = _build_split_gt(annotations['images'], split)

        # generate flickr30k_train_gt.json, flickr30k_val_gt.json
        # and flickr30k_test_gt.json
        with open(f'flickr30k_{split}_gt.json', 'w') as f:
            json.dump(json_file, f)
if __name__ == '__main__':
    # Run the conversion only when executed as a script, not on import.
    main()