Patch summary (free text ahead of the first "diff --git" header).

  * create_t1_imageset.py (new): writes t1_train_sel.txt, t1_test.txt and
    t1_test_unk.txt from the COCO 2017 train/val annotation files.
  * create_t2/t3/t4_imageset.py: switch the hard-coded paths from
    /home/joseph/workspace/... to /home/..., and write the train list to
    tN_train_sel.txt instead of tN_train.txt.
  * deduplicate.py (new): reads the four tN_train_sel.txt lists, drops every
    image id that also appears in all_task_val.txt, and writes tN_train.txt.

Review fixes applied to deduplicate.py relative to the original patch text:
  * t3_train_file / t4_train_file pointed at t2_train_sel.txt (copy-paste);
    they now read t3_train_sel.txt / t4_train_sel.txt.
  * The size printouts for the t2/t3/t4 results were all labelled "t1_train:".
  * Validation ids are looked up in a set instead of scanning a list per id.
Hunk line counts are unchanged. The "index" blob id on deduplicate.py is
carried over from the original patch and no longer matches the edited content.

diff --git a/datasets/coco_utils/create_t1_imageset.py b/datasets/coco_utils/create_t1_imageset.py
new file mode 100644
index 0000000..4800025
--- /dev/null
+++ b/datasets/coco_utils/create_t1_imageset.py
@@ -0,0 +1,62 @@
+from pycocotools.coco import COCO
+import numpy as np
+
+T1_COCO_CLASS_NAMES = [
+    "airplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
+    "chair", "cow", "dining table", "dog", "horse", "motorcycle", "person",
+    "potted plant", "sheep", "couch", "train", "tv"
+]
+
+# Train
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_train_sel.txt'
+
+coco_instance = COCO(coco_annotation_file)
+
+image_ids = []
+cls = []
+for index, image_id in enumerate(coco_instance.imgToAnns):
+    image_details = coco_instance.imgs[image_id]
+    classes = [coco_instance.cats[annotation['category_id']]['name'] for annotation in coco_instance.imgToAnns[image_id]]
+    if not set(classes).isdisjoint(T1_COCO_CLASS_NAMES):
+        image_ids.append(image_details['file_name'].split('.')[0])
+        cls.extend(classes)
+
+(unique, counts) = np.unique(cls, return_counts=True)
+print({x:y for x,y in zip(unique, counts)})
+
+with open(dest_file, 'w') as file:
+    for image_id in image_ids:
+        file.write(str(image_id)+'\n')
+
+print('Created train file')
+
+# Test
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_test.txt'
+
+coco_instance = COCO(coco_annotation_file)
+
+image_ids = []
+cls = []
+for index, image_id in enumerate(coco_instance.imgToAnns):
+    image_details = coco_instance.imgs[image_id]
+    classes = [coco_instance.cats[annotation['category_id']]['name'] for annotation in coco_instance.imgToAnns[image_id]]
+    if not set(classes).isdisjoint(T1_COCO_CLASS_NAMES):
+        image_ids.append(image_details['file_name'].split('.')[0])
+        cls.extend(classes)
+
+(unique, counts) = np.unique(cls, return_counts=True)
+print({x:y for x,y in zip(unique, counts)})
+
+with open(dest_file, 'w') as file:
+    for image_id in image_ids:
+        file.write(str(image_id)+'\n')
+print('Created test file')
+
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_test_unk.txt'
+with open(dest_file, 'w') as file:
+    for image_id in image_ids:
+        file.write(str(image_id)+'\n')
+
+print('Created test_unk file')
diff --git a/datasets/coco_utils/create_t2_imageset.py b/datasets/coco_utils/create_t2_imageset.py
index a3f6743..5186486 100644
--- a/datasets/coco_utils/create_t2_imageset.py
+++ b/datasets/coco_utils/create_t2_imageset.py
@@ -9,8 +9,8 @@ T2_CLASS_NAMES = [
 ]
 
 # Train
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_train2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t2_train.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_train_sel.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -33,8 +33,8 @@ with open(dest_file, 'w') as file:
 print('Created train file')
 
 # Test
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_val2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t2_test.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_test.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -55,7 +55,7 @@ with open(dest_file, 'w') as file:
         file.write(str(image_id)+'\n')
 print('Created test file')
 
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t2_test_unk.txt'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_test_unk.txt'
 with open(dest_file, 'w') as file:
     for image_id in image_ids:
         file.write(str(image_id)+'\n')
diff --git a/datasets/coco_utils/create_t3_imageset.py b/datasets/coco_utils/create_t3_imageset.py
index 8c790ad..03914f6 100644
--- a/datasets/coco_utils/create_t3_imageset.py
+++ b/datasets/coco_utils/create_t3_imageset.py
@@ -9,8 +9,8 @@ T3_CLASS_NAMES = [
 ]
 
 # Train
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_train2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t3_train.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_train_sel.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -33,8 +33,8 @@ with open(dest_file, 'w') as file:
 print('Created train file')
 
 # Test
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_val2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t3_test.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_test.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -55,7 +55,7 @@ with open(dest_file, 'w') as file:
         file.write(str(image_id)+'\n')
 print('Created test file')
 
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t3_test_unk.txt'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_test_unk.txt'
 with open(dest_file, 'w') as file:
     for image_id in image_ids:
         file.write(str(image_id)+'\n')
diff --git a/datasets/coco_utils/create_t4_imageset.py b/datasets/coco_utils/create_t4_imageset.py
index f37ed4e..a855ee1 100644
--- a/datasets/coco_utils/create_t4_imageset.py
+++ b/datasets/coco_utils/create_t4_imageset.py
@@ -9,8 +9,8 @@ T4_CLASS_NAMES = [
 ]
 
 # Train
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_train2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t4_train.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_train_sel.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -33,8 +33,8 @@ with open(dest_file, 'w') as file:
 print('Created train file')
 
 # Test
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_val2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t4_test.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_test.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -55,7 +55,7 @@ with open(dest_file, 'w') as file:
         file.write(str(image_id)+'\n')
 print('Created test file')
 
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t4_test_unk.txt'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_test_unk.txt'
 with open(dest_file, 'w') as file:
     for image_id in image_ids:
         file.write(str(image_id)+'\n')
diff --git a/datasets/coco_utils/deduplicate.py b/datasets/coco_utils/deduplicate.py
new file mode 100644
index 0000000..861a00c
--- /dev/null
+++ b/datasets/coco_utils/deduplicate.py
@@ -0,0 +1,101 @@
+import sys
+import numpy as np
+import xml.etree.ElementTree as ET
+from collections import Counter
+import random
+
+t1_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_train_sel.txt"
+t2_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_train_sel.txt"
+t3_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_train_sel.txt"
+t4_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_train_sel.txt"
+all_val_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/all_task_val.txt"
+
+
+
+# Val set creation
+# t1_val = random.sample(t1_list, 1000)
+# print("t1_val:",len(t1_val))
+# t1_val_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_val.txt"
+
+# with open(t1_val_file, 'w') as file:
+#     for image_id in t1_val:
+#         file.write(str(image_id)+'\n')
+# print('Created file')
+
+# val_list = t1_val_list + t2_val_list + t3_val_list + t4_val_list
+# new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/all_task_val.txt"
+
+# with open(new_file, 'w') as file:
+#     for image_id in val_list:
+#         file.write(str(image_id)+'\n')
+# print('Created file')
+
+
+
+# Training set deduplication
+with open(t1_train_file, 'r') as t1_file:
+    t1_list = t1_file.read().splitlines()
+print("t1_list:",len(t1_list))
+
+with open(t2_train_file, 'r') as t2_file:
+    t2_list = t2_file.read().splitlines()
+print("t2_list:",len(t2_list))
+
+with open(t3_train_file, 'r') as t3_file:
+    t3_list = t3_file.read().splitlines()
+print("t3_list:",len(t3_list))
+
+with open(t4_train_file, 'r') as t4_file:
+    t4_list = t4_file.read().splitlines()
+print("t4_list:",len(t4_list))
+
+
+with open(all_val_file, 'r') as val_file:
+    val_list = val_file.read().splitlines()
+print("val_list:",len(val_list))
+
+# Build the lookup set once; val_list stays a list so the count printed above is unchanged.
+val_set = set(val_list)
+
+t1_train = [x for x in t1_list if x not in val_set]
+print("t1_train:",len(t1_train))
+
+t1_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_train.txt"
+
+with open(t1_new_file, 'w') as file:
+    for image_id in t1_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+
+
+t2_train = [x for x in t2_list if x not in val_set]
+print("t2_train:",len(t2_train))
+
+t2_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_train.txt"
+
+with open(t2_new_file, 'w') as file:
+    for image_id in t2_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+
+
+t3_train = [x for x in t3_list if x not in val_set]
+print("t3_train:",len(t3_train))
+
+t3_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_train.txt"
+
+with open(t3_new_file, 'w') as file:
+    for image_id in t3_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+
+
+t4_train = [x for x in t4_list if x not in val_set]
+print("t4_train:",len(t4_train))
+
+t4_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_train.txt"
+
+with open(t4_new_file, 'w') as file:
+    for image_id in t4_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+