diff --git a/datasets/coco_utils/create_t1_imageset.py b/datasets/coco_utils/create_t1_imageset.py
new file mode 100644
index 0000000..4800025
--- /dev/null
+++ b/datasets/coco_utils/create_t1_imageset.py
@@ -0,0 +1,62 @@
+from pycocotools.coco import COCO
+import numpy as np
+
+T1_COCO_CLASS_NAMES = [  # category names used below to select T1 images
+    "airplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
+    "chair", "cow", "dining table", "dog", "horse", "motorcycle", "person",
+    "potted plant", "sheep", "couch", "train", "tv"
+]
+
+# Train: keep every train2017 image that has at least one T1-class annotation.
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_train_sel.txt'
+
+coco_instance = COCO(coco_annotation_file)
+
+image_ids = []  # file_name (text before the first '.') of each selected image
+cls = []  # category name of every annotation found in the selected images
+for index, image_id in enumerate(coco_instance.imgToAnns):  # 'index' is never used
+    image_details = coco_instance.imgs[image_id]
+    classes = [coco_instance.cats[annotation['category_id']]['name'] for annotation in coco_instance.imgToAnns[image_id]]
+    if not set(classes).isdisjoint(T1_COCO_CLASS_NAMES):  # at least one T1 class present
+        image_ids.append(image_details['file_name'].split('.')[0])
+        cls.extend(classes)  # also records the non-T1 classes of a selected image
+
+(unique, counts) = np.unique(cls, return_counts=True)  # annotation count per category name
+print({x:y for x,y in zip(unique, counts)})
+
+with open(dest_file, 'w') as file:
+    for image_id in image_ids:
+        file.write(str(image_id)+'\n')
+
+print('Created train file')
+
+# Test: the same selection, applied to the val2017 annotations.
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_test.txt'
+
+coco_instance = COCO(coco_annotation_file)
+
+image_ids = []  # reset: the train ids collected above are not carried over
+cls = []
+for index, image_id in enumerate(coco_instance.imgToAnns):  # 'index' is never used
+    image_details = coco_instance.imgs[image_id]
+    classes = [coco_instance.cats[annotation['category_id']]['name'] for annotation in coco_instance.imgToAnns[image_id]]
+    if not set(classes).isdisjoint(T1_COCO_CLASS_NAMES):  # at least one T1 class present
+        image_ids.append(image_details['file_name'].split('.')[0])
+        cls.extend(classes)
+
+(unique, counts) = np.unique(cls, return_counts=True)  # annotation count per category name
+print({x:y for x,y in zip(unique, counts)})
+
+with open(dest_file, 'w') as file:
+    for image_id in image_ids:
+        file.write(str(image_id)+'\n')
+print('Created test file')
+
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_test_unk.txt'  # written from the same image_ids as t1_test.txt
+with open(dest_file, 'w') as file:
+    for image_id in image_ids:
+        file.write(str(image_id)+'\n')
+
+print('Created test_unk file')
diff --git a/datasets/coco_utils/create_t2_imageset.py b/datasets/coco_utils/create_t2_imageset.py
index a3f6743..5186486 100644
--- a/datasets/coco_utils/create_t2_imageset.py
+++ b/datasets/coco_utils/create_t2_imageset.py
@@ -9,8 +9,8 @@ T2_CLASS_NAMES = [
 ]
 
 # Train
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_train2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t2_train.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_train_sel.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -33,8 +33,8 @@ with open(dest_file, 'w') as file:
 print('Created train file')
 
 # Test
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_val2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t2_test.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_test.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -55,7 +55,7 @@ with open(dest_file, 'w') as file:
         file.write(str(image_id)+'\n')
 print('Created test file')
 
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t2_test_unk.txt'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_test_unk.txt'
 with open(dest_file, 'w') as file:
     for image_id in image_ids:
         file.write(str(image_id)+'\n')
diff --git a/datasets/coco_utils/create_t3_imageset.py b/datasets/coco_utils/create_t3_imageset.py
index 8c790ad..03914f6 100644
--- a/datasets/coco_utils/create_t3_imageset.py
+++ b/datasets/coco_utils/create_t3_imageset.py
@@ -9,8 +9,8 @@ T3_CLASS_NAMES = [
 ]
 
 # Train
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_train2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t3_train.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_train_sel.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -33,8 +33,8 @@ with open(dest_file, 'w') as file:
 print('Created train file')
 
 # Test
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_val2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t3_test.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_test.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -55,7 +55,7 @@ with open(dest_file, 'w') as file:
         file.write(str(image_id)+'\n')
 print('Created test file')
 
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t3_test_unk.txt'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_test_unk.txt'
 with open(dest_file, 'w') as file:
     for image_id in image_ids:
         file.write(str(image_id)+'\n')
diff --git a/datasets/coco_utils/create_t4_imageset.py b/datasets/coco_utils/create_t4_imageset.py
index f37ed4e..a855ee1 100644
--- a/datasets/coco_utils/create_t4_imageset.py
+++ b/datasets/coco_utils/create_t4_imageset.py
@@ -9,8 +9,8 @@ T4_CLASS_NAMES = [
 ]
 
 # Train
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_train2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t4_train.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_train2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_train_sel.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -33,8 +33,8 @@ with open(dest_file, 'w') as file:
 print('Created train file')
 
 # Test
-coco_annotation_file = '/home/joseph/workspace/datasets/mscoco/annotations/instances_val2017.json'
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t4_test.txt'
+coco_annotation_file = '/home/datasets/mscoco/annotations/instances_val2017.json'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_test.txt'
 
 coco_instance = COCO(coco_annotation_file)
 
@@ -55,7 +55,7 @@ with open(dest_file, 'w') as file:
         file.write(str(image_id)+'\n')
 print('Created test file')
 
-dest_file = '/home/joseph/workspace/OWOD/datasets/coco17_voc_style/ImageSets/t4_test_unk.txt'
+dest_file = '/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_test_unk.txt'
 with open(dest_file, 'w') as file:
     for image_id in image_ids:
         file.write(str(image_id)+'\n')
diff --git a/datasets/coco_utils/deduplicate.py b/datasets/coco_utils/deduplicate.py
new file mode 100644
index 0000000..861a00c
--- /dev/null
+++ b/datasets/coco_utils/deduplicate.py
@@ -0,0 +1,101 @@
+import sys
+import numpy as np
+import xml.etree.ElementTree as ET
+from collections import Counter
+import random
+
+# Candidate training lists written by the create_t{1..4}_imageset.py scripts, and
+# the validation list whose ids are removed from each of them further down.
+t1_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_train_sel.txt"
+t2_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_train_sel.txt"
+t3_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_train_sel.txt"
+t4_train_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_train_sel.txt"
+all_val_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/all_task_val.txt"
+
+# Val set creation (disabled, kept for reference; all_task_val.txt is only read below)
+# t1_val = random.sample(t1_list, 1000)
+# print("t1_val:",len(t1_val))
+# t1_val_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_val.txt"
+
+# with open(t1_val_file, 'w') as file:
+#     for image_id in t1_val:
+#         file.write(str(image_id)+'\n')
+# print('Created file')
+
+# val_list = t1_val_list + t2_val_list + t3_val_list + t4_val_list
+# new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/all_task_val.txt"
+
+# with open(new_file, 'w') as file:
+#     for image_id in val_list:
+#         file.write(str(image_id)+'\n')
+# print('Created file')
+
+
+
+# Training set deduplication: load each task's candidate ids, one id per line.
+with open(t1_train_file) as src:
+    t1_list = src.read().splitlines()
+print("t1_list:", len(t1_list))
+
+with open(t2_train_file) as src:
+    t2_list = src.read().splitlines()
+print("t2_list:", len(t2_list))
+
+with open(t3_train_file) as src:
+    t3_list = src.read().splitlines()
+print("t3_list:", len(t3_list))
+
+with open(t4_train_file) as src:
+    t4_list = src.read().splitlines()
+print("t4_list:", len(t4_list))
+
+# Ids listed in the validation file; they are excluded from every training list.
+with open(all_val_file) as src:
+    val_list = src.read().splitlines()
+print("val_list:", len(val_list))
+
+# Write t{n}_train.txt = the ids of t{n}_train_sel.txt that are not in all_task_val.txt.
+val_ids = set(val_list)  # built once: O(1) membership instead of a list scan per id
+
+t1_train = [x for x in t1_list if x not in val_ids]  # original order is preserved
+print("t1_train:",len(t1_train))
+t1_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t1_train.txt"
+
+with open(t1_new_file, 'w') as file:
+    for image_id in t1_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+
+
+t2_train = [x for x in t2_list if x not in val_ids]
+print("t2_train:",len(t2_train))
+
+t2_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t2_train.txt"
+
+with open(t2_new_file, 'w') as file:
+    for image_id in t2_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+
+
+t3_train = [x for x in t3_list if x not in val_ids]
+print("t3_train:",len(t3_train))
+
+t3_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t3_train.txt"
+
+with open(t3_new_file, 'w') as file:
+    for image_id in t3_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+
+
+t4_train = [x for x in t4_list if x not in val_ids]
+print("t4_train:",len(t4_train))
+
+t4_new_file = "/home/OWOD/datasets/coco17_voc_style/ImageSets/t4_train.txt"
+
+with open(t4_new_file, 'w') as file:
+    for image_id in t4_train:
+        file.write(str(image_id)+'\n')
+print('Created file')
+