Adding embedding column to dataset descriptor (#3736)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3736

Nit - adding an embedding column to the dataset descriptor
Nit - initializing cached_ds as a class-level field instead of in __post_init__

Reviewed By: satymish

Differential Revision: D60858496

fbshipit-source-id: 3358d866a0668424cd6895bc7a5c620ff97e72fa
pull/3749/head
Kumar Saurabh Arora 2024-08-09 17:07:36 -07:00 committed by Facebook GitHub Bot
parent ac18577482
commit 290464f23b
2 changed files with 4 additions and 4 deletions

View File

@ -10,7 +10,7 @@ import logging
import os
import pickle
from dataclasses import dataclass
from typing import Any, List, Optional
from typing import Any, Dict, List, Optional
from zipfile import ZipFile
import faiss # @manual=//faiss/python:pyfaiss_gpu
@ -46,13 +46,11 @@ def merge_rcq_itq(
@dataclass
class BenchmarkIO:
path: str
cached_ds: Dict[Any, Any] = {}
def clone(self):
return BenchmarkIO(path=self.path)
def __post_init__(self):
self.cached_ds = {}
# TODO(kuarora): rename it as get_local_file
def get_local_filename(self, filename):
if len(filename) > 184:

View File

@ -78,6 +78,8 @@ class DatasetDescriptor:
# number of vectors to load from the dataset
num_vectors: Optional[int] = None
embedding_column: Optional[str] = None
def __hash__(self):
return hash(self.get_filename())