mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3966 This actually enables the linting. Manual changes: - tools/arcanist/lint/fbsource-licenselint-config.toml - tools/arcanist/lint/fbsource-lint-engine.toml Automated changes: `arc lint --apply-patches --take LICENSELINT --paths-cmd 'hg files faiss'` Reviewed By: asadoughi Differential Revision: D64484165 fbshipit-source-id: 4f2f6e953c94ef6ebfea8a5ae035ccfbea65ed04
47 lines
1.1 KiB
Python
47 lines
1.1 KiB
Python
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
#
|
|
# This source code is licensed under the MIT license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
import numpy as np
|
|
import os
|
|
import yaml
|
|
|
|
# Layout of a single embedding: `d` components stored as unsigned bytes.
d = 256
dt = np.dtype(np.uint8)

# Where the sharded ssnpp data lives, and the 20 zero-padded shard names
# (ssnpp_0000000000.npy ... ssnpp_0000000019.npy) expected in that directory.
root = "/checkpoint/marialomeli/ssnpp_data"
file_names = [f"ssnpp_{i:010}.npy" for i in range(20)]
|
|
|
|
|
|
def read_embeddings(fp):
    """Open one embedding shard as a read-only memory map.

    The on-disk format is detected as follows:

    * if the file starts with the ``.npy`` magic prefix it is loaded with
      ``np.load`` and checked against the expected dimension and dtype;
    * otherwise, if its size is an exact multiple of one vector
      (``d * dt.itemsize`` bytes), it is mapped as a headerless dump;
    * anything else is still handed to ``np.load``, which reports the error.

    Checking the magic prefix first matters: a ``.npy`` file whose header
    length happens to be a multiple of the vector size would otherwise pass
    the size test and be silently misread as raw data.

    Relies on the module-level ``d`` (vector dimension) and ``dt`` (dtype).

    Args:
        fp: path of the shard file.

    Returns:
        A ``(format, vectors)`` tuple, where ``format`` is ``"raw"`` or
        ``"npy"`` and ``vectors`` is the memory-mapped array.

    Raises:
        AssertionError: if an ``.npy`` shard does not have ``d`` columns or
            is not of dtype ``dt``.
    """
    import sys  # local import: only needed for the diagnostic print below

    npy_magic = b"\x93NUMPY"

    fl = os.path.getsize(fp)
    nb = fl // d // dt.itemsize
    # Diagnostic only: keep it off stdout, which the script uses for the
    # YAML it emits.
    print(nb, file=sys.stderr)

    with open(fp, "rb") as f:
        has_npy_header = f.read(len(npy_magic)) == npy_magic

    if not has_npy_header and fl == d * dt.itemsize * nb:  # no header
        return ("raw", np.memmap(fp, shape=(nb, d), dtype=dt, mode="r"))

    # npy file (or an unrecognised one, which np.load will reject)
    vecs = np.load(fp, mmap_mode="r")
    assert vecs.shape[1] == d
    assert vecs.dtype == dt
    return ("npy", vecs)
|
|
|
|
|
|
# Probe every expected shard and record its per-file metadata.
files = []
for fn in file_names:
    fp = f"{root}/{fn}"
    assert os.path.exists(fp), f"{fp} is missing"
    fmt, vectors = read_embeddings(fp)
    entry = {
        "name": fn,
        "size": vectors.shape[0],
        "dtype": dt.name,
        "format": fmt,
    }
    files.append(entry)

# Total number of vectors across all shards.
size = sum(entry["size"] for entry in files)

# Assemble the dataset description and emit it as YAML on stdout.
cfg = {
    "size": size,
    "root": root,
    "d": d,
    "files": files,
}
print(yaml.dump(cfg))
|