faiss/demos/offline_ivf/generate_config.py
Michael Norris eff0898a13 Enable linting: lint config changes plus arc lint command (#3966)
Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/3966

This actually enables the linting.

Manual changes:
- tools/arcanist/lint/fbsource-licenselint-config.toml
- tools/arcanist/lint/fbsource-lint-engine.toml

Automated changes:
`arc lint --apply-patches --take LICENSELINT --paths-cmd 'hg files faiss'`

Reviewed By: asadoughi

Differential Revision: D64484165

fbshipit-source-id: 4f2f6e953c94ef6ebfea8a5ae035ccfbea65ed04
2024-10-22 09:46:48 -07:00

47 lines
1.1 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import sys

import numpy as np
import yaml
# Description of the sharded SSNPP dataset that the config is generated for.
root = "/checkpoint/marialomeli/ssnpp_data"
# Width of one stored vector and the type of each of its components; both are
# used to size the memory maps and to validate loaded arrays.
d = 256
dt = np.dtype("uint8")
# Twenty shards, named "ssnpp_" followed by a zero-padded 10-digit index.
file_names = [f"ssnpp_{i:010}.npy" for i in range(20)]
def read_embeddings(fp, dim=None, dtype=None):
    """Memory-map the embedding matrix stored in the file ``fp``.

    Format detection:
      * a file that starts with the NPY magic string is loaded with
        ``np.load``;
      * otherwise, a file whose size is an exact multiple of one row
        (``dim * itemsize`` bytes) is mapped as a headerless raw dump;
      * anything else is handed to ``np.load``, which raises if the file is
        not a valid ``.npy`` file.

    Args:
        fp: path of the file to read.
        dim: number of components per vector; defaults to the module-level
            ``d``.
        dtype: component type (anything ``np.dtype`` accepts); defaults to
            the module-level ``dt``.

    Returns:
        A ``(format, vectors)`` tuple where ``format`` is ``"raw"`` or
        ``"npy"`` and ``vectors`` is a read-only memory-mapped array with
        ``dim`` columns.

    Raises:
        ValueError: if an ``.npy`` file does not hold a 2-D array with
            ``dim`` columns, or its dtype differs from ``dtype``.
    """
    dim = d if dim is None else dim
    dtype = dt if dtype is None else np.dtype(dtype)
    row_bytes = dim * dtype.itemsize

    fl = os.path.getsize(fp)
    nb = fl // row_bytes
    # Diagnostics go to stderr so that stdout carries only the YAML config.
    print(nb, file=sys.stderr)

    # Size divisibility alone cannot tell the formats apart: an .npy header
    # whose length is a multiple of the row size would look like raw data.
    npy_magic = b"\x93NUMPY"
    with open(fp, "rb") as f:
        is_npy = f.read(len(npy_magic)) == npy_magic

    if not is_npy and fl == row_bytes * nb:  # no header
        return ("raw", np.memmap(fp, shape=(nb, dim), dtype=dtype, mode="r"))

    # assume npy
    vecs = np.load(fp, mmap_mode="r")
    # Explicit raises rather than asserts: asserts vanish under ``python -O``.
    if vecs.ndim != 2 or vecs.shape[1] != dim:
        raise ValueError(
            f"{fp}: expected an array of shape (n, {dim}), got {vecs.shape}"
        )
    if vecs.dtype != dtype:
        raise ValueError(f"{fp}: expected dtype {dtype}, got {vecs.dtype}")
    return ("npy", vecs)
# Inspect every shard, record its per-file metadata and accumulate the total
# number of vectors across the whole dataset.
files = []
size = 0
for fn in file_names:
    fp = f"{root}/{fn}"
    assert os.path.exists(fp), f"{fp} is missing"
    fmt, vecs = read_embeddings(fp)
    num_rows = vecs.shape[0]
    entry = {
        "name": fn,
        "size": num_rows,
        "dtype": dt.name,
        "format": fmt,
    }
    files.append(entry)
    size += num_rows

# Assemble the dataset description and emit it as YAML on stdout.
cfg = {
    "size": size,
    "root": root,
    "d": d,
    "files": files,
}
print(yaml.dump(cfg))