Spaces:
Running
Running
# Copyright (c) Facebook, Inc. and its affiliates. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
from typing import List | |
import faiss | |
import logging | |
LOG = logging.getLogger(__name__) | |
def merge_ondisk(
    trained_index: faiss.Index, shard_fnames: List[str], ivfdata_fname: str
) -> None:
    """Merge the inverted lists of several shard indexes into one index.

    The contents of the indexes stored in ``shard_fnames`` are added to
    ``trained_index``, which is modified in place: its inverted lists are
    replaced by on-disk inverted lists whose data is stored in
    ``ivfdata_fname``.

    Args:
        trained_index: Empty (``ntotal == 0``) index from which
            ``faiss.extract_index_ivf`` can extract an IVF index.
        shard_fnames: File names of the indexes whose inverted lists are
            merged. Each one is opened with ``faiss.IO_FLAG_MMAP``.
        ivfdata_fname: File name that receives the merged inverted lists.

    Raises:
        AssertionError: If ``trained_index`` is a ``faiss.IndexIVFPQR`` or
            already contains vectors.
    """
    # Validate everything about the output index up front, before any shard
    # is opened, so that a rejected call does no I/O and leaves no shard
    # inverted lists disowned. The checks raise explicitly instead of using
    # ``assert`` so they still run under ``python -O``; AssertionError is
    # kept so existing callers see the same exception type.
    if isinstance(trained_index, faiss.IndexIVFPQR):
        raise AssertionError(
            "IndexIVFPQR is not supported as an on disk index."
        )
    output_ivf = faiss.extract_index_ivf(trained_index)
    if trained_index.ntotal != 0:
        raise AssertionError("works only on empty index")

    # First collect the inverted lists of every shard.
    shard_invlists = []
    for fname in shard_fnames:
        # IO_FLAG_MMAP avoids actually loading the data, so the total size
        # of the inverted lists can exceed the available RAM.
        LOG.info("read %s", fname)
        shard_index = faiss.read_index(fname, faiss.IO_FLAG_MMAP)
        shard_ivf = faiss.extract_index_ivf(shard_index)
        shard_invlists.append(shard_ivf.invlists)
        # Avoid that the invlists get deallocated with the shard index.
        shard_ivf.own_invlists = False

    # Prepare the output inverted lists. Their data is written to
    # ivfdata_fname.
    invlists = faiss.OnDiskInvertedLists(
        output_ivf.nlist, output_ivf.code_size, ivfdata_fname
    )

    # Merge all the shard inverted lists into the on-disk ones.
    ivf_vector = faiss.InvertedListsPtrVector()
    for ivf in shard_invlists:
        ivf_vector.push_back(ivf)

    LOG.info("merge %d inverted lists ", ivf_vector.size())
    ntotal = invlists.merge_from(ivf_vector.data(), ivf_vector.size())

    # Now replace the inverted lists in the output index.
    trained_index.ntotal = output_ivf.ntotal = ntotal
    output_ivf.replace_invlists(invlists, True)
    # NOTE(review): the True above presumably hands ownership of the C++
    # object to output_ivf, so the Python wrapper gives up its own claim
    # here to avoid a double free -- confirm against the faiss SWIG docs.
    invlists.this.disown()