# Spaces: Running on CPU Upgrade
from appStore.prep_utils import get_client | |
from langchain_qdrant import FastEmbedSparse | |
from torch import cuda | |
from qdrant_client.http import models | |
from langchain_huggingface import HuggingFaceEmbeddings | |
# Choose the torch device once, at import time: use the GPU when torch
# reports CUDA as available, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
# Embedding models are expensive to construct, so they are built once per
# device string and reused by every later hybrid_search call.
_EMBEDDER_CACHE = {}


def _get_embedders(target_device):
    """Return the cached ``(dense, sparse)`` embedding models for *target_device*."""
    if target_device not in _EMBEDDER_CACHE:
        dense = HuggingFaceEmbeddings(
            model_name='BAAI/bge-m3',
            model_kwargs={'device': target_device},
            encode_kwargs={'normalize_embeddings': True},
        )
        sparse = FastEmbedSparse(model_name="Qdrant/bm25")
        _EMBEDDER_CACHE[target_device] = (dense, sparse)
    return _EMBEDDER_CACHE[target_device]


def hybrid_search(client, query, collection_name, limit=500):
    """Run a dense and a sparse vector search for *query* in one batch call.

    The query is embedded with the dense ``BAAI/bge-m3`` model and the sparse
    ``Qdrant/bm25`` model, then both searches are sent to
    ``client.search_batch`` against the ``"text-dense"`` and ``"text-sparse"``
    named vectors.

    Args:
        client: Object exposing ``search_batch(collection_name=..., requests=...)``
            (presumably a Qdrant client; confirm against the caller).
        query: Query text to embed.
        collection_name: Name of the collection to search.
        limit: Value passed as ``limit`` to each of the two search requests.

    Returns:
        Whatever ``client.search_batch`` returns, unmodified. The requests are
        submitted dense first, then sparse; the two result sets are NOT fused
        here.
    """
    # Reuse the already-loaded models instead of re-instantiating them per call.
    embeddings, sparse_embeddings = _get_embedders(device)

    # 1) Embed the query with both models.
    q_dense = embeddings.embed_query(query)
    q_sparse = sparse_embeddings.embed_query(query)

    # 2) Build one request per vector type; both share the caller's limit
    #    and ask for payloads to be returned with each hit.
    dense_request = models.SearchRequest(
        vector=models.NamedVector(
            name="text-dense",
            vector=q_dense,
        ),
        limit=limit,
        with_payload=True,
    )
    sparse_request = models.SearchRequest(
        vector=models.NamedSparseVector(
            name="text-sparse",
            vector=models.SparseVector(
                indices=q_sparse.indices,
                values=q_sparse.values,
            ),
        ),
        limit=limit,
        with_payload=True,
    )

    # 3) Send both searches in a single round trip.
    return client.search_batch(
        collection_name=collection_name,
        requests=[dense_request, sparse_request],
    )