from appStore.prep_utils import get_client
from langchain_qdrant import FastEmbedSparse, RetrievalMode
from torch import cuda
from qdrant_client.http import models
from langchain_huggingface import HuggingFaceEmbeddings

# select the device to be used: either GPU or CPU
device = 'cuda' if cuda.is_available() else 'cpu'


def hybrid_search(client, query, collection_name):
    """Run a hybrid (dense + sparse) search against a Qdrant collection."""
    # dense embeddings for semantic similarity
    embeddings = HuggingFaceEmbeddings(
        model_name='BAAI/bge-m3',
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
    )
    # sparse BM25 embeddings for lexical matching
    sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

    # embed the query with both models
    q_dense = embeddings.embed_query(query)
    q_sparse = sparse_embeddings.embed_query(query)

    # issue both searches in a single batched request:
    # one against the named dense vector, one against the named sparse vector
    results = client.search_batch(
        collection_name=collection_name,
        requests=[
            models.SearchRequest(
                vector=models.NamedVector(
                    name="text-dense",
                    vector=q_dense,
                ),
                limit=10,
                with_payload=True,
            ),
            models.SearchRequest(
                vector=models.NamedSparseVector(
                    name="text-sparse",
                    vector=models.SparseVector(
                        indices=q_sparse.indices,
                        values=q_sparse.values,
                    ),
                ),
                limit=10,
                with_payload=True,
            ),
        ],
    )
    return results
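
# Example usage (a minimal sketch): `get_client` is assumed to return a ready
# `qdrant_client.QdrantClient` with no arguments, and the query string and
# collection name below are placeholders, not values from this repo.
if __name__ == "__main__":
    client = get_client()
    hits = hybrid_search(client, "climate adaptation measures", "my_collection")
    # search_batch returns one result list per request: dense first, sparse second
    dense_hits, sparse_hits = hits
    for hit in dense_hits:
        print(hit.id, hit.score, hit.payload)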