# Hugging Face Space status: Running on CPU Upgrade
from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
from langchain_community.vectorstores import Qdrant
from langchain_qdrant import FastEmbedSparse, RetrievalMode
from torch import cuda
# Pick the device the embedding model runs on: the GPU when CUDA reports one
# as available, otherwise the CPU.
device = 'cuda' if cuda.is_available() else 'cpu'
def hybrid_embed_chunks(chunks):
    """
    Embed ``chunks`` and index them in the ``giz_worldwide`` Qdrant collection.

    A dense ``BAAI/bge-m3`` model (normalized embeddings, running on the
    module-level ``device``) and a sparse ``Qdrant/bm25`` model are built and
    handed to ``Qdrant.from_documents`` together with
    ``RetrievalMode.HYBRID`` and ``path="/data/local_qdrant"``.

    Args:
        chunks: Documents to index; passed unchanged as the first argument
            of ``Qdrant.from_documents``.

    Returns:
        The vector store object returned by ``Qdrant.from_documents``.
    """
    embeddings = HuggingFaceEmbeddings(
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
        model_name='BAAI/bge-m3',
    )
    sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

    print("starting embedding")
    # NOTE(review): `sparse_embeddings` / `retrieval_mode` look like options of
    # langchain_qdrant.QdrantVectorStore (which spells the former
    # `sparse_embedding`); confirm that the community `Qdrant` class actually
    # honours them rather than silently ignoring them -- TODO verify.
    vector_store = Qdrant.from_documents(
        chunks,
        embeddings,
        sparse_embeddings=sparse_embeddings,
        path="/data/local_qdrant",
        collection_name='giz_worldwide',
        retrieval_mode=RetrievalMode.HYBRID,
    )
    # The original printed `qdrant_collections`, a name whose only definition
    # was commented out, so the function always died with NameError here.
    print(vector_store)
    print("vector embeddings done")
    return vector_store