Spaces:

GIZ
/

GIZ-Project-Search

Running on CPU Upgrade

hybrid test

5170600 7 months ago

1.15 kB

	from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
	from langchain_community.vectorstores import Qdrant
	from langchain_qdrant import FastEmbedSparse, RetrievalMode
	from torch import cuda
	# Pick the device for the embedding model: GPU when CUDA is available,
	# otherwise CPU. `cuda` is torch's CUDA module (`from torch import cuda`).
	device = 'cuda' if cuda.is_available() else 'cpu'


	def hybrid_embed_chunks(chunks):
		"""
		Embed a list of chunks with dense and sparse models and index them in Qdrant.

		A dense embedder (``BAAI/bge-m3`` through ``HuggingFaceEmbeddings``, with
		``normalize_embeddings`` enabled, on the module-level ``device``) and a
		sparse embedder (``Qdrant/bm25`` through ``FastEmbedSparse``) are built
		and passed to ``Qdrant.from_documents`` together with
		``RetrievalMode.HYBRID``. The call targets the collection
		``giz_worldwide`` with ``path="/data/local_qdrant"``.

		Args:
			chunks: the documents to embed; forwarded unchanged as the first
				argument of ``Qdrant.from_documents``.

		Returns:
			None. Progress is reported with ``print``.
		"""
		embeddings = HuggingFaceEmbeddings(
			model_kwargs={'device': device},
			encode_kwargs={'normalize_embeddings': True},
			model_name='BAAI/bge-m3',
		)
		sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

		print("starting embedding")
		# NOTE(review): `Qdrant` here is the legacy langchain_community class.
		# The langchain_qdrant docs describe hybrid mode on `QdrantVectorStore`
		# with the keyword `sparse_embedding` (singular) -- confirm that this
		# call really stores sparse vectors before relying on hybrid search.
		Qdrant.from_documents(
			chunks,
			embeddings,
			sparse_embeddings=sparse_embeddings,
			path="/data/local_qdrant",
			collection_name='giz_worldwide',
			retrieval_mode=RetrievalMode.HYBRID,
		)

		# The former `print(qdrant_collections)` was removed: that name was
		# never assigned (its placeholder was commented out), so the call
		# raised NameError after the indexing had already finished.
		print("vector embeddings done")