realtime-rag-pipeline / retriever / embed_documents.py
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS


def embed_documents(documents):
    # Embed document text with a lightweight sentence-transformers model
    embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
    # Build an in-memory FAISS index over the raw text of each document
    vector_store = FAISS.from_texts([doc['text'] for doc in documents], embedding_model)
    return vector_store
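
A minimal usage sketch, assuming `documents` is a list of dicts with a `'text'` key (as the list comprehension above expects); the sample texts and query are illustrative placeholders, and retrieval uses the standard `similarity_search` method of the returned FAISS store:

if __name__ == "__main__":
    # Hypothetical sample documents, just to exercise the function
    docs = [
        {"text": "FAISS is a library for efficient similarity search over dense vectors."},
        {"text": "LangChain exposes FAISS behind a common vector store interface."},
    ]
    store = embed_documents(docs)
    # Return the single most similar document for the query
    results = store.similarity_search("What is FAISS used for?", k=1)
    print(results[0].page_content)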