Spaces:
Sleeping
Sleeping
File size: 1,353 Bytes
f66560f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
# preprocessed vectorstore retrieval
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import zipfile
from langchain.vectorstores import FAISS
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
# access .env file
s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))
model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
#model_kwargs = {"device": "cuda"}
embeddings = HuggingFaceEmbeddings(
model_name=model_name,
# model_kwargs=model_kwargs
)
## FAISS
FAISS_INDEX_PATH='./vectorstore/lc-faiss-multi-mpnet-500-markdown'
VS_DESTINATION = FAISS_INDEX_PATH+".zip"
s3.download_file('rad-rag-demos', 'vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
zip_ref.extractall('./vectorstore/')
faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)
## Chroma DB
chroma_directory="./vectorstore/lc-chroma-multi-mpnet-500-markdown"
VS_DESTINATION = chroma_directory+".zip"
s3.download_file('rad-rag-demos', 'vectorstores/lc-chroma-multi-mpnet-500-markdown.zip', VS_DESTINATION)
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
zip_ref.extractall('./vectorstore/')
chromadb = Chroma(persist_directory=chroma_directory, embedding_function=embeddings)
chromadb.get() |