File size: 1,353 Bytes
f66560f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# preprocessed vectorstore retrieval
import boto3
from botocore import UNSIGNED
from botocore.client import Config
import zipfile
from langchain.vectorstores import FAISS
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings

# access .env file

s3 = boto3.client('s3', config=Config(signature_version=UNSIGNED))

model_name = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
#model_kwargs = {"device": "cuda"}

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
#    model_kwargs=model_kwargs
    )

## FAISS
FAISS_INDEX_PATH='./vectorstore/lc-faiss-multi-mpnet-500-markdown'
VS_DESTINATION = FAISS_INDEX_PATH+".zip"
s3.download_file('rad-rag-demos', 'vectorstores/lc-faiss-multi-mpnet-500-markdown.zip', VS_DESTINATION)
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
    zip_ref.extractall('./vectorstore/')
faissdb = FAISS.load_local(FAISS_INDEX_PATH, embeddings)

## Chroma DB
chroma_directory="./vectorstore/lc-chroma-multi-mpnet-500-markdown"
VS_DESTINATION = chroma_directory+".zip"
s3.download_file('rad-rag-demos', 'vectorstores/lc-chroma-multi-mpnet-500-markdown.zip', VS_DESTINATION)
with zipfile.ZipFile(VS_DESTINATION, 'r') as zip_ref:
    zip_ref.extractall('./vectorstore/')
chromadb = Chroma(persist_directory=chroma_directory, embedding_function=embeddings)
chromadb.get()