Update app.py
app.py CHANGED
@@ -101,18 +101,38 @@ def get_text_chunks(pages):
 
 
 
-def get_vectorstore(text_chunks : list) -> FAISS:
-    model = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
-    encode_kwargs = {
-        "normalize_embeddings": True
-    } # set True to compute cosine similarity
-    embeddings = HuggingFaceBgeEmbeddings(
-        model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
-    )
-    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+#def get_vectorstore(text_chunks : list) -> FAISS:
+#    model = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
+#    encode_kwargs = {
+#        "normalize_embeddings": True
+#    } # set True to compute cosine similarity
+#    embeddings = HuggingFaceBgeEmbeddings(
+#        model_name=model, encode_kwargs=encode_kwargs, model_kwargs={"device": "cpu"}
+#    )
+#    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+#    return vectorstore
+
+
+def get_vectorstore(text_chunks):
+    """
+    Generate a vector store from a list of text chunks using HuggingFaceEmbeddings.
+    Parameters
+    ----------
+    text_chunks : list
+        List of text chunks to be embedded.
+    Returns
+    -------
+    Chroma
+        A Chroma vector store containing the embeddings of the text chunks.
+    """
+    MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+    hf_embeddings = HuggingFaceEmbeddings(model_name=MODEL_NAME)
+    vectorstore = Chroma.from_documents(text_chunks, hf_embeddings, persist_directory="db")
     return vectorstore
 
 
+
+
 def get_conversation_chain(vectorstore:FAISS) -> ConversationalRetrievalChain:
     # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
     llm = HuggingFaceHub(
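For context, a minimal usage sketch of the change this commit makes: the FAISS store built from raw strings is replaced by a persistent Chroma store built from Document chunks. This is a sketch only, assuming the LangChain integrations the new code calls (HuggingFaceEmbeddings, Chroma.from_documents); the sample chunks and query are hypothetical, not part of the commit, and module paths vary across LangChain versions (older releases expose these under langchain.embeddings and langchain.vectorstores, as used here).

# Usage sketch mirroring the new get_vectorstore body (assumptions noted above).
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.schema import Document

# Hypothetical chunks standing in for the output of get_text_chunks(pages).
# Note: Chroma.from_documents expects Document objects, whereas the old
# FAISS.from_texts path took raw strings.
text_chunks = [
    Document(page_content="First chunk of the uploaded PDF."),
    Document(page_content="Second chunk of the uploaded PDF."),
]

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Builds an on-disk Chroma store under ./db, so embeddings survive restarts.
vectorstore = Chroma.from_documents(text_chunks, embeddings, persist_directory="db")

# Retrieval works the same way it did with the previous FAISS store.
retriever = vectorstore.as_retriever(search_kwargs={"k": 2})
docs = retriever.get_relevant_documents("What is this document about?")
print(docs[0].page_content)

One loose end worth noting: get_conversation_chain still annotates its parameter as vectorstore: FAISS, while get_vectorstore now returns a Chroma store. Python does not enforce the hint at runtime, so the chain still works, but widening the annotation (for example to LangChain's VectorStore base class) would keep the signature accurate. Likewise, since the old path called FAISS.from_texts on strings, get_text_chunks must now yield Document objects for Chroma.from_documents to accept them.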