Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -230,19 +230,24 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
|
|
| 230 |
model = get_model(temperature, top_p, repetition_penalty)
|
| 231 |
embed = get_embeddings()
|
| 232 |
|
| 233 |
-
# Check if the FAISS database exists
|
| 234 |
if os.path.exists("faiss_database"):
|
| 235 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
| 236 |
else:
|
| 237 |
-
database
|
| 238 |
-
database
|
| 239 |
|
| 240 |
if web_search:
|
| 241 |
search_results = google_search(question)
|
| 242 |
web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
|
| 243 |
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
database.save_local("faiss_database")
|
| 247 |
|
| 248 |
context_str = "\n".join([doc.page_content for doc in web_docs])
|
|
@@ -258,6 +263,9 @@ def ask_question(question, temperature, top_p, repetition_penalty, web_search):
|
|
| 258 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
| 259 |
formatted_prompt = prompt_val.format(context=context_str, question=question)
|
| 260 |
else:
|
|
|
|
|
|
|
|
|
|
| 261 |
history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
|
| 262 |
|
| 263 |
if is_related_to_history(question, conversation_history):
|
|
@@ -290,15 +298,24 @@ def update_vectors(files, use_recursive_splitter):
|
|
| 290 |
embed = get_embeddings()
|
| 291 |
total_chunks = 0
|
| 292 |
|
|
|
|
| 293 |
for file in files:
|
| 294 |
if use_recursive_splitter:
|
| 295 |
data = load_and_split_document_recursive(file)
|
| 296 |
else:
|
| 297 |
data = load_and_split_document_basic(file)
|
| 298 |
-
|
| 299 |
total_chunks += len(data)
|
| 300 |
|
| 301 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
|
| 303 |
def extract_db_to_excel():
|
| 304 |
embed = get_embeddings()
|
|
|
|
| 230 |
model = get_model(temperature, top_p, repetition_penalty)
|
| 231 |
embed = get_embeddings()
|
| 232 |
|
| 233 |
+
# Check if the FAISS database exists
|
| 234 |
if os.path.exists("faiss_database"):
|
| 235 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
| 236 |
else:
|
| 237 |
+
# If no database exists, we'll create it with the first web search or document upload
|
| 238 |
+
database = None
|
| 239 |
|
| 240 |
if web_search:
|
| 241 |
search_results = google_search(question)
|
| 242 |
web_docs = [Document(page_content=result["text"], metadata={"source": result["link"]}) for result in search_results if result["text"]]
|
| 243 |
|
| 244 |
+
if database is None:
|
| 245 |
+
# Create the database with web search results if it doesn't exist
|
| 246 |
+
database = FAISS.from_documents(web_docs, embed)
|
| 247 |
+
else:
|
| 248 |
+
# Add web search results to the existing database
|
| 249 |
+
database.add_documents(web_docs)
|
| 250 |
+
|
| 251 |
database.save_local("faiss_database")
|
| 252 |
|
| 253 |
context_str = "\n".join([doc.page_content for doc in web_docs])
|
|
|
|
| 263 |
prompt_val = ChatPromptTemplate.from_template(prompt_template)
|
| 264 |
formatted_prompt = prompt_val.format(context=context_str, question=question)
|
| 265 |
else:
|
| 266 |
+
if database is None:
|
| 267 |
+
return "No documents or web search results available. Please upload documents or enable web search."
|
| 268 |
+
|
| 269 |
history_str = "\n".join([f"Q: {item['question']}\nA: {item['answer']}" for item in conversation_history])
|
| 270 |
|
| 271 |
if is_related_to_history(question, conversation_history):
|
|
|
|
| 298 |
embed = get_embeddings()
|
| 299 |
total_chunks = 0
|
| 300 |
|
| 301 |
+
all_data = []
|
| 302 |
for file in files:
|
| 303 |
if use_recursive_splitter:
|
| 304 |
data = load_and_split_document_recursive(file)
|
| 305 |
else:
|
| 306 |
data = load_and_split_document_basic(file)
|
| 307 |
+
all_data.extend(data)
|
| 308 |
total_chunks += len(data)
|
| 309 |
|
| 310 |
+
if os.path.exists("faiss_database"):
|
| 311 |
+
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
| 312 |
+
database.add_documents(all_data)
|
| 313 |
+
else:
|
| 314 |
+
database = FAISS.from_documents(all_data, embed)
|
| 315 |
+
|
| 316 |
+
database.save_local("faiss_database")
|
| 317 |
+
|
| 318 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files."
|
| 319 |
|
| 320 |
def extract_db_to_excel():
|
| 321 |
embed = get_embeddings()
|