SearchGPT

Running

App Files Community

Shreyas094 committed on Aug 4, 2024

Commit

b43c062

verified ·

1 Parent(s): 9bac56d

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -12

app.py CHANGED Viewed

@@ -18,8 +18,17 @@ import logging
 import shutil
-# Set up basic configuration for logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -48,21 +57,27 @@ llama_parser = LlamaParse(
 )
 def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
-    """Loads and splits the document into pages."""
     if parser == "pypdf":
         loader = PyPDFLoader(file.name)
-        return loader.load_and_split()
     elif parser == "llamaparse":
         try:
             documents = llama_parser.load_data(file.name)
-            return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
         except Exception as e:
-            print(f"Error using Llama Parse: {str(e)}")
-            print("Falling back to PyPDF parser")
             loader = PyPDFLoader(file.name)
-            return loader.load_and_split()
     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
@@ -124,10 +139,14 @@ def update_vectors(files, parser):
         if os.path.exists("faiss_database"):
             logging.info("Updating existing FAISS database")
             database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
             database.add_documents(all_data)
         else:
             logging.info("Creating new FAISS database")
             database = FAISS.from_documents(all_data, embed)
         database.save_local("faiss_database")
         logging.info("FAISS database saved")
@@ -135,8 +154,8 @@ def update_vectors(files, parser):
         logging.error(f"Error updating FAISS database: {str(e)}")
         return f"Error updating vector store: {str(e)}", display_documents()
-    # Save the updated list of documents
     save_documents(uploaded_documents)
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
@@ -309,6 +328,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
     logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
     logging.info(f"Selected Documents: {selected_docs}")
@@ -455,6 +475,7 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
     if os.path.exists("faiss_database"):
         logging.info("Loading FAISS database")
         database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
     else:
         logging.warning("No FAISS database found")
         yield "No documents available. Please upload PDF documents to answer questions."
@@ -474,9 +495,9 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
         yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
         return
-    for doc in filtered_docs:
-        logging.info(f"Document source: {doc.metadata['source']}")
-        logging.info(f"Document content preview: {doc.page_content[:100]}...")  # Log first 100 characters of each document
     context_str = "\n".join([doc.page_content for doc in filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")

 import shutil
+logging.basicConfig(level=logging.DEBUG,
+                    format='%(asctime)s - %(levelname)s - %(message)s',
+                    filename='chatbot.log',
+                    filemode='w')
+# Also log to console
+console = logging.StreamHandler()
+console.setLevel(logging.INFO)
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+console.setFormatter(formatter)
+logging.getLogger('').addHandler(console)
 # Environment variables and configurations
 huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 )
 def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
+    logging.info(f"Loading document: {file.name} using parser: {parser}")
     if parser == "pypdf":
         loader = PyPDFLoader(file.name)
+        documents = loader.load_and_split()
     elif parser == "llamaparse":
         try:
             documents = llama_parser.load_data(file.name)
+            documents = [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
         except Exception as e:
+            logging.error(f"Error using Llama Parse: {str(e)}")
+            logging.info("Falling back to PyPDF parser")
             loader = PyPDFLoader(file.name)
+            documents = loader.load_and_split()
     else:
         raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
+    logging.info(f"Loaded {len(documents)} chunks from {file.name}")
+    for i, doc in enumerate(documents):
+        logging.debug(f"Chunk {i} content preview: {doc.page_content[:100]}...")
+    return documents
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
         if os.path.exists("faiss_database"):
             logging.info("Updating existing FAISS database")
             database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+            initial_size = len(database.index)
             database.add_documents(all_data)
+            final_size = len(database.index)
+            logging.info(f"FAISS database updated. Initial size: {initial_size}, Final size: {final_size}")
         else:
             logging.info("Creating new FAISS database")
             database = FAISS.from_documents(all_data, embed)
+            logging.info(f"New FAISS database created with {len(database.index)} vectors")
         database.save_local("faiss_database")
         logging.info("FAISS database saved")
         logging.error(f"Error updating FAISS database: {str(e)}")
         return f"Error updating vector store: {str(e)}", display_documents()
     save_documents(uploaded_documents)
+    logging.info(f"Updated documents saved. Total documents: {len(uploaded_documents)}")
     return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
     logging.info(f"User Query: {message}")
     logging.info(f"Model Used: {model}")
     logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
+    logging.info(f"Selected Documents: {selected_docs}")
     logging.info(f"Selected Documents: {selected_docs}")
     if os.path.exists("faiss_database"):
         logging.info("Loading FAISS database")
         database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
+        logging.info(f"FAISS database loaded with {len(database.index)} vectors")
     else:
         logging.warning("No FAISS database found")
         yield "No documents available. Please upload PDF documents to answer questions."
         yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
         return
+    for i, doc in enumerate(filtered_docs):
+        logging.info(f"Document {i+1} source: {doc.metadata['source']}")
+        logging.info(f"Document {i+1} content preview: {doc.page_content[:100]}...")
     context_str = "\n".join([doc.page_content for doc in filtered_docs])
     logging.info(f"Total context length: {len(context_str)}")