Shreyas094 committed on
Commit
b43c062
·
verified ·
1 Parent(s): 9bac56d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -18,8 +18,17 @@ import logging
18
  import shutil
19
 
20
 
21
- # Set up basic configuration for logging
22
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 
 
 
 
 
 
 
 
23
 
24
  # Environment variables and configurations
25
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
@@ -48,21 +57,27 @@ llama_parser = LlamaParse(
48
  )
49
 
50
  def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
51
- """Loads and splits the document into pages."""
52
  if parser == "pypdf":
53
  loader = PyPDFLoader(file.name)
54
- return loader.load_and_split()
55
  elif parser == "llamaparse":
56
  try:
57
  documents = llama_parser.load_data(file.name)
58
- return [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
59
  except Exception as e:
60
- print(f"Error using Llama Parse: {str(e)}")
61
- print("Falling back to PyPDF parser")
62
  loader = PyPDFLoader(file.name)
63
- return loader.load_and_split()
64
  else:
65
  raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
 
 
 
 
 
 
66
 
67
  def get_embeddings():
68
  return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
@@ -124,10 +139,14 @@ def update_vectors(files, parser):
124
  if os.path.exists("faiss_database"):
125
  logging.info("Updating existing FAISS database")
126
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
 
127
  database.add_documents(all_data)
 
 
128
  else:
129
  logging.info("Creating new FAISS database")
130
  database = FAISS.from_documents(all_data, embed)
 
131
 
132
  database.save_local("faiss_database")
133
  logging.info("FAISS database saved")
@@ -135,8 +154,8 @@ def update_vectors(files, parser):
135
  logging.error(f"Error updating FAISS database: {str(e)}")
136
  return f"Error updating vector store: {str(e)}", display_documents()
137
 
138
- # Save the updated list of documents
139
  save_documents(uploaded_documents)
 
140
 
141
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
142
 
@@ -309,6 +328,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
309
  logging.info(f"User Query: {message}")
310
  logging.info(f"Model Used: {model}")
311
  logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
 
312
 
313
  logging.info(f"Selected Documents: {selected_docs}")
314
 
@@ -455,6 +475,7 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
455
  if os.path.exists("faiss_database"):
456
  logging.info("Loading FAISS database")
457
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
 
458
  else:
459
  logging.warning("No FAISS database found")
460
  yield "No documents available. Please upload PDF documents to answer questions."
@@ -474,9 +495,9 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
474
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
475
  return
476
 
477
- for doc in filtered_docs:
478
- logging.info(f"Document source: {doc.metadata['source']}")
479
- logging.info(f"Document content preview: {doc.page_content[:100]}...") # Log first 100 characters of each document
480
 
481
  context_str = "\n".join([doc.page_content for doc in filtered_docs])
482
  logging.info(f"Total context length: {len(context_str)}")
 
18
  import shutil
19
 
20
 
21
# Log everything (DEBUG and up) to a file; filemode='w' truncates the
# previous run's log on startup.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    filename='chatbot.log',
                    filemode='w')

# Also mirror INFO-and-up records to the console.
# Guard against re-execution (module reload is common in Gradio apps):
# unconditionally calling addHandler would attach a second StreamHandler
# and duplicate every console line. basicConfig's own FileHandler is a
# StreamHandler subclass, so exclude it from the check.
_root_logger = logging.getLogger()
if not any(isinstance(h, logging.StreamHandler)
           and not isinstance(h, logging.FileHandler)
           for h in _root_logger.handlers):
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    console.setFormatter(
        logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
    _root_logger.addHandler(console)
32
 
33
  # Environment variables and configurations
34
  huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
 
57
  )
58
 
59
def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
    """Load a PDF and split it into Document chunks.

    Args:
        file: An open (named) temporary file; only ``file.name`` is used.
        parser: Either ``"pypdf"`` (PyPDFLoader) or ``"llamaparse"``
            (LlamaParse, falling back to PyPDF on any error).

    Returns:
        A list of ``Document`` chunks extracted from the file.

    Raises:
        ValueError: If ``parser`` is neither ``"pypdf"`` nor ``"llamaparse"``.
    """
    # Lazy %-style args avoid formatting cost when the level is disabled.
    logging.info("Loading document: %s using parser: %s", file.name, parser)
    if parser == "pypdf":
        loader = PyPDFLoader(file.name)
        documents = loader.load_and_split()
    elif parser == "llamaparse":
        try:
            documents = llama_parser.load_data(file.name)
            documents = [Document(page_content=doc.text, metadata={"source": file.name}) for doc in documents]
        except Exception as e:
            # Best-effort fallback: keep the upload working even when the
            # remote LlamaParse service fails.
            logging.error("Error using Llama Parse: %s", str(e))
            logging.info("Falling back to PyPDF parser")
            loader = PyPDFLoader(file.name)
            documents = loader.load_and_split()
    else:
        raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")

    logging.info("Loaded %d chunks from %s", len(documents), file.name)
    # Preview only at DEBUG so normal runs aren't flooded with content.
    for i, doc in enumerate(documents):
        logging.debug("Chunk %d content preview: %s...", i, doc.page_content[:100])

    return documents
81
 
82
  def get_embeddings():
83
  return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
139
  if os.path.exists("faiss_database"):
140
  logging.info("Updating existing FAISS database")
141
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
142
+ initial_size = len(database.index)
143
  database.add_documents(all_data)
144
+ final_size = len(database.index)
145
+ logging.info(f"FAISS database updated. Initial size: {initial_size}, Final size: {final_size}")
146
  else:
147
  logging.info("Creating new FAISS database")
148
  database = FAISS.from_documents(all_data, embed)
149
+ logging.info(f"New FAISS database created with {len(database.index)} vectors")
150
 
151
  database.save_local("faiss_database")
152
  logging.info("FAISS database saved")
 
154
  logging.error(f"Error updating FAISS database: {str(e)}")
155
  return f"Error updating vector store: {str(e)}", display_documents()
156
 
 
157
  save_documents(uploaded_documents)
158
+ logging.info(f"Updated documents saved. Total documents: {len(uploaded_documents)}")
159
 
160
  return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
161
 
 
328
  logging.info(f"User Query: {message}")
329
  logging.info(f"Model Used: {model}")
330
  logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
331
+ logging.info(f"Selected Documents: {selected_docs}")
332
 
333
  logging.info(f"Selected Documents: {selected_docs}")
334
 
 
475
  if os.path.exists("faiss_database"):
476
  logging.info("Loading FAISS database")
477
  database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
478
+ logging.info(f"FAISS database loaded with {len(database.index)} vectors")
479
  else:
480
  logging.warning("No FAISS database found")
481
  yield "No documents available. Please upload PDF documents to answer questions."
 
495
  yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
496
  return
497
 
498
+ for i, doc in enumerate(filtered_docs):
499
+ logging.info(f"Document {i+1} source: {doc.metadata['source']}")
500
+ logging.info(f"Document {i+1} content preview: {doc.page_content[:100]}...")
501
 
502
  context_str = "\n".join([doc.page_content for doc in filtered_docs])
503
  logging.info(f"Total context length: {len(context_str)}")