Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -18,8 +18,17 @@ import logging
|
|
18 |
import shutil
|
19 |
|
20 |
|
21 |
-
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Environment variables and configurations
|
25 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
@@ -48,21 +57,27 @@ llama_parser = LlamaParse(
|
|
48 |
)
|
49 |
|
50 |
def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
|
51 |
-
"
|
52 |
if parser == "pypdf":
|
53 |
loader = PyPDFLoader(file.name)
|
54 |
-
|
55 |
elif parser == "llamaparse":
|
56 |
try:
|
57 |
documents = llama_parser.load_data(file.name)
|
58 |
-
|
59 |
except Exception as e:
|
60 |
-
|
61 |
-
|
62 |
loader = PyPDFLoader(file.name)
|
63 |
-
|
64 |
else:
|
65 |
raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
def get_embeddings():
    """Build the embedding model used for the vector store.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)
|
@@ -124,10 +139,14 @@ def update_vectors(files, parser):
|
|
124 |
if os.path.exists("faiss_database"):
|
125 |
logging.info("Updating existing FAISS database")
|
126 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
|
|
127 |
database.add_documents(all_data)
|
|
|
|
|
128 |
else:
|
129 |
logging.info("Creating new FAISS database")
|
130 |
database = FAISS.from_documents(all_data, embed)
|
|
|
131 |
|
132 |
database.save_local("faiss_database")
|
133 |
logging.info("FAISS database saved")
|
@@ -135,8 +154,8 @@ def update_vectors(files, parser):
|
|
135 |
logging.error(f"Error updating FAISS database: {str(e)}")
|
136 |
return f"Error updating vector store: {str(e)}", display_documents()
|
137 |
|
138 |
-
# Save the updated list of documents
|
139 |
save_documents(uploaded_documents)
|
|
|
140 |
|
141 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
|
142 |
|
@@ -309,6 +328,7 @@ def respond(message, history, model, temperature, num_calls, use_web_search, sel
|
|
309 |
logging.info(f"User Query: {message}")
|
310 |
logging.info(f"Model Used: {model}")
|
311 |
logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
|
|
|
312 |
|
313 |
logging.info(f"Selected Documents: {selected_docs}")
|
314 |
|
@@ -455,6 +475,7 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
455 |
if os.path.exists("faiss_database"):
|
456 |
logging.info("Loading FAISS database")
|
457 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
|
|
458 |
else:
|
459 |
logging.warning("No FAISS database found")
|
460 |
yield "No documents available. Please upload PDF documents to answer questions."
|
@@ -474,9 +495,9 @@ def get_response_from_pdf(query, model, selected_docs, num_calls=3, temperature=
|
|
474 |
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
475 |
return
|
476 |
|
477 |
-
for doc in filtered_docs:
|
478 |
-
logging.info(f"Document source: {doc.metadata['source']}")
|
479 |
-
logging.info(f"Document content preview: {doc.page_content[:100]}...")
|
480 |
|
481 |
context_str = "\n".join([doc.page_content for doc in filtered_docs])
|
482 |
logging.info(f"Total context length: {len(context_str)}")
|
|
|
18 |
import shutil
|
19 |
|
20 |
|
21 |
+
# Root logger: records from DEBUG upwards go to chatbot.log, which is
# opened in 'w' mode and therefore truncated on every start.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(
    filename='chatbot.log',
    filemode='w',
    level=logging.DEBUG,
    format=_LOG_FORMAT,
)

# Also log to console: same layout, but only INFO and above.
formatter = logging.Formatter(_LOG_FORMAT)
console = logging.StreamHandler()
console.setFormatter(formatter)
console.setLevel(logging.INFO)
logging.getLogger().addHandler(console)
|
32 |
|
33 |
# Environment variables and configurations
|
34 |
huggingface_token = os.environ.get("HUGGINGFACE_TOKEN")
|
|
|
57 |
)
|
58 |
|
59 |
def load_document(file: NamedTemporaryFile, parser: str = "llamaparse") -> List[Document]:
    """Parse an uploaded PDF into a list of ``Document`` chunks.

    Args:
        file: Uploaded file object; only its ``name`` (the path on disk) is used.
        parser: ``"pypdf"`` to load with ``PyPDFLoader``, or ``"llamaparse"``
            to load with the module-level ``llama_parser`` and fall back to
            PyPDF when that fails or yields nothing.

    Returns:
        The parsed chunks. LlamaParse results are re-wrapped as ``Document``
        objects whose only metadata is ``{"source": file.name}``.

    Raises:
        ValueError: If ``parser`` is neither ``"pypdf"`` nor ``"llamaparse"``.
    """
    logging.info(f"Loading document: {file.name} using parser: {parser}")

    def _load_with_pypdf() -> List[Document]:
        # Shared by the explicit "pypdf" choice and the LlamaParse fallback.
        return PyPDFLoader(file.name).load_and_split()

    if parser == "pypdf":
        documents = _load_with_pypdf()
    elif parser == "llamaparse":
        documents = []
        try:
            parsed = llama_parser.load_data(file.name)
            documents = [
                Document(page_content=doc.text, metadata={"source": file.name})
                for doc in parsed
            ]
        except Exception as e:
            # Deliberate best-effort: any LlamaParse failure degrades to PyPDF.
            # logging.exception keeps the traceback alongside the message.
            logging.exception(f"Error using Llama Parse: {str(e)}")
        else:
            if not documents:
                # NOTE(review): LlamaParse may report a failed parse as an
                # empty result instead of raising -- confirm against its docs.
                logging.warning("Llama Parse returned no documents")

        if not documents:
            # An empty result is treated like a failure: there is nothing to
            # index, so give PyPDF a chance instead of returning no chunks.
            logging.info("Falling back to PyPDF parser")
            documents = _load_with_pypdf()
    else:
        raise ValueError("Invalid parser specified. Use 'pypdf' or 'llamaparse'.")

    logging.info(f"Loaded {len(documents)} chunks from {file.name}")
    for i, doc in enumerate(documents):
        # Lazy %-formatting: the preview string is only built when DEBUG
        # records are actually emitted.
        logging.debug("Chunk %d content preview: %s...", i, doc.page_content[:100])

    return documents
|
81 |
|
82 |
def get_embeddings():
    """Build the embedding model used for the vector store.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    return HuggingFaceEmbeddings(model_name=model_name)
|
|
|
139 |
if os.path.exists("faiss_database"):
|
140 |
logging.info("Updating existing FAISS database")
|
141 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
142 |
+
initial_size = len(database.index)
|
143 |
database.add_documents(all_data)
|
144 |
+
final_size = len(database.index)
|
145 |
+
logging.info(f"FAISS database updated. Initial size: {initial_size}, Final size: {final_size}")
|
146 |
else:
|
147 |
logging.info("Creating new FAISS database")
|
148 |
database = FAISS.from_documents(all_data, embed)
|
149 |
+
logging.info(f"New FAISS database created with {len(database.index)} vectors")
|
150 |
|
151 |
database.save_local("faiss_database")
|
152 |
logging.info("FAISS database saved")
|
|
|
154 |
logging.error(f"Error updating FAISS database: {str(e)}")
|
155 |
return f"Error updating vector store: {str(e)}", display_documents()
|
156 |
|
|
|
157 |
save_documents(uploaded_documents)
|
158 |
+
logging.info(f"Updated documents saved. Total documents: {len(uploaded_documents)}")
|
159 |
|
160 |
return f"Vector store updated successfully. Processed {total_chunks} chunks from {len(files)} files using {parser}.", display_documents()
|
161 |
|
|
|
328 |
logging.info(f"User Query: {message}")
|
329 |
logging.info(f"Model Used: {model}")
|
330 |
logging.info(f"Search Type: {'Web Search' if use_web_search else 'PDF Search'}")
|
331 |
+
logging.info(f"Selected Documents: {selected_docs}")
|
332 |
|
333 |
logging.info(f"Selected Documents: {selected_docs}")
|
334 |
|
|
|
475 |
if os.path.exists("faiss_database"):
|
476 |
logging.info("Loading FAISS database")
|
477 |
database = FAISS.load_local("faiss_database", embed, allow_dangerous_deserialization=True)
|
478 |
+
logging.info(f"FAISS database loaded with {len(database.index)} vectors")
|
479 |
else:
|
480 |
logging.warning("No FAISS database found")
|
481 |
yield "No documents available. Please upload PDF documents to answer questions."
|
|
|
495 |
yield "No relevant information found in the selected documents. Please try selecting different documents or rephrasing your query."
|
496 |
return
|
497 |
|
498 |
+
for i, doc in enumerate(filtered_docs):
|
499 |
+
logging.info(f"Document {i+1} source: {doc.metadata['source']}")
|
500 |
+
logging.info(f"Document {i+1} content preview: {doc.page_content[:100]}...")
|
501 |
|
502 |
context_str = "\n".join([doc.page_content for doc in filtered_docs])
|
503 |
logging.info(f"Total context length: {len(context_str)}")
|