Spaces:
Running
Running
Jatin Mehra
committed on
Commit
·
4a31622
1
Parent(s):
24b32e6
Enhance PDF chunking logic and add validation for chat requests to improve data integrity and user experience
Browse files
app.py
CHANGED
@@ -178,16 +178,7 @@ async def upload_pdf(
|
|
178 |
print("Warning: TAVILY_API_KEY is not set. Web search will not function.")
|
179 |
|
180 |
documents = process_pdf_file(file_path)
|
181 |
-
|
182 |
-
# The value 1500 might be too large if estimate_tokens is text_len // 4, as it means ~6000 characters.
|
183 |
-
# Let's use a smaller max_length for chunks for better granularity in RAG retrieval.
|
184 |
-
# For `bge-large-en-v1.5` (max sequence length 512 tokens), chunks around 250-400 tokens are often good.
|
185 |
-
# If estimate_tokens is len(text)//4, then max_length of 250 tokens is roughly 1000 characters.
|
186 |
-
# Let's use max_length=256 (tokens) for chunker config, so about 1024 characters.
|
187 |
-
# The chunk_text function uses max_length as character count / 4. So if we want 256 tokens, max_length = 256*4 = 1024
|
188 |
-
# However, the current chunk_text logic is `estimate_tokens(current_chunk + paragraph) <= max_length // 4`.
|
189 |
-
# This means `max_length` is already considered a token limit. So `max_length=256` (tokens) is the target.
|
190 |
-
chunks_with_metadata = chunk_text(documents, max_length=256) # max_length in tokens
|
191 |
|
192 |
embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
|
193 |
embeddings, _ = create_embeddings(chunks_with_metadata, embedding_model) # Chunks are already with metadata
|
@@ -222,11 +213,25 @@ async def upload_pdf(
|
|
222 |
# Route to chat with the document
|
223 |
@app.post("/chat")
|
224 |
async def chat(request: ChatRequest):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
225 |
session, found = load_session(request.session_id, model_name=request.model_name)
|
226 |
if not found:
|
227 |
raise HTTPException(status_code=404, detail="Session not found or expired. Please upload a document first.")
|
228 |
|
229 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
# Per-request memory to ensure chat history is correctly loaded for the agent
|
231 |
agent_memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", return_messages=True)
|
232 |
for entry in session.get("chat_history", []):
|
@@ -237,15 +242,14 @@ async def chat(request: ChatRequest):
|
|
237 |
current_request_tools = []
|
238 |
|
239 |
# 1. Add the document-specific vector search tool
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
print(f"Warning: Session {request.session_id} missing data for vector_database_search tool.")
|
249 |
|
250 |
# 2. Conditionally add Tavily (web search) tool
|
251 |
if request.use_search:
|
@@ -270,6 +274,10 @@ async def chat(request: ChatRequest):
|
|
270 |
k=5 # Number of chunks for initial context
|
271 |
)
|
272 |
|
|
|
|
|
|
|
|
|
273 |
response = agentic_rag(
|
274 |
session["llm"],
|
275 |
current_request_tools, # Pass the dynamically assembled list of tools
|
@@ -280,6 +288,8 @@ async def chat(request: ChatRequest):
|
|
280 |
)
|
281 |
|
282 |
response_output = response.get("output", "Sorry, I could not generate a response.")
|
|
|
|
|
283 |
session["chat_history"].append({"user": request.query, "assistant": response_output})
|
284 |
save_session(request.session_id, session) # Save updated history and potentially other modified session state
|
285 |
|
|
|
178 |
print("Warning: TAVILY_API_KEY is not set. Web search will not function.")
|
179 |
|
180 |
documents = process_pdf_file(file_path)
|
181 |
+
chunks_with_metadata = chunk_text(documents, max_length=1000) # Increased from 256 to 1000 tokens for better context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
182 |
|
183 |
embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
|
184 |
embeddings, _ = create_embeddings(chunks_with_metadata, embedding_model) # Chunks are already with metadata
|
|
|
213 |
# Route to chat with the document
|
214 |
@app.post("/chat")
|
215 |
async def chat(request: ChatRequest):
|
216 |
+
# Validate query
|
217 |
+
if not request.query or not request.query.strip():
|
218 |
+
raise HTTPException(status_code=400, detail="Query cannot be empty")
|
219 |
+
|
220 |
+
if len(request.query.strip()) < 3:
|
221 |
+
raise HTTPException(status_code=400, detail="Query must be at least 3 characters long")
|
222 |
+
|
223 |
session, found = load_session(request.session_id, model_name=request.model_name)
|
224 |
if not found:
|
225 |
raise HTTPException(status_code=404, detail="Session not found or expired. Please upload a document first.")
|
226 |
|
227 |
try:
|
228 |
+
# Validate session data integrity
|
229 |
+
required_keys = ["index", "chunks", "model", "llm"]
|
230 |
+
missing_keys = [key for key in required_keys if key not in session]
|
231 |
+
if missing_keys:
|
232 |
+
print(f"Warning: Session {request.session_id} missing required data: {missing_keys}")
|
233 |
+
raise HTTPException(status_code=500, detail="Session data is incomplete. Please upload the document again.")
|
234 |
+
|
235 |
# Per-request memory to ensure chat history is correctly loaded for the agent
|
236 |
agent_memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", return_messages=True)
|
237 |
for entry in session.get("chat_history", []):
|
|
|
242 |
current_request_tools = []
|
243 |
|
244 |
# 1. Add the document-specific vector search tool
|
245 |
+
vector_search_tool_instance = create_vector_search_tool(
|
246 |
+
faiss_index=session["index"],
|
247 |
+
document_chunks_with_metadata=session["chunks"], # Pass the correct variable
|
248 |
+
embedding_model=session["model"], # This is the SentenceTransformer model
|
249 |
+
max_chunk_length=1000,
|
250 |
+
k=10
|
251 |
+
)
|
252 |
+
current_request_tools.append(vector_search_tool_instance)
|
|
|
253 |
|
254 |
# 2. Conditionally add Tavily (web search) tool
|
255 |
if request.use_search:
|
|
|
274 |
k=5 # Number of chunks for initial context
|
275 |
)
|
276 |
|
277 |
+
print(f"Query: '{request.query}' - Found {len(initial_similar_chunks)} initial chunks")
|
278 |
+
if initial_similar_chunks:
|
279 |
+
print(f"Best chunk score: {initial_similar_chunks[0][1]:.4f}")
|
280 |
+
|
281 |
response = agentic_rag(
|
282 |
session["llm"],
|
283 |
current_request_tools, # Pass the dynamically assembled list of tools
|
|
|
288 |
)
|
289 |
|
290 |
response_output = response.get("output", "Sorry, I could not generate a response.")
|
291 |
+
print(f"Generated response length: {len(response_output)} characters")
|
292 |
+
|
293 |
session["chat_history"].append({"user": request.query, "assistant": response_output})
|
294 |
save_session(request.session_id, session) # Save updated history and potentially other modified session state
|
295 |
|