Commit 24b32e6 · Jatin Mehra committed · Parent(s): 63ed7c1
Refactor session management and enhance PDF upload handling for improved error handling and model consistency
app.py
CHANGED
@@ -18,9 +18,11 @@ from preprocessing import (
     build_faiss_index,
     retrieve_similar_chunks,
     agentic_rag,
-    tools
+    tools as global_base_tools,
+    create_vector_search_tool
 )
 from sentence_transformers import SentenceTransformer
+from langchain.memory import ConversationBufferMemory
 
 # Load environment variables
 dotenv.load_dotenv()

@@ -57,50 +59,44 @@ class SessionRequest(BaseModel):
 
 # Function to save session data
 def save_session(session_id, data):
-    sessions[session_id] = data
+    sessions[session_id] = data  # Keep non-picklable in memory for active session
 
-    # Create a copy of data that is safe to pickle
     pickle_safe_data = {
         "file_path": data.get("file_path"),
         "file_name": data.get("file_name"),
-        "chunks": data.get("chunks"),
+        "chunks": data.get("chunks"),  # Chunks with metadata (list of dicts)
         "chat_history": data.get("chat_history", [])
+        # FAISS index, embedding model, and LLM model are not pickled, will be reloaded/recreated
     }
 
-    # Persist to disk
     with open(f"{UPLOAD_DIR}/{session_id}_session.pkl", "wb") as f:
         pickle.dump(pickle_safe_data, f)
 
+
 # Function to load session data
-def load_session(session_id, model_name="meta-llama/llama-4-scout-17b-16e-instruct"):
+def load_session(session_id, model_name="llama3-8b-8192"):  # Ensure model_name matches default
     try:
-        # Check if session is already in memory
         if session_id in sessions:
-            return sessions[session_id], True
+            cached_session = sessions[session_id]
+            # Ensure LLM and potentially other non-pickled parts are up-to-date or loaded
+            if cached_session.get("llm") is None or (hasattr(cached_session["llm"], "model_name") and cached_session["llm"].model_name != model_name):
+                cached_session["llm"] = model_selection(model_name)
+            if cached_session.get("model") is None:  # Embedding model
+                cached_session["model"] = SentenceTransformer('BAAI/bge-large-en-v1.5')
+            if cached_session.get("index") is None and cached_session.get("chunks"):  # FAISS index
+                embeddings, _ = create_embeddings(cached_session["chunks"], cached_session["model"])
+                cached_session["index"] = build_faiss_index(embeddings)
+            return cached_session, True
 
-        # Try to load from disk
         file_path_pkl = f"{UPLOAD_DIR}/{session_id}_session.pkl"
         if os.path.exists(file_path_pkl):
             with open(file_path_pkl, "rb") as f:
-                data = pickle.load(f)
+                data = pickle.load(f)
 
-            # Recreate non-pickled objects
-            # Ensure 'chunks' and 'file_path' (for the original PDF) are present in the loaded data
-            # and the original PDF file still exists.
             original_pdf_path = data.get("file_path")
             if data.get("chunks") and original_pdf_path and os.path.exists(original_pdf_path):
                 embedding_model_instance = SentenceTransformer('BAAI/bge-large-en-v1.5')
+                # Chunks are already {text: ..., metadata: ...}
                 recreated_embeddings, _ = create_embeddings(data["chunks"], embedding_model_instance)
                 recreated_index = build_faiss_index(recreated_embeddings)
                 recreated_llm = model_selection(model_name)

@@ -108,25 +104,23 @@ def load_session(session_id, model_name="meta-llama/llama-4-scout-17b-16e-instruct"):
                 full_session_data = {
                     "file_path": original_pdf_path,
                     "file_name": data.get("file_name"),
-                    "chunks": data.get("chunks"),
+                    "chunks": data.get("chunks"),  # chunks_with_metadata
                     "chat_history": data.get("chat_history", []),
                     "model": embedding_model_instance,  # SentenceTransformer model
                     "index": recreated_index,  # FAISS index
                     "llm": recreated_llm  # LLM
                 }
-                sessions[session_id] = full_session_data
+                sessions[session_id] = full_session_data
                 return full_session_data, True
             else:
-                # Optionally, remove the stale .pkl file
-                # os.remove(file_path_pkl)
+                print(f"Warning: Session data for {session_id} is incomplete or PDF missing. Cannot reconstruct.")
+                if os.path.exists(file_path_pkl): os.remove(file_path_pkl)  # Clean up stale pkl
                 return None, False
 
-        return None, False
+        return None, False
     except Exception as e:
         print(f"Error loading session {session_id}: {str(e)}")
-        print(traceback.format_exc())
+        print(traceback.format_exc())
         return None, False
 
 # Function to remove PDF file
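The save/load pair above defines a persistence contract: only plain data (file path, file name, chunks, chat history) is pickled, while the FAISS index, the SentenceTransformer embedding model, and the LLM are rebuilt on demand. Below is a minimal sketch of that round trip, not part of the commit, assuming the functions defined in app.py above, a GROQ_API_KEY for model_selection(), and that the bge-large-en-v1.5 weights can be downloaded on first use.

# Sketch: session persistence round trip using the helpers shown above (illustrative only).
from app import save_session, load_session, UPLOAD_DIR

session_id = "demo"
save_session(session_id, {
    "file_path": f"{UPLOAD_DIR}/demo.pdf",  # placeholder; only checked on a cold (from-disk) load
    "file_name": "demo.pdf",
    "chunks": [{"text": "hello world", "metadata": {"page": 1}}],
    "chat_history": [],
})

# In-memory path: the embedding model, FAISS index, and LLM are created lazily here.
session, found = load_session(session_id, model_name="llama3-8b-8192")
assert found and {"index", "model", "llm"} <= session.keys()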
@@ -168,44 +162,46 @@ async def read_root():
 @app.post("/upload-pdf")
 async def upload_pdf(
     file: UploadFile = File(...),
-    model_name: str = Form("
+    model_name: str = Form("llama3-8b-8192")  # Default model
 ):
-    # Generate a unique session ID
     session_id = str(uuid.uuid4())
     file_path = None
 
     try:
-        # Save the uploaded file
         file_path = f"{UPLOAD_DIR}/{session_id}_{file.filename}"
         with open(file_path, "wb") as buffer:
             shutil.copyfileobj(file.file, buffer)
 
-        documents = process_pdf_file(file_path)
+        if not os.getenv("GROQ_API_KEY") and "llama" in model_name:  # Llama-specific check for Groq
+            raise ValueError("GROQ_API_KEY is not set for Groq Llama models.")
+        if not os.getenv("TAVILY_API_KEY"):  # Needed for TavilySearchResults
+            print("Warning: TAVILY_API_KEY is not set. Web search will not function.")
+
+        documents = process_pdf_file(file_path)
+        # Ensure max_length for chunk_text is appropriate. The previous value of 1500 is too large:
+        # with estimate_tokens roughly text_len // 4, it allows ~6000-character chunks, well beyond
+        # the 512-token window of bge-large-en-v1.5; chunks of around 250-400 tokens retrieve better.
+        # chunk_text compares estimate_tokens(current_chunk + paragraph) against max_length, i.e.
+        # max_length is already a token limit, so max_length=256 tokens (~1024 characters) is the target.
+        chunks_with_metadata = chunk_text(documents, max_length=256)  # max_length in tokens
 
+        embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
+        embeddings, _ = create_embeddings(chunks_with_metadata, embedding_model)  # Chunks are already with metadata
 
+        index = build_faiss_index(embeddings)
         llm = model_selection(model_name)
 
-        # Save session data
         session_data = {
             "file_path": file_path,
             "file_name": file.filename,
-            "chunks": chunks_with_metadata,
-            "model": 
-            "index": index,
-            "llm": llm,
+            "chunks": chunks_with_metadata,  # Store chunks with metadata
+            "model": embedding_model,  # SentenceTransformer instance
+            "index": index,  # FAISS index instance
+            "llm": llm,  # LLM instance
             "chat_history": []
         }
         save_session(session_id, session_data)
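To make the chunk-size reasoning in the comments above concrete, here is the arithmetic spelled out as a standalone sketch. estimate_tokens is assumed to be the simple length // 4 heuristic the comments refer to, which may differ from the actual implementation in preprocessing.py.

# Rough token arithmetic behind max_length=256 (assumed heuristic, not the actual preprocessing.py code).
def estimate_tokens(text: str) -> int:
    return len(text) // 4                  # ~4 characters per token

MAX_TOKENS = 256                           # chunk_text(documents, max_length=256)
APPROX_CHARS = MAX_TOKENS * 4              # ~1024 characters per chunk

BGE_MAX_SEQ_TOKENS = 512                   # BAAI/bge-large-en-v1.5 truncates beyond this
assert MAX_TOKENS <= BGE_MAX_SEQ_TOKENS    # chunks fit inside the embedding window

# The old setting of 1500 would have allowed ~6000-character chunks, so most of each
# chunk would fall outside the 512-token window and never be embedded.
print(APPROX_CHARS, estimate_tokens("x" * APPROX_CHARS))   # -> 1024 256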
@@ -213,71 +209,89 @@ async def upload_pdf(
         return {"status": "success", "session_id": session_id, "message": f"Processed {file.filename}"}
 
     except Exception as e:
-        # Clean up on error
         if file_path and os.path.exists(file_path):
             os.remove(file_path)
         error_msg = str(e)
         stack_trace = traceback.format_exc()
-        print(f"Error processing PDF: {error_msg}")
-        print(f"Stack trace: {stack_trace}")
-
+        print(f"Error processing PDF: {error_msg}\nStack trace: {stack_trace}")
         return JSONResponse(
-            status_code=
-            content={
-                "status": "error",
-                "detail": error_msg,
-                "type": type(e).__name__
-            }
+            status_code=500,  # Internal server error for processing issues
+            content={"status": "error", "detail": error_msg, "type": type(e).__name__}
         )
 
 # Route to chat with the document
 @app.post("/chat")
 async def chat(request: ChatRequest):
-    # Try to load session if not in memory
     session, found = load_session(request.session_id, model_name=request.model_name)
     if not found:
-        raise HTTPException(status_code=404, detail="Session not found. Please upload a document first.")
+        raise HTTPException(status_code=404, detail="Session not found or expired. Please upload a document first.")
 
     try:
-        agent_memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
+        # Per-request memory to ensure chat history is correctly loaded for the agent
+        agent_memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", return_messages=True)
         for entry in session.get("chat_history", []):
             agent_memory.chat_memory.add_user_message(entry["user"])
             agent_memory.chat_memory.add_ai_message(entry["assistant"])
 
+        # Prepare tools for the agent for THIS request
+        current_request_tools = []
+
+        # 1. Add the document-specific vector search tool
+        if "index" in session and "chunks" in session and "model" in session:
+            vector_search_tool_instance = create_vector_search_tool(
+                faiss_index=session["index"],
+                document_chunks_with_metadata=session["chunks"],  # Pass the correct variable
+                embedding_model=session["model"]  # This is the SentenceTransformer model
+            )
+            current_request_tools.append(vector_search_tool_instance)
+        else:
+            print(f"Warning: Session {request.session_id} missing data for vector_database_search tool.")
+
+        # 2. Conditionally add Tavily (web search) tool
+        if request.use_search:
+            if os.getenv("TAVILY_API_KEY"):
+                tavily_tool = next((tool for tool in global_base_tools if tool.name == "tavily_search_results_json"), None)
+                if tavily_tool:
+                    current_request_tools.append(tavily_tool)
+                else:  # Should not happen if global_base_tools is defined correctly
+                    print("Warning: Tavily search requested, but tool misconfigured.")
+            else:
+                print("Warning: Tavily search requested, but TAVILY_API_KEY is not set.")
+
+        # Retrieve initial similar chunks for RAG context (can be empty if no good match).
+        # This context is given to the agent *before* it decides to use tools.
+        # k=5 means we retrieve up to 5 chunks for initial context.
+        # The agent can then use vector_database_search to search more if needed.
+        initial_similar_chunks = retrieve_similar_chunks(
             request.query,
             session["index"],
-            session["chunks"],
-            session["model"],
-            k=
+            session["chunks"],  # list of dicts {text: ..., metadata: ...}
+            session["model"],  # SentenceTransformer model
+            k=5  # Number of chunks for initial context
        )
 
-        # Generate response using agentic_rag
         response = agentic_rag(
             session["llm"],
+            current_request_tools,  # Pass the dynamically assembled list of tools
             query=request.query,
-            context_chunks=
-            Use_Tavily=request.use_search,
+            context_chunks=initial_similar_chunks,
+            Use_Tavily=request.use_search,  # Still passed to agentic_rag for potential fine-grained logic, though prompt adapts to tools
             memory=agent_memory
         )
 
-        session["chat_history"].append({"user": request.query, "assistant":
-        save_session(request.session_id, session)
+        response_output = response.get("output", "Sorry, I could not generate a response.")
+        session["chat_history"].append({"user": request.query, "assistant": response_output})
+        save_session(request.session_id, session)  # Save updated history and potentially other modified session state
 
         return {
             "status": "success",
-            "answer":
+            "answer": response_output,
+            # Return context that was pre-fetched for the agent, not necessarily all context it might have used via tools
+            "context_used": [{"text": chunk, "score": float(score), "metadata": meta} for chunk, score, meta in initial_similar_chunks]
         }
 
     except Exception as e:
+        print(f"Error processing chat query: {str(e)}\nTraceback: {traceback.format_exc()}")
         raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")
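The vector_database_search tool assembled above comes from create_vector_search_tool in preprocessing.py, which this diff does not show. Purely to illustrate the pattern (a per-session FAISS index wrapped as a LangChain Tool), here is a hypothetical sketch; the real signature and behaviour may differ.

# Hypothetical sketch only; the real create_vector_search_tool lives in preprocessing.py.
import numpy as np
from langchain.tools import Tool

def create_vector_search_tool(faiss_index, document_chunks_with_metadata, embedding_model, k: int = 5):
    def _search(query: str) -> str:
        # Embed the query with the session's SentenceTransformer and search the FAISS index.
        query_vec = np.asarray(embedding_model.encode([query]), dtype=np.float32)
        _, indices = faiss_index.search(query_vec, k)
        hits = [document_chunks_with_metadata[i]["text"] for i in indices[0] if i != -1]
        return "\n\n".join(hits) if hits else "No matching passages found."

    return Tool(
        name="vector_database_search",  # the name referenced by the warning message above
        description="Search the uploaded PDF for passages relevant to the query.",
        func=_search,
    )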
@@ -330,5 +344,4 @@ async def get_models():
 
 # Run the application if this file is executed directly
 if __name__ == "__main__":
-    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
-
+    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
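Finally, a hypothetical client-side walkthrough of the two routes touched by this commit. The base URL and file name are placeholders; the request fields follow the upload form and ChatRequest usage shown above.

# Hypothetical end-to-end check of /upload-pdf and /chat (placeholder URL and file).
import requests

BASE = "http://localhost:8000"

with open("paper.pdf", "rb") as f:
    upload = requests.post(
        f"{BASE}/upload-pdf",
        files={"file": ("paper.pdf", f, "application/pdf")},
        data={"model_name": "llama3-8b-8192"},
    ).json()

reply = requests.post(
    f"{BASE}/chat",
    json={
        "session_id": upload["session_id"],
        "query": "Summarise the main contribution of this paper.",
        "model_name": "llama3-8b-8192",
        "use_search": False,   # set True to let the agent use the Tavily tool
    },
).json()

print(reply["answer"])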