Jatin Mehra committed

Commit ba907cd · 1 Parent(s): 33c5afb

Refactor and reorganize codebase for improved maintainability and clarity


- Updated import paths in gen_dataset.py to reflect new module structure.
- Introduced models.py to define Pydantic models for the PDF Insight Beta application.
- Created preprocessing_refactored.py to modularize preprocessing functionality while maintaining backward compatibility.
- Initialized services module with easy imports for all service classes and functions.
- Developed llm_service.py for LLM model management and interaction.
- Implemented rag_service.py for RAG operations, including tool creation and agent management.
- Established session_service.py for high-level session management operations.
- Added test_refactored.py to verify functionality and backward compatibility of refactored code.
- Created utility modules for session management and data persistence.

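For orientation, here is a minimal sketch of how the refactored pieces are meant to compose, assuming the services singletons and utils helpers keep the signatures shown in the diffs below (the file path and query are hypothetical):

    from langchain.memory import ConversationBufferMemory

    from services import session_manager, rag_service
    from utils import process_pdf_file, chunk_text, retrieve_similar_chunks

    documents = process_pdf_file("uploads/example.pdf")   # hypothetical path
    chunks = chunk_text(documents, max_length=1000)
    session_id = session_manager.create_session(
        file_path="uploads/example.pdf",
        file_name="example.pdf",
        chunks_with_metadata=chunks,
        model_name="llama-3.1-8b-instant",
    )

    session, found = session_manager.get_session(session_id)
    if found:
        memory = ConversationBufferMemory(
            memory_key="chat_history", input_key="input", return_messages=True
        )
        # Pre-fetch context, then let the RAG service run the agent.
        context = retrieve_similar_chunks(
            "What is this document about?",
            session["index"], session["chunks"], session["model"], k=5,
        )
        answer = rag_service.generate_response(
            llm=session["llm"],
            query="What is this document about?",
            context_chunks=context,
            faiss_index=session["index"],
            document_chunks=session["chunks"],
            embedding_model=session["model"],
            memory=memory,
            use_tavily=False,
        )["output"]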
api/__init__.py ADDED
@@ -0,0 +1,34 @@
+"""
+API routes module initialization.
+
+This module provides easy imports for all API route handlers.
+"""
+
+from .upload_routes import upload_pdf_handler
+from .chat_routes import chat_handler
+from .session_routes import (
+    get_chat_history_handler,
+    clear_history_handler,
+    remove_pdf_handler
+)
+from .utility_routes import (
+    root_handler,
+    get_models_handler
+)
+
+__all__ = [
+    # Upload routes
+    "upload_pdf_handler",
+
+    # Chat routes
+    "chat_handler",
+
+    # Session routes
+    "get_chat_history_handler",
+    "clear_history_handler",
+    "remove_pdf_handler",
+
+    # Utility routes
+    "root_handler",
+    "get_models_handler"
+]
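These handlers are plain async callables, so the application entry point (not part of this commit) is expected to register them. A hypothetical wiring, assuming the route paths used by the legacy development_scripts/app.py below:

    from fastapi import FastAPI

    from api import (
        upload_pdf_handler, chat_handler, get_chat_history_handler,
        clear_history_handler, remove_pdf_handler, root_handler,
        get_models_handler,
    )

    app = FastAPI(title="PDF Insight Beta")
    # FastAPI route decorators are plain callables, so the re-exported
    # handlers can be registered directly.
    app.get("/")(root_handler)
    app.get("/models")(get_models_handler)
    app.post("/upload-pdf")(upload_pdf_handler)
    app.post("/chat")(chat_handler)
    app.post("/chat-history")(get_chat_history_handler)
    app.post("/clear-history")(clear_history_handler)
    app.post("/remove-pdf")(remove_pdf_handler)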
api/chat_routes.py ADDED
@@ -0,0 +1,109 @@
+"""
+Chat API routes.
+
+This module handles chat and conversation endpoints.
+"""
+
+import traceback
+from fastapi import HTTPException
+from langchain.memory import ConversationBufferMemory
+
+from configs.config import Config, ErrorMessages
+from models.models import ChatRequest, ChatResponse
+from services import session_manager, rag_service
+from utils import retrieve_similar_chunks
+
+
+async def chat_handler(request: ChatRequest) -> ChatResponse:
+    """
+    Handle chat requests with document context.
+
+    Args:
+        request: Chat request containing query and session info
+
+    Returns:
+        Chat response with answer and context
+
+    Raises:
+        HTTPException: If processing fails
+    """
+    # Validate query
+    if not request.query or not request.query.strip():
+        raise HTTPException(status_code=400, detail=ErrorMessages.EMPTY_QUERY)
+
+    if len(request.query.strip()) < 3:
+        raise HTTPException(status_code=400, detail=ErrorMessages.QUERY_TOO_SHORT)
+
+    # Get session data
+    session_data, found = session_manager.get_session(request.session_id, request.model_name)
+    if not found:
+        raise HTTPException(status_code=404, detail=ErrorMessages.SESSION_EXPIRED)
+
+    try:
+        # Validate session data integrity
+        is_valid, missing_keys = session_manager.validate_session(request.session_id)
+        if not is_valid:
+            raise HTTPException(status_code=500, detail=ErrorMessages.SESSION_INCOMPLETE)
+
+        # Prepare agent memory with chat history
+        agent_memory = ConversationBufferMemory(
+            memory_key="chat_history",
+            input_key="input",
+            return_messages=True
+        )
+
+        for entry in session_data.get("chat_history", []):
+            agent_memory.chat_memory.add_user_message(entry["user"])
+            agent_memory.chat_memory.add_ai_message(entry["assistant"])
+
+        # Retrieve initial similar chunks for context
+        initial_similar_chunks = retrieve_similar_chunks(
+            request.query,
+            session_data["index"],
+            session_data["chunks"],
+            session_data["model"],
+            k=Config.INITIAL_CONTEXT_CHUNKS
+        )
+
+        # Generate response using RAG service
+        response = rag_service.generate_response(
+            llm=session_data["llm"],
+            query=request.query,
+            context_chunks=initial_similar_chunks,
+            faiss_index=session_data["index"],
+            document_chunks=session_data["chunks"],
+            embedding_model=session_data["model"],
+            memory=agent_memory,
+            use_tavily=request.use_search
+        )
+
+        response_output = response.get("output", ErrorMessages.RESPONSE_GENERATION_ERROR)
+
+        # Save chat history
+        session_manager.add_chat_entry(
+            request.session_id,
+            request.query,
+            response_output
+        )
+
+        return ChatResponse(
+            status="success",
+            answer=response_output,
+            context_used=[
+                {
+                    "text": chunk,
+                    "score": float(score),
+                    "metadata": meta
+                }
+                for chunk, score, meta in initial_similar_chunks
+            ]
+        )
+
+    except HTTPException:
+        # Re-raise HTTP exceptions as-is
+        raise
+    except Exception as e:
+        raise HTTPException(
+            status_code=500,
+            detail=ErrorMessages.PROCESSING_ERROR.format(error=str(e))
+        )
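A hypothetical client call against this handler, assuming it is mounted at POST /chat as in development_scripts/app.py; the request body mirrors the ChatRequest model:

    import requests

    resp = requests.post(
        "http://localhost:8000/chat",
        json={
            "session_id": session_id,   # returned by the upload endpoint
            "query": "Summarize section 2.",
            "use_search": False,
            "model_name": "llama-3.1-8b-instant",
        },
    )
    print(resp.json()["answer"])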
api/session_routes.py ADDED
@@ -0,0 +1,84 @@
+"""
+Session management API routes.
+
+This module handles session-related endpoints like history and cleanup.
+"""
+
+from fastapi import HTTPException
+
+from configs.config import ErrorMessages, SuccessMessages
+from models.models import SessionRequest, ChatHistoryResponse, StatusResponse
+from services import session_manager
+
+
+async def get_chat_history_handler(request: SessionRequest) -> ChatHistoryResponse:
+    """
+    Get chat history for a session.
+
+    Args:
+        request: Session request with session ID
+
+    Returns:
+        Chat history response
+
+    Raises:
+        HTTPException: If session not found
+    """
+    session_data, found = session_manager.get_session(request.session_id)
+    if not found:
+        raise HTTPException(status_code=404, detail=ErrorMessages.SESSION_NOT_FOUND)
+
+    return ChatHistoryResponse(
+        status="success",
+        history=session_data.get("chat_history", [])
+    )
+
+
+async def clear_history_handler(request: SessionRequest) -> StatusResponse:
+    """
+    Clear chat history for a session.
+
+    Args:
+        request: Session request with session ID
+
+    Returns:
+        Status response
+
+    Raises:
+        HTTPException: If session not found
+    """
+    success = session_manager.clear_chat_history(request.session_id)
+    if not success:
+        raise HTTPException(status_code=404, detail=ErrorMessages.SESSION_NOT_FOUND)
+
+    return StatusResponse(
+        status="success",
+        message=SuccessMessages.CHAT_HISTORY_CLEARED
+    )
+
+
+async def remove_pdf_handler(request: SessionRequest) -> StatusResponse:
+    """
+    Remove PDF and session data.
+
+    Args:
+        request: Session request with session ID
+
+    Returns:
+        Status response
+
+    Raises:
+        HTTPException: If session not found or removal failed
+    """
+    success = session_manager.remove_session(request.session_id)
+
+    if success:
+        return StatusResponse(
+            status="success",
+            message=SuccessMessages.PDF_REMOVED
+        )
+    else:
+        raise HTTPException(
+            status_code=404,
+            detail=ErrorMessages.SESSION_REMOVAL_FAILED
+        )
api/upload_routes.py ADDED
@@ -0,0 +1,79 @@
+"""
+File upload API routes.
+
+This module handles PDF file upload and processing endpoints.
+"""
+
+import os
+import shutil
+import traceback
+import uuid
+from fastapi import UploadFile, File, Form, HTTPException
+from fastapi.responses import JSONResponse
+
+from configs.config import Config, ErrorMessages, SuccessMessages
+from models.models import UploadResponse
+from services import session_manager, validate_api_keys
+from utils import process_pdf_file, chunk_text, create_upload_file_path
+
+
+async def upload_pdf_handler(
+    file: UploadFile = File(...),
+    model_name: str = Form(Config.DEFAULT_MODEL)
+) -> UploadResponse:
+    """
+    Handle PDF file upload and processing.
+
+    Args:
+        file: Uploaded PDF file
+        model_name: LLM model name to use
+
+    Returns:
+        Upload response with session ID
+
+    Raises:
+        HTTPException: If processing fails
+    """
+    session_id = str(uuid.uuid4())
+    file_path = None
+
+    try:
+        # Validate API keys
+        validate_api_keys(model_name, use_search=False)
+
+        # Save uploaded file
+        file_path = create_upload_file_path(session_id, file.filename)
+        with open(file_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+
+        # Process PDF file
+        documents = process_pdf_file(file_path)
+        chunks_with_metadata = chunk_text(documents, max_length=Config.DEFAULT_CHUNK_SIZE)
+
+        # Create session
+        session_id = session_manager.create_session(
+            file_path=file_path,
+            file_name=file.filename,
+            chunks_with_metadata=chunks_with_metadata,
+            model_name=model_name
+        )
+
+        return UploadResponse(
+            status="success",
+            session_id=session_id,
+            message=SuccessMessages.PDF_PROCESSED.format(filename=file.filename)
+        )
+
+    except Exception as e:
+        # Clean up file on error
+        if file_path and os.path.exists(file_path):
+            os.remove(file_path)
+
+        error_msg = str(e)
+        stack_trace = traceback.format_exc()
+        print(f"Error processing PDF: {error_msg}\nStack trace: {stack_trace}")
+
+        raise HTTPException(
+            status_code=500,
+            detail=ErrorMessages.PDF_PROCESSING_ERROR.format(error=error_msg)
+        )
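A hypothetical client call, assuming the handler is mounted at POST /upload-pdf as in development_scripts/app.py:

    import requests

    with open("example.pdf", "rb") as f:   # hypothetical file
        resp = requests.post(
            "http://localhost:8000/upload-pdf",
            files={"file": ("example.pdf", f, "application/pdf")},
            data={"model_name": "llama-3.1-8b-instant"},
        )
    session_id = resp.json()["session_id"]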
api/utility_routes.py ADDED
@@ -0,0 +1,31 @@
+"""
+Utility API routes.
+
+This module handles utility endpoints like model listing and health checks.
+"""
+
+from fastapi.responses import RedirectResponse
+
+from models.models import ModelsResponse
+from services import get_available_models
+
+
+async def root_handler():
+    """
+    Root endpoint that redirects to the main application.
+
+    Returns:
+        Redirect response to static index.html
+    """
+    return RedirectResponse(url="/static/index.html")
+
+
+async def get_models_handler() -> ModelsResponse:
+    """
+    Get list of available models.
+
+    Returns:
+        Models response with available model configurations
+    """
+    models = get_available_models()
+    return ModelsResponse(models=models)
configs/config.py ADDED
@@ -0,0 +1,129 @@
+"""
+Configuration module for PDF Insight Beta application.
+
+This module centralizes all configuration settings, constants, and environment variables.
+"""
+
+import os
+from typing import List, Dict, Any
+import dotenv
+
+# Load environment variables
+dotenv.load_dotenv()
+
+
+class Config:
+    """Application configuration class."""
+
+    # API Configuration
+    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
+    TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "")
+
+    # Application Settings
+    UPLOAD_DIR: str = "uploads"
+    MAX_FILE_SIZE: int = 50 * 1024 * 1024  # 50MB
+
+    # Model Configuration
+    DEFAULT_MODEL: str = "llama3-8b-8192"
+    EMBEDDING_MODEL: str = "BAAI/bge-large-en-v1.5"
+
+    # Text Processing Settings
+    DEFAULT_CHUNK_SIZE: int = 1000
+    MIN_CHUNK_LENGTH: int = 20
+    MIN_PARAGRAPH_LENGTH: int = 10
+
+    # RAG Configuration
+    DEFAULT_K_CHUNKS: int = 10
+    INITIAL_CONTEXT_CHUNKS: int = 5
+    MAX_CONTEXT_TOKENS: int = 7000
+    SIMILARITY_THRESHOLD: float = 1.5
+
+    # LLM Settings
+    LLM_TEMPERATURE: float = 0.1
+    MAX_TOKENS: int = 4500
+
+    # FAISS Index Configuration
+    FAISS_NEIGHBORS: int = 32
+    FAISS_EF_CONSTRUCTION: int = 200
+    FAISS_EF_SEARCH: int = 50
+
+    # Agent Configuration
+    AGENT_MAX_ITERATIONS: int = 2
+    AGENT_VERBOSE: bool = False
+
+    # Tavily Search Configuration
+    TAVILY_MAX_RESULTS: int = 5
+    TAVILY_SEARCH_DEPTH: str = "advanced"
+    TAVILY_INCLUDE_ANSWER: bool = True
+    TAVILY_INCLUDE_RAW_CONTENT: bool = False
+
+    # CORS Configuration
+    CORS_ORIGINS: List[str] = ["*"]
+    CORS_CREDENTIALS: bool = True
+    CORS_METHODS: List[str] = ["*"]
+    CORS_HEADERS: List[str] = ["*"]
+
+
+class ModelConfig:
+    """Model configuration and metadata."""
+
+    AVAILABLE_MODELS: List[Dict[str, str]] = [
+        {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17B"},
+        {"id": "llama-3.1-8b-instant", "name": "Llama 3.1 8B Instant"},
+        {"id": "llama-3.3-70b-versatile", "name": "Llama 3.3 70B Versatile"},
+    ]
+
+    @classmethod
+    def get_model_ids(cls) -> List[str]:
+        """Get list of available model IDs."""
+        return [model["id"] for model in cls.AVAILABLE_MODELS]
+
+    @classmethod
+    def is_valid_model(cls, model_id: str) -> bool:
+        """Check if a model ID is valid."""
+        return model_id in cls.get_model_ids()
+
+
+class ErrorMessages:
+    """Centralized error messages."""
+
+    # Validation Errors
+    EMPTY_QUERY = "Query cannot be empty"
+    QUERY_TOO_SHORT = "Query must be at least 3 characters long"
+
+    # Session Errors
+    SESSION_NOT_FOUND = "Session not found"
+    SESSION_EXPIRED = "Session not found or expired. Please upload a document first."
+    SESSION_INCOMPLETE = "Session data is incomplete. Please upload the document again."
+    SESSION_REMOVAL_FAILED = "Session not found or could not be removed"
+
+    # File Errors
+    FILE_NOT_FOUND = "The file {file_path} does not exist."
+    PDF_PROCESSING_ERROR = "Error processing PDF: {error}"
+
+    # API Key Errors
+    GROQ_API_KEY_MISSING = "GROQ_API_KEY is not set for Groq Llama models."
+    TAVILY_API_KEY_MISSING = "TAVILY_API_KEY is not set. Web search will not function."
+
+    # Processing Errors
+    PROCESSING_ERROR = "Error processing query: {error}"
+    RESPONSE_GENERATION_ERROR = "Sorry, I could not generate a response."
+
+
+class SuccessMessages:
+    """Centralized success messages."""
+
+    PDF_PROCESSED = "Processed {filename}"
+    PDF_REMOVED = "PDF file and session removed successfully"
+    CHAT_HISTORY_CLEARED = "Chat history cleared"
+
+
+# Initialize directories
+def initialize_directories():
+    """Create necessary directories if they don't exist."""
+    if not os.path.exists(Config.UPLOAD_DIR):
+        os.makedirs(Config.UPLOAD_DIR)
+
+
+# Initialize on import
+initialize_directories()
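Because configs.config calls dotenv.load_dotenv() and reads os.getenv() at import time, any environment overrides must be in place before the first import. A small sketch (the key values are placeholders):

    import os

    # Must happen before `configs.config` is first imported anywhere.
    os.environ["GROQ_API_KEY"] = "test-key"      # hypothetical value
    os.environ["TAVILY_API_KEY"] = "test-key"    # hypothetical value

    from configs.config import Config, ModelConfig

    assert Config.DEFAULT_CHUNK_SIZE == 1000
    print(ModelConfig.get_model_ids())
    print(ModelConfig.is_valid_model("llama-3.1-8b-instant"))  # True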
development_scripts/app.py ADDED
@@ -0,0 +1,357 @@
+import os
+import dotenv
+import pickle
+import uuid
+import shutil
+import traceback
+from fastapi import FastAPI, UploadFile, File, Form, HTTPException
+from fastapi.responses import JSONResponse
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+import uvicorn
+from development_scripts.preprocessing import (
+    model_selection,
+    process_pdf_file,
+    chunk_text,
+    create_embeddings,
+    build_faiss_index,
+    retrieve_similar_chunks,
+    agentic_rag,
+    tools as global_base_tools,
+    create_vector_search_tool
+)
+from sentence_transformers import SentenceTransformer
+from langchain.memory import ConversationBufferMemory
+
+# Load environment variables
+dotenv.load_dotenv()
+
+# Initialize FastAPI app
+app = FastAPI(title="PDF Insight Beta", description="Agentic RAG for PDF documents")
+
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Create upload directory if it doesn't exist
+UPLOAD_DIR = "uploads"
+if not os.path.exists(UPLOAD_DIR):
+    os.makedirs(UPLOAD_DIR)
+
+# Store active sessions
+sessions = {}
+
+# Define model for chat request
+class ChatRequest(BaseModel):
+    session_id: str
+    query: str
+    use_search: bool = False
+    model_name: str = "meta-llama/llama-4-scout-17b-16e-instruct"
+
+class SessionRequest(BaseModel):
+    session_id: str
+
+# Function to save session data
+def save_session(session_id, data):
+    sessions[session_id] = data  # Keep non-picklable in memory for active session
+
+    pickle_safe_data = {
+        "file_path": data.get("file_path"),
+        "file_name": data.get("file_name"),
+        "chunks": data.get("chunks"),  # Chunks with metadata (list of dicts)
+        "chat_history": data.get("chat_history", [])
+        # FAISS index, embedding model, and LLM model are not pickled, will be reloaded/recreated
+    }
+
+    with open(f"{UPLOAD_DIR}/{session_id}_session.pkl", "wb") as f:
+        pickle.dump(pickle_safe_data, f)
+
+
+# Function to load session data
+def load_session(session_id, model_name="llama3-8b-8192"):  # Ensure model_name matches default
+    try:
+        if session_id in sessions:
+            cached_session = sessions[session_id]
+            # Ensure LLM and potentially other non-pickled parts are up-to-date or loaded
+            if cached_session.get("llm") is None or (hasattr(cached_session["llm"], "model_name") and cached_session["llm"].model_name != model_name):
+                cached_session["llm"] = model_selection(model_name)
+            if cached_session.get("model") is None:  # Embedding model
+                cached_session["model"] = SentenceTransformer('BAAI/bge-large-en-v1.5')
+            if cached_session.get("index") is None and cached_session.get("chunks"):  # FAISS index
+                embeddings, _ = create_embeddings(cached_session["chunks"], cached_session["model"])
+                cached_session["index"] = build_faiss_index(embeddings)
+            return cached_session, True
+
+        file_path_pkl = f"{UPLOAD_DIR}/{session_id}_session.pkl"
+        if os.path.exists(file_path_pkl):
+            with open(file_path_pkl, "rb") as f:
+                data = pickle.load(f)
+
+            original_pdf_path = data.get("file_path")
+            if data.get("chunks") and original_pdf_path and os.path.exists(original_pdf_path):
+                embedding_model_instance = SentenceTransformer('BAAI/bge-large-en-v1.5')
+                # Chunks are already {text: ..., metadata: ...}
+                recreated_embeddings, _ = create_embeddings(data["chunks"], embedding_model_instance)
+                recreated_index = build_faiss_index(recreated_embeddings)
+                recreated_llm = model_selection(model_name)
+
+                full_session_data = {
+                    "file_path": original_pdf_path,
+                    "file_name": data.get("file_name"),
+                    "chunks": data.get("chunks"),  # chunks_with_metadata
+                    "chat_history": data.get("chat_history", []),
+                    "model": embedding_model_instance,  # SentenceTransformer model
+                    "index": recreated_index,  # FAISS index
+                    "llm": recreated_llm  # LLM
+                }
+                sessions[session_id] = full_session_data
+                return full_session_data, True
+            else:
+                print(f"Warning: Session data for {session_id} is incomplete or PDF missing. Cannot reconstruct.")
+                if os.path.exists(file_path_pkl): os.remove(file_path_pkl)  # Clean up stale pkl
+                return None, False
+
+        return None, False
+    except Exception as e:
+        print(f"Error loading session {session_id}: {str(e)}")
+        print(traceback.format_exc())
+        return None, False
+
+# Function to remove PDF file
+def remove_pdf_file(session_id):
+    try:
+        # Check if the session exists
+        session_path = f"{UPLOAD_DIR}/{session_id}_session.pkl"
+        if os.path.exists(session_path):
+            # Load session data
+            with open(session_path, "rb") as f:
+                data = pickle.load(f)
+
+            # Delete PDF file if it exists
+            if data.get("file_path") and os.path.exists(data["file_path"]):
+                os.remove(data["file_path"])
+
+            # Remove session file
+            os.remove(session_path)
+
+        # Remove from memory if exists
+        if session_id in sessions:
+            del sessions[session_id]
+
+        return True
+    except Exception as e:
+        print(f"Error removing PDF file: {str(e)}")
+        return False
+
+# Mount static files (we'll create these later)
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
+# Route for the home page
+@app.get("/")
+async def read_root():
+    from fastapi.responses import RedirectResponse
+    return RedirectResponse(url="/static/index.html")
+
+# Route to upload a PDF file
+@app.post("/upload-pdf")
+async def upload_pdf(
+    file: UploadFile = File(...),
+    model_name: str = Form("llama3-8b-8192")  # Default model
+):
+    session_id = str(uuid.uuid4())
+    file_path = None
+
+    try:
+        file_path = f"{UPLOAD_DIR}/{session_id}_{file.filename}"
+        with open(file_path, "wb") as buffer:
+            shutil.copyfileobj(file.file, buffer)
+
+        if not os.getenv("GROQ_API_KEY") and "llama" in model_name:  # Llama specific check for Groq
+            raise ValueError("GROQ_API_KEY is not set for Groq Llama models.")
+        if not os.getenv("TAVILY_API_KEY"):  # Needed for TavilySearchResults
+            print("Warning: TAVILY_API_KEY is not set. Web search will not function.")
+
+        documents = process_pdf_file(file_path)
+        chunks_with_metadata = chunk_text(documents, max_length=1000)  # Increased from 256 to 1000 tokens for better context
+
+        embedding_model = SentenceTransformer('BAAI/bge-large-en-v1.5')
+        embeddings, _ = create_embeddings(chunks_with_metadata, embedding_model)  # Chunks are already with metadata
+
+        index = build_faiss_index(embeddings)
+        llm = model_selection(model_name)
+
+        session_data = {
+            "file_path": file_path,
+            "file_name": file.filename,
+            "chunks": chunks_with_metadata,  # Store chunks with metadata
+            "model": embedding_model,  # SentenceTransformer instance
+            "index": index,  # FAISS index instance
+            "llm": llm,  # LLM instance
+            "chat_history": []
+        }
+        save_session(session_id, session_data)
+
+        return {"status": "success", "session_id": session_id, "message": f"Processed {file.filename}"}
+
+    except Exception as e:
+        if file_path and os.path.exists(file_path):
+            os.remove(file_path)
+        error_msg = str(e)
+        stack_trace = traceback.format_exc()
+        print(f"Error processing PDF: {error_msg}\nStack trace: {stack_trace}")
+        return JSONResponse(
+            status_code=500,  # Internal server error for processing issues
+            content={"status": "error", "detail": error_msg, "type": type(e).__name__}
+        )
+
+# Route to chat with the document
+@app.post("/chat")
+async def chat(request: ChatRequest):
+    # Validate query
+    if not request.query or not request.query.strip():
+        raise HTTPException(status_code=400, detail="Query cannot be empty")
+
+    if len(request.query.strip()) < 3:
+        raise HTTPException(status_code=400, detail="Query must be at least 3 characters long")
+
+    session, found = load_session(request.session_id, model_name=request.model_name)
+    if not found:
+        raise HTTPException(status_code=404, detail="Session not found or expired. Please upload a document first.")
+
+    try:
+        # Validate session data integrity
+        required_keys = ["index", "chunks", "model", "llm"]
+        missing_keys = [key for key in required_keys if key not in session]
+        if missing_keys:
+            print(f"Warning: Session {request.session_id} missing required data: {missing_keys}")
+            raise HTTPException(status_code=500, detail="Session data is incomplete. Please upload the document again.")
+
+        # Per-request memory to ensure chat history is correctly loaded for the agent
+        agent_memory = ConversationBufferMemory(memory_key="chat_history", input_key="input", return_messages=True)
+        for entry in session.get("chat_history", []):
+            agent_memory.chat_memory.add_user_message(entry["user"])
+            agent_memory.chat_memory.add_ai_message(entry["assistant"])
+
+        # Prepare tools for the agent for THIS request
+        current_request_tools = []
+
+        # 1. Add the document-specific vector search tool
+        vector_search_tool_instance = create_vector_search_tool(
+            faiss_index=session["index"],
+            document_chunks_with_metadata=session["chunks"],  # Pass the correct variable
+            embedding_model=session["model"],  # This is the SentenceTransformer model
+            max_chunk_length=1000,
+            k=10
+        )
+        current_request_tools.append(vector_search_tool_instance)
+
+        # 2. Conditionally add Tavily (web search) tool
+        if request.use_search:
+            if os.getenv("TAVILY_API_KEY"):
+                tavily_tool = next((tool for tool in global_base_tools if tool.name == "tavily_search_results_json"), None)
+                if tavily_tool:
+                    current_request_tools.append(tavily_tool)
+                else:  # Should not happen if global_base_tools is defined correctly
+                    print("Warning: Tavily search requested, but tool misconfigured.")
+            else:
+                print("Warning: Tavily search requested, but TAVILY_API_KEY is not set.")
+
+        # Retrieve initial similar chunks for RAG context (can be empty if no good match)
+        # This context is given to the agent *before* it decides to use tools.
+        # k=5 means we retrieve up to 5 chunks for initial context.
+        # The agent can then use `vector_database_search` to search more if needed.
+        initial_similar_chunks = retrieve_similar_chunks(
+            request.query,
+            session["index"],
+            session["chunks"],  # list of dicts {text:..., metadata:...}
+            session["model"],  # SentenceTransformer model
+            k=5  # Number of chunks for initial context
+        )
+
+        print(f"Query: '{request.query}' - Found {len(initial_similar_chunks)} initial chunks")
+        if initial_similar_chunks:
+            print(f"Best chunk score: {initial_similar_chunks[0][1]:.4f}")
+
+        response = agentic_rag(
+            session["llm"],
+            current_request_tools,  # Pass the dynamically assembled list of tools
+            query=request.query,
+            context_chunks=initial_similar_chunks,
+            Use_Tavily=request.use_search,  # Still passed to agentic_rag for potential fine-grained logic, though prompt adapts to tools
+            memory=agent_memory
+        )
+
+        response_output = response.get("output", "Sorry, I could not generate a response.")
+        print(f"Generated response length: {len(response_output)} characters")
+
+        session["chat_history"].append({"user": request.query, "assistant": response_output})
+        save_session(request.session_id, session)  # Save updated history and potentially other modified session state
+
+        return {
+            "status": "success",
+            "answer": response_output,
+            # Return context that was PRE-FETCHED for the agent, not necessarily all context it might have used via tools
+            "context_used": [{"text": chunk, "score": float(score), "metadata": meta} for chunk, score, meta in initial_similar_chunks]
+        }
+
+    except Exception as e:
+        print(f"Error processing chat query: {str(e)}\nTraceback: {traceback.format_exc()}")
+        raise HTTPException(status_code=500, detail=f"Error processing query: {str(e)}")
+
+
+# Route to get chat history
+@app.post("/chat-history")
+async def get_chat_history(request: SessionRequest):
+    # Try to load session if not in memory
+    session, found = load_session(request.session_id)
+    if not found:
+        raise HTTPException(status_code=404, detail="Session not found")
+
+    return {
+        "status": "success",
+        "history": session.get("chat_history", [])
+    }
+
+# Route to clear chat history
+@app.post("/clear-history")
+async def clear_history(request: SessionRequest):
+    # Try to load session if not in memory
+    session, found = load_session(request.session_id)
+    if not found:
+        raise HTTPException(status_code=404, detail="Session not found")
+
+    session["chat_history"] = []
+    save_session(request.session_id, session)
+
+    return {"status": "success", "message": "Chat history cleared"}
+
+# Route to remove PDF from session
+@app.post("/remove-pdf")
+async def remove_pdf(request: SessionRequest):
+    success = remove_pdf_file(request.session_id)
+
+    if success:
+        return {"status": "success", "message": "PDF file and session removed successfully"}
+    else:
+        raise HTTPException(status_code=404, detail="Session not found or could not be removed")
+
+# Route to list available models
+@app.get("/models")
+async def get_models():
+    # You can expand this list as needed
+    models = [
+        {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17B"},
+        {"id": "llama-3.1-8b-instant", "name": "Llama 3.1 8B Instant"},
+        {"id": "llama-3.3-70b-versatile", "name": "Llama 3.3 70B Versatile"},
+    ]
+    return {"models": models}
+
+# Run the application if this file is executed directly
+if __name__ == "__main__":
+    uvicorn.run("app:app", host="0.0.0.0", port=8000, reload=True)
preprocessing.py → development_scripts/preprocessing.py RENAMED
File without changes
gen_dataset.py CHANGED
@@ -4,7 +4,7 @@ ds = load_dataset("neural-bridge/rag-dataset-12000")
 
 # Test the RAG system with DS dataset
 from sentence_transformers import SentenceTransformer
-from preprocessing import model_selection, create_embeddings, build_faiss_index, retrieve_similar_chunks, agentic_rag
+from development_scripts.preprocessing import model_selection, create_embeddings, build_faiss_index, retrieve_similar_chunks, agentic_rag
 import dotenv
 from langchain_community.tools.tavily_search import TavilySearchResults
 import json
models/models.py ADDED
@@ -0,0 +1,114 @@
+"""
+Pydantic models and data structures for PDF Insight Beta application.
+
+This module defines all the data models used throughout the application.
+"""
+
+from typing import List, Dict, Any, Optional
+from pydantic import BaseModel, Field
+
+
+class ChatRequest(BaseModel):
+    """Request model for chat endpoint."""
+    session_id: str = Field(..., description="Session identifier")
+    query: str = Field(..., description="User query")
+    use_search: bool = Field(default=False, description="Whether to use web search")
+    model_name: str = Field(
+        default="meta-llama/llama-4-scout-17b-16e-instruct",
+        description="LLM model to use"
+    )
+
+
+class SessionRequest(BaseModel):
+    """Request model for session-related endpoints."""
+    session_id: str = Field(..., description="Session identifier")
+
+
+class UploadResponse(BaseModel):
+    """Response model for PDF upload."""
+    status: str
+    session_id: str
+    message: str
+
+
+class ChatResponse(BaseModel):
+    """Response model for chat endpoint."""
+    status: str
+    answer: str
+    context_used: List[Dict[str, Any]]
+
+
+class ChatHistoryResponse(BaseModel):
+    """Response model for chat history endpoint."""
+    status: str
+    history: List[Dict[str, str]]
+
+
+class StatusResponse(BaseModel):
+    """Generic status response model."""
+    status: str
+    message: str
+
+
+class ErrorResponse(BaseModel):
+    """Error response model."""
+    status: str
+    detail: str
+    type: Optional[str] = None
+
+
+class ModelInfo(BaseModel):
+    """Model information."""
+    id: str
+    name: str
+
+
+class ModelsResponse(BaseModel):
+    """Response model for models endpoint."""
+    models: List[ModelInfo]
+
+
+class ChunkMetadata(BaseModel):
+    """Metadata for document chunks."""
+    source: Optional[str] = None
+    page: Optional[int] = None
+
+    class Config:
+        extra = "allow"  # Allow additional metadata fields
+
+
+class DocumentChunk(BaseModel):
+    """Document chunk with text and metadata."""
+    text: str
+    metadata: ChunkMetadata
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Convert to dictionary format used in processing."""
+        return {
+            "text": self.text,
+            "metadata": self.metadata.dict()
+        }
+
+
+class SessionData(BaseModel):
+    """Session data structure."""
+    file_path: str
+    file_name: str
+    chunks: List[Dict[str, Any]]  # List of chunk dictionaries
+    chat_history: List[Dict[str, str]] = Field(default_factory=list)
+
+    class Config:
+        arbitrary_types_allowed = True  # Allow non-Pydantic types like FAISS index
+
+
+class ChatHistoryEntry(BaseModel):
+    """Single chat history entry."""
+    user: str
+    assistant: str
+
+
+class ContextChunk(BaseModel):
+    """Context chunk with similarity score."""
+    text: str
+    score: float
+    metadata: Dict[str, Any]
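A quick sketch of the models in use; the defaults and to_dict() follow the definitions above, and the .dict() call assumes the Pydantic v1 style used throughout this file:

    from models.models import ChatRequest, DocumentChunk, ChunkMetadata

    req = ChatRequest(session_id="abc123", query="What is RAG?")
    print(req.model_name)  # default: "meta-llama/llama-4-scout-17b-16e-instruct"

    chunk = DocumentChunk(
        text="Retrieval augmented generation combines search with an LLM.",
        metadata=ChunkMetadata(source="example.pdf", page=1),  # hypothetical source
    )
    print(chunk.to_dict())  # {"text": ..., "metadata": {"source": ..., "page": 1}}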
preprocessing_refactored.py ADDED
@@ -0,0 +1,78 @@
+"""
+Refactored preprocessing module for PDF Insight Beta.
+
+This module provides the core preprocessing functionality with improved organization.
+The original logic has been preserved while breaking it into more maintainable components.
+
+This module maintains backward compatibility with the original preprocessing.py interface.
+"""
+
+# Re-export everything from the new modular structure for backward compatibility
+from configs.config import Config
+from services import (
+    create_llm_model as model_selection,
+    create_tavily_search_tool,
+    rag_service
+)
+from utils import (
+    process_pdf_file,
+    chunk_text,
+    create_embeddings,
+    build_faiss_index,
+    retrieve_similar_chunks,
+    estimate_tokens
+)
+
+# Create global tools for backward compatibility
+def create_global_tools():
+    """Create global tools list for backward compatibility."""
+    tavily_tool = create_tavily_search_tool()
+    return [tavily_tool] if tavily_tool else []
+
+# Global tools instance (for backward compatibility)
+tools = create_global_tools()
+
+# Alias for the main RAG function to maintain original interface
+def agentic_rag(llm, agent_specific_tools, query, context_chunks, memory, Use_Tavily=False):
+    """
+    Main RAG function with original interface for backward compatibility.
+
+    Args:
+        llm: Language model instance
+        agent_specific_tools: List of tools for the agent
+        query: User query
+        context_chunks: Context chunks from retrieval
+        memory: Conversation memory
+        Use_Tavily: Whether to use web search
+
+    Returns:
+        Dictionary with 'output' key containing the response
+    """
+    # Convert parameters to work with new RAG service
+    return rag_service.generate_response(
+        llm=llm,
+        query=query,
+        context_chunks=context_chunks,
+        faiss_index=None,  # Will be handled internally by tools
+        document_chunks=[],  # Will be handled internally by tools
+        embedding_model=None,  # Will be handled internally by tools
+        memory=memory,
+        use_tavily=Use_Tavily
+    )
+
+# Re-export the vector search tool creator for backward compatibility
+from services.rag_service import create_vector_search_tool
+
+# Maintain all original exports
+__all__ = [
+    'model_selection',
+    'process_pdf_file',
+    'chunk_text',
+    'create_embeddings',
+    'build_faiss_index',
+    'retrieve_similar_chunks',
+    'agentic_rag',
+    'tools',
+    'create_vector_search_tool',
+    'estimate_tokens'
+]
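With this shim, old call sites need only an import change; for example, a script written against the original preprocessing.py could switch to:

    # The old line
    #     from preprocessing import model_selection, create_embeddings, ...
    # becomes
    from preprocessing_refactored import (
        model_selection, create_embeddings, build_faiss_index,
        retrieve_similar_chunks, agentic_rag, tools,
    )
    # and the rest of the script runs unchanged.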
services/__init__.py ADDED
@@ -0,0 +1,39 @@
+"""
+Services module initialization.
+
+This module provides easy imports for all service classes and functions.
+"""
+
+from .llm_service import (
+    create_llm_model,
+    create_tavily_search_tool,
+    validate_api_keys,
+    get_available_models,
+    is_model_supported
+)
+
+from .session_service import SessionManager, session_manager
+
+from .rag_service import (
+    create_vector_search_tool,
+    RAGService,
+    rag_service
+)
+
+__all__ = [
+    # LLM service
+    "create_llm_model",
+    "create_tavily_search_tool",
+    "validate_api_keys",
+    "get_available_models",
+    "is_model_supported",
+
+    # Session service
+    "SessionManager",
+    "session_manager",
+
+    # RAG service
+    "create_vector_search_tool",
+    "RAGService",
+    "rag_service"
+]
services/llm_service.py ADDED
@@ -0,0 +1,103 @@
+"""
+LLM service for model management and interaction.
+
+This module provides services for LLM model creation and management.
+"""
+
+import os
+from typing import Optional
+from langchain_groq import ChatGroq
+from langchain_community.tools.tavily_search import TavilySearchResults
+
+from configs.config import Config, ErrorMessages
+
+
+def create_llm_model(model_name: str) -> ChatGroq:
+    """
+    Create and configure an LLM model.
+
+    Args:
+        model_name: Name of the model to create
+
+    Returns:
+        Configured ChatGroq instance
+
+    Raises:
+        ValueError: If API key is missing for the model
+    """
+    if not os.getenv("GROQ_API_KEY") and "llama" in model_name:
+        raise ValueError(ErrorMessages.GROQ_API_KEY_MISSING)
+
+    llm = ChatGroq(
+        model=model_name,
+        api_key=os.getenv("GROQ_API_KEY"),
+        temperature=Config.LLM_TEMPERATURE,
+        max_tokens=Config.MAX_TOKENS
+    )
+    return llm
+
+
+def create_tavily_search_tool() -> Optional[TavilySearchResults]:
+    """
+    Create Tavily search tool with error handling.
+
+    Returns:
+        TavilySearchResults instance or None if creation fails
+    """
+    try:
+        if not os.getenv("TAVILY_API_KEY"):
+            print(f"Warning: {ErrorMessages.TAVILY_API_KEY_MISSING}")
+            return None
+
+        return TavilySearchResults(
+            max_results=Config.TAVILY_MAX_RESULTS,
+            search_depth=Config.TAVILY_SEARCH_DEPTH,
+            include_answer=Config.TAVILY_INCLUDE_ANSWER,
+            include_raw_content=Config.TAVILY_INCLUDE_RAW_CONTENT
+        )
+    except Exception as e:
+        print(f"Warning: Could not create Tavily tool: {e}")
+        return None
+
+
+def validate_api_keys(model_name: str, use_search: bool = False) -> None:
+    """
+    Validate that required API keys are available.
+
+    Args:
+        model_name: LLM model name
+        use_search: Whether web search is requested
+
+    Raises:
+        ValueError: If required API keys are missing
+    """
+    if not os.getenv("GROQ_API_KEY") and "llama" in model_name:
+        raise ValueError(ErrorMessages.GROQ_API_KEY_MISSING)
+
+    if use_search and not os.getenv("TAVILY_API_KEY"):
+        print(f"Warning: {ErrorMessages.TAVILY_API_KEY_MISSING}")
+
+
+def get_available_models() -> list:
+    """
+    Get list of available models.
+
+    Returns:
+        List of available model configurations
+    """
+    from configs.config import ModelConfig
+    return ModelConfig.AVAILABLE_MODELS
+
+
+def is_model_supported(model_name: str) -> bool:
+    """
+    Check if a model is supported.
+
+    Args:
+        model_name: Model name to check
+
+    Returns:
+        True if model is supported, False otherwise
+    """
+    from configs.config import ModelConfig
+    return ModelConfig.is_valid_model(model_name)
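Typical use of these helpers, assuming GROQ_API_KEY is set in the environment (the prompt string is arbitrary):

    from services.llm_service import (
        create_llm_model, is_model_supported, validate_api_keys,
    )

    model_name = "llama-3.1-8b-instant"
    if is_model_supported(model_name):
        validate_api_keys(model_name, use_search=True)  # only warns if the Tavily key is absent
        llm = create_llm_model(model_name)              # temperature/max_tokens come from Config
        print(llm.invoke("Say hello in one word.").content)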
services/rag_service.py ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG (Retrieval Augmented Generation) service.
3
+
4
+ This module provides the RAG implementation with tool creation and agent management.
5
+ """
6
+
7
+ import traceback
8
+ from typing import List, Dict, Any, Optional, Tuple
9
+ from langchain.tools import tool
10
+ from langchain.agents import AgentExecutor, create_tool_calling_agent
11
+ from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
12
+ from langchain.memory import ConversationBufferMemory
13
+ from sentence_transformers import SentenceTransformer
14
+ import faiss
15
+
16
+ from configs.config import Config
17
+ from utils import (
18
+ retrieve_similar_chunks,
19
+ filter_relevant_chunks,
20
+ prepare_context_from_chunks
21
+ )
22
+ from services.llm_service import create_tavily_search_tool
23
+
24
+
25
+ def create_vector_search_tool(
26
+ faiss_index: faiss.IndexHNSWFlat,
27
+ document_chunks_with_metadata: List[Dict[str, Any]],
28
+ embedding_model: SentenceTransformer,
29
+ k: int = None,
30
+ max_chunk_length: int = None
31
+ ):
32
+ """
33
+ Create a vector search tool for document retrieval.
34
+
35
+ Args:
36
+ faiss_index: FAISS index for similarity search
37
+ document_chunks_with_metadata: List of document chunks
38
+ embedding_model: SentenceTransformer model
39
+ k: Number of chunks to retrieve
40
+ max_chunk_length: Maximum chunk length
41
+
42
+ Returns:
43
+ LangChain tool for vector search
44
+ """
45
+ if k is None:
46
+ k = Config.DEFAULT_K_CHUNKS // 3 # Use fewer chunks for tool
47
+ if max_chunk_length is None:
48
+ max_chunk_length = Config.DEFAULT_CHUNK_SIZE
49
+
50
+ @tool
51
+ def vector_database_search(query: str) -> str:
52
+ """Search the uploaded PDF document for information related to the query.
53
+
54
+ Args:
55
+ query: The search query string to find relevant information in the document.
56
+
57
+ Returns:
58
+ A string containing relevant information found in the document.
59
+ """
60
+ # Handle very short or empty queries
61
+ if not query or len(query.strip()) < 3:
62
+ return "Please provide a more specific search query with at least 3 characters."
63
+
64
+ try:
65
+ # Retrieve similar chunks using the provided session-specific components
66
+ similar_chunks_data = retrieve_similar_chunks(
67
+ query,
68
+ faiss_index,
69
+ document_chunks_with_metadata,
70
+ embedding_model,
71
+ k=k,
72
+ max_chunk_length=max_chunk_length
73
+ )
74
+
75
+ # Format the response
76
+ if not similar_chunks_data:
77
+ return "No relevant information found in the document for that query. Please try rephrasing your question or using different keywords."
78
+
79
+ # Filter out chunks with very high distance (low similarity)
80
+ filtered_chunks = filter_relevant_chunks(similar_chunks_data)
81
+
82
+ if not filtered_chunks:
83
+ return "No sufficiently relevant information found in the document for that query. Please try rephrasing your question or using different keywords."
84
+
85
+ context = "\n\n---\n\n".join([chunk_text for chunk_text, _, _ in filtered_chunks])
86
+ return f"The following information was found in the document regarding '{query}':\n{context}"
87
+
88
+ except Exception as e:
89
+ print(f"Error in vector search tool: {e}")
90
+ return f"Error searching the document: {str(e)}"
91
+
92
+ return vector_database_search
93
+
94
+
95
+ class RAGService:
96
+ """Service for RAG operations."""
97
+
98
+ def __init__(self):
99
+ """Initialize RAG service."""
100
+ self.tavily_tool = create_tavily_search_tool()
101
+
102
+ def create_agent_tools(
103
+ self,
104
+ faiss_index: faiss.IndexHNSWFlat,
105
+ document_chunks: List[Dict[str, Any]],
106
+ embedding_model: SentenceTransformer,
107
+ use_web_search: bool = False
108
+ ) -> List:
109
+ """
110
+ Create tools for the RAG agent.
111
+
112
+ Args:
113
+ faiss_index: FAISS index
114
+ document_chunks: Document chunks
115
+ embedding_model: Embedding model
116
+ use_web_search: Whether to include web search tool
117
+
118
+ Returns:
119
+ List of tools for the agent
120
+ """
121
+ tools = []
122
+
123
+ # Add vector search tool
124
+ vector_tool = create_vector_search_tool(
125
+ faiss_index=faiss_index,
126
+ document_chunks_with_metadata=document_chunks,
127
+ embedding_model=embedding_model,
128
+ max_chunk_length=Config.DEFAULT_CHUNK_SIZE,
129
+ k=10
130
+ )
131
+ tools.append(vector_tool)
132
+
133
+ # Add web search tool if requested and available
134
+ if use_web_search and self.tavily_tool:
135
+ tools.append(self.tavily_tool)
136
+
137
+ return tools
138
+
139
+ def create_agent_prompt(self, has_document_search: bool, has_web_search: bool) -> ChatPromptTemplate:
140
+ """
141
+ Create prompt template for the agent.
142
+
143
+ Args:
144
+ has_document_search: Whether document search is available
145
+ has_web_search: Whether web search is available
146
+
147
+ Returns:
148
+ ChatPromptTemplate for the agent
149
+ """
150
+ # Build tool instructions dynamically
151
+ tool_instructions = ""
152
+ if has_document_search:
153
+ tool_instructions += "Use vector_database_search to find information in the uploaded document. "
154
+ if has_web_search:
155
+ tool_instructions += "Use tavily_search_results_json for web searches when document search is insufficient. "
156
+
157
+ if not tool_instructions:
158
+ tool_instructions = "Answer based on the provided context only. "
159
+
160
+ return ChatPromptTemplate.from_messages([
161
+ ("system", f"""You are a helpful AI assistant that answers questions about documents.
162
+
163
+ Context: {{context}}
164
+
165
+ Tools available: {tool_instructions}
166
+
167
+ Instructions:
168
+ - Use the provided context first
169
+ - If context is insufficient, use available tools to search for more information
170
+ - Provide clear, helpful answers
171
+ - If you cannot find an answer, say so clearly"""),
172
+ ("human", "{input}"),
173
+ MessagesPlaceholder(variable_name="chat_history"),
174
+ MessagesPlaceholder(variable_name="agent_scratchpad"),
175
+ ])
176
+
177
+ def execute_agent(
178
+ self,
179
+ llm,
180
+ tools: List,
181
+ query: str,
182
+ context: str,
183
+ memory: ConversationBufferMemory
184
+ ) -> Dict[str, Any]:
185
+ """
186
+ Execute the RAG agent with given tools and context.
187
+
188
+ Args:
189
+ llm: Language model
190
+ tools: List of tools
191
+ query: User query
192
+ context: Context string
193
+ memory: Conversation memory
194
+
195
+ Returns:
196
+ Agent response
197
+ """
198
+ try:
199
+ # Validate tools
200
+ for tool in tools:
201
+ if not hasattr(tool, 'name') or not hasattr(tool, 'description'):
202
+ raise ValueError(f"Tool {tool} is missing required attributes")
203
+
204
+ # Create prompt
205
+ has_document_search = any(t.name == "vector_database_search" for t in tools)
206
+ has_web_search = any(t.name == "tavily_search_results_json" for t in tools)
207
+ prompt = self.create_agent_prompt(has_document_search, has_web_search)
208
+
209
+ # Create agent
210
+ agent = create_tool_calling_agent(llm, tools, prompt)
211
+ agent_executor = AgentExecutor(
212
+ agent=agent,
213
+ tools=tools,
214
+ memory=memory,
215
+ verbose=Config.AGENT_VERBOSE,
216
+ handle_parsing_errors=True,
217
+ max_iterations=Config.AGENT_MAX_ITERATIONS,
218
+ return_intermediate_steps=False,
219
+ early_stopping_method="generate"
220
+ )
221
+
222
+ # Execute agent
223
+ agent_input = {
224
+ "input": query,
225
+ "context": context,
226
+ }
227
+
228
+ response_payload = agent_executor.invoke(agent_input)
229
+
230
+ # Validate response
231
+ agent_output = response_payload.get("output", "") if response_payload else ""
232
+
233
+ if not agent_output or len(agent_output.strip()) < 10:
234
+ raise ValueError("Insufficient response from agent")
235
+
236
+ # Check for incomplete responses
237
+ problematic_prefixes = [
238
+ "Based on the Document,",
239
+ "According to a web search,",
240
+ "Based on the available information,",
241
+ "I need to",
242
+ "Let me"
243
+ ]
244
+
245
+ stripped_output = agent_output.strip()
246
+ if any(stripped_output == prefix.strip() or
247
+ stripped_output == prefix.strip() + "."
248
+ for prefix in problematic_prefixes):
249
+ raise ValueError("Agent returned incomplete response")
250
+
251
+ return response_payload
252
+
253
+ except Exception as e:
254
+ raise
255
+
256
+ def fallback_response(
257
+ self,
258
+ llm,
259
+ tools: List,
260
+ query: str,
261
+ context: str,
262
+ use_tavily: bool = False
263
+ ) -> Dict[str, Any]:
264
+ """
265
+ Generate fallback response using direct tool usage or LLM.
266
+
267
+ Args:
268
+ llm: Language model
269
+ tools: List of available tools
270
+ query: User query
271
+ context: Context string
272
+ use_tavily: Whether to use web search
273
+
274
+ Returns:
275
+ Fallback response
276
+ """
277
+ try:
278
+ tool_results = []
279
+
280
+ # Try vector search first if available
281
+ vector_tool = next((t for t in tools if t.name == "vector_database_search"), None)
282
+ if vector_tool:
283
+ try:
284
+ search_result = vector_tool.run(query)
285
+ if search_result and "No relevant information" not in search_result:
286
+ tool_results.append(f"Document Search: {search_result}")
287
+ except Exception as tool_error:
288
+ pass
289
+
290
+ # Try web search if needed and available
291
+ if use_tavily:
292
+ web_tool = next((t for t in tools if t.name == "tavily_search_results_json"), None)
293
+ if web_tool:
294
+ try:
295
+ web_result = web_tool.run(query)
296
+ if web_result:
297
+ tool_results.append(f"Web Search: {web_result}")
298
+ except Exception as tool_error:
299
+ pass
300
+
301
+ # Combine tool results with context
302
+ enhanced_context = context
303
+ if tool_results:
304
+ enhanced_context += "\n\nAdditional Information:\n" + "\n\n".join(tool_results)
305
+
306
+ # Use direct LLM call with enhanced context
307
+ direct_prompt = ChatPromptTemplate.from_messages([
308
+ ("system", "You are a helpful assistant. Use the provided context and information to answer the user's question clearly and completely."),
309
+ ("human", "Context and Information: {context}\n\nQuestion: {input}")
310
+ ])
311
+
312
+ formatted_prompt = direct_prompt.format_prompt(
313
+ context=enhanced_context,
314
+ input=query
315
+ ).to_messages()
316
+
317
+ response = llm.invoke(formatted_prompt)
318
+ direct_output = response.content if hasattr(response, 'content') else str(response)
319
+
320
+ return {"output": direct_output}
321
+
322
+ except Exception as manual_error:
323
+
324
+ # Final fallback - simple LLM response
325
+ fallback_prompt = ChatPromptTemplate.from_messages([
326
+ ("system", """You are a helpful assistant that answers questions about documents.
327
+ Use the provided context to answer the user's question.
328
+ If the context contains relevant information, start your answer with "Based on the document, ..."
329
+ If the context is insufficient, clearly state what you don't know."""),
330
+ ("human", "Context: {context}\n\nQuestion: {input}")
331
+ ])
332
+
333
+ formatted_fallback = fallback_prompt.format_prompt(
334
+ context=context,
335
+ input=query
336
+ ).to_messages()
337
+
338
+ response = llm.invoke(formatted_fallback)
339
+ fallback_output = response.content if hasattr(response, 'content') else str(response)
340
+
341
+ return {"output": fallback_output}
342
+
343
+ def generate_response(
344
+ self,
345
+ llm,
346
+ query: str,
347
+ context_chunks: List[Tuple],
348
+ faiss_index: faiss.IndexHNSWFlat,
349
+ document_chunks: List[Dict[str, Any]],
350
+ embedding_model: SentenceTransformer,
351
+ memory: ConversationBufferMemory,
352
+ use_tavily: bool = False
353
+ ) -> Dict[str, Any]:
354
+ """
355
+ Generate RAG response using agent or fallback methods.
356
+
357
+ Args:
358
+ llm: Language model
359
+ query: User query
360
+ context_chunks: Initial context chunks
361
+ faiss_index: FAISS index
362
+ document_chunks: Document chunks
363
+ embedding_model: Embedding model
364
+ memory: Conversation memory
365
+ use_tavily: Whether to use web search
366
+
367
+ Returns:
368
+ Generated response
369
+ """
370
+ # Validate inputs
371
+ if not query or not query.strip():
372
+ return {"output": "Please provide a valid question."}
373
+
374
+ # Create tools
375
+ tools = self.create_agent_tools(
376
+ faiss_index, document_chunks, embedding_model, use_tavily
377
+ )
378
+
379
+ if not tools:
380
+ fallback_prompt = ChatPromptTemplate.from_messages([
381
+ ("system", "You are a helpful assistant that answers questions about documents. Use the provided context to answer the user's question."),
382
+ ("human", "Context: {context}\n\nQuestion: {input}")
383
+ ])
384
+ try:
385
+ formatted_prompt = fallback_prompt.format_prompt(
386
+ context="No context available",
387
+ input=query
388
+ ).to_messages()
389
+ response = llm.invoke(formatted_prompt)
390
+ return {"output": response.content if hasattr(response, 'content') else str(response)}
391
+ except Exception as e:
392
+ return {"output": "I'm sorry, I encountered an error processing your request."}
393
+
394
+ # Prepare context
395
+ context = prepare_context_from_chunks(context_chunks)
396
+
397
+ # Try agent execution
398
+ if not tools:
399
+ # Handle case where no tools are available
400
+ fallback_prompt = ChatPromptTemplate.from_messages([
401
+ ("system", "You are a helpful assistant that answers questions about documents. Use the provided context to answer the user's question."),
402
+ ("human", "Context: {context}\n\nQuestion: {input}")
403
+ ])
404
+ formatted_prompt = fallback_prompt.format_prompt(
405
+ context=context,
406
+ input=query
407
+ ).to_messages()
408
+ response = llm.invoke(formatted_prompt)
409
+ return {"output": response.content if hasattr(response, 'content') else str(response)}
410
+
411
+ try:
412
+ return self.execute_agent(llm, tools, query, context, memory)
413
+
414
+ except Exception as e:
415
+ error_msg = str(e)
416
+
417
+ # Try fallback approach
418
+ try:
419
+ return self.fallback_response(llm, tools, query, context, use_tavily)
420
+ except Exception as fallback_error:
421
+ return {"output": "I'm sorry, I encountered an error processing your request. Please try again."}
422
+
423
+
424
+ # Global RAG service instance
425
+ rag_service = RAGService()
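For orientation, a minimal sketch of how RAGService.generate_response might be driven from a chat handler (not part of the commit; the session-dict keys mirror the shape built in services/session_service.py below, context_chunks are assumed to come from an upstream similarity search, and the memory configuration is an assumption):

    from langchain.memory import ConversationBufferMemory
    from services import rag_service

    def answer_query(session: dict, query: str, context_chunks: list) -> str:
        # context_chunks: chunks already retrieved for this query (assumed upstream step)
        memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        result = rag_service.generate_response(
            llm=session["llm"],
            query=query,
            context_chunks=context_chunks,
            faiss_index=session["index"],
            document_chunks=session["chunks"],
            embedding_model=session["model"],
            memory=memory,
            use_tavily=False
        )
        return result["output"]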
services/session_service.py ADDED
@@ -0,0 +1,253 @@
+ """
+ Session management service.
+
+ This module provides high-level session management operations.
+ """
+
+ import uuid
+ from typing import Dict, Any, Tuple, Optional
+ from sentence_transformers import SentenceTransformer
+
+ from configs.config import Config
+ from services.llm_service import create_llm_model
+ from utils import (
+     save_session_to_file,
+     load_session_from_file,
+     reconstruct_session_objects,
+     cleanup_session_files,
+     validate_session_data,
+     session_exists,
+     create_embeddings,
+     build_faiss_index
+ )
+
+
+ class SessionManager:
+     """Manager for session operations."""
+
+     def __init__(self):
+         """Initialize session manager."""
+         self.active_sessions: Dict[str, Dict[str, Any]] = {}
+
+     def create_session(
+         self,
+         file_path: str,
+         file_name: str,
+         chunks_with_metadata: list,
+         model_name: str
+     ) -> str:
+         """
+         Create a new session with processed document data.
+
+         Args:
+             file_path: Path to the uploaded file
+             file_name: Original filename
+             chunks_with_metadata: Processed document chunks
+             model_name: LLM model name
+
+         Returns:
+             Session ID
+         """
+         session_id = str(uuid.uuid4())
+
+         # Create the embedding model and embed the chunks
+         embedding_model = SentenceTransformer(Config.EMBEDDING_MODEL)
+         embeddings, _ = create_embeddings(chunks_with_metadata, embedding_model)
+
+         # Build FAISS index
+         index = build_faiss_index(embeddings)
+
+         # Create LLM
+         llm = create_llm_model(model_name)
+
+         # Create session data
+         session_data = {
+             "file_path": file_path,
+             "file_name": file_name,
+             "chunks": chunks_with_metadata,
+             "model": embedding_model,
+             "index": index,
+             "llm": llm,
+             "chat_history": []
+         }
+
+         # Save to memory and file
+         self.active_sessions[session_id] = session_data
+         save_session_to_file(session_id, session_data)
+
+         return session_id
+
+     def get_session(
+         self,
+         session_id: str,
+         model_name: Optional[str] = None
+     ) -> Tuple[Optional[Dict[str, Any]], bool]:
+         """
+         Retrieve session data, loading from file if necessary.
+
+         Args:
+             session_id: Session identifier
+             model_name: LLM model name (for reconstruction)
+
+         Returns:
+             Tuple of (session_data, found)
+         """
+         if model_name is None:
+             model_name = Config.DEFAULT_MODEL
+
+         try:
+             # Check if the session is cached in memory
+             if session_id in self.active_sessions:
+                 cached_session = self.active_sessions[session_id]
+
+                 # Ensure the LLM matches the requested model
+                 if (cached_session.get("llm") is None or
+                         (hasattr(cached_session["llm"], "model_name") and
+                          cached_session["llm"].model_name != model_name)):
+                     cached_session["llm"] = create_llm_model(model_name)
+
+                 # Ensure the embedding model exists
+                 if cached_session.get("model") is None:
+                     cached_session["model"] = SentenceTransformer(Config.EMBEDDING_MODEL)
+
+                 # Ensure the FAISS index exists
+                 if cached_session.get("index") is None and cached_session.get("chunks"):
+                     embeddings, _ = create_embeddings(
+                         cached_session["chunks"],
+                         cached_session["model"]
+                     )
+                     cached_session["index"] = build_faiss_index(embeddings)
+
+                 return cached_session, True
+
+             # Otherwise, try to load the session from file
+             data, success = load_session_from_file(session_id)
+             if not success:
+                 return None, False
+
+             # Check that the session data is complete and the original PDF still exists
+             original_pdf_path = data.get("file_path")
+             if not (data.get("chunks") and original_pdf_path and
+                     session_exists(session_id)):
+                 print(f"Warning: Session data for {session_id} is incomplete or PDF missing.")
+                 cleanup_session_files(session_id)
+                 return None, False
+
+             # Reconstruct the non-serializable session objects
+             embedding_model = SentenceTransformer(Config.EMBEDDING_MODEL)
+             full_session_data = reconstruct_session_objects(
+                 data, model_name, embedding_model
+             )
+
+             # Cache in memory
+             self.active_sessions[session_id] = full_session_data
+
+             return full_session_data, True
+
+         except Exception as e:
+             print(f"Error loading session {session_id}: {str(e)}")
+             return None, False
+
+     def save_session(self, session_id: str, session_data: Dict[str, Any]) -> bool:
+         """
+         Save session data to memory and file.
+
+         Args:
+             session_id: Session identifier
+             session_data: Session data to save
+
+         Returns:
+             True if successful, False otherwise
+         """
+         # Update the in-memory cache
+         self.active_sessions[session_id] = session_data
+
+         # Persist to file
+         return save_session_to_file(session_id, session_data)
+
+     def remove_session(self, session_id: str) -> bool:
+         """
+         Remove a session and its associated files.
+
+         Args:
+             session_id: Session identifier
+
+         Returns:
+             True if successful, False otherwise
+         """
+         try:
+             # Remove from memory
+             if session_id in self.active_sessions:
+                 del self.active_sessions[session_id]
+
+             # Clean up files
+             return cleanup_session_files(session_id)
+
+         except Exception as e:
+             print(f"Error removing session {session_id}: {str(e)}")
+             return False
+
+     def clear_chat_history(self, session_id: str) -> bool:
+         """
+         Clear the chat history for a session.
+
+         Args:
+             session_id: Session identifier
+
+         Returns:
+             True if successful, False otherwise
+         """
+         session_data, found = self.get_session(session_id)
+         if not found:
+             return False
+
+         session_data["chat_history"] = []
+         return self.save_session(session_id, session_data)
+
+     def add_chat_entry(
+         self,
+         session_id: str,
+         user_message: str,
+         assistant_message: str
+     ) -> bool:
+         """
+         Add a chat entry to the session history.
+
+         Args:
+             session_id: Session identifier
+             user_message: User's message
+             assistant_message: Assistant's response
+
+         Returns:
+             True if successful, False otherwise
+         """
+         session_data, found = self.get_session(session_id)
+         if not found:
+             return False
+
+         session_data["chat_history"].append({
+             "user": user_message,
+             "assistant": assistant_message
+         })
+
+         return self.save_session(session_id, session_data)
+
+     def validate_session(self, session_id: str) -> Tuple[bool, list]:
+         """
+         Validate session data integrity.
+
+         Args:
+             session_id: Session identifier
+
+         Returns:
+             Tuple of (is_valid, missing_keys)
+         """
+         session_data, found = self.get_session(session_id)
+         if not found:
+             return False, ["session_not_found"]
+
+         return validate_session_data(session_data)
+
+
+ # Global session manager instance
+ session_manager = SessionManager()
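A sketch of the intended session lifecycle (not part of the commit; the path, chunk list, and model name below are hypothetical placeholders):

    from services import session_manager

    chunks = [...]  # chunk dicts produced by the preprocessing step (assumed shape)

    session_id = session_manager.create_session(
        file_path="uploads/demo_report.pdf",  # hypothetical upload path
        file_name="report.pdf",
        chunks_with_metadata=chunks,
        model_name="some-groq-model"          # hypothetical model name
    )

    session_data, found = session_manager.get_session(session_id)
    if found:
        session_manager.add_chat_entry(
            session_id,
            user_message="What is this document about?",
            assistant_message="Based on the document, ..."
        )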
test_refactored.py ADDED
@@ -0,0 +1,176 @@
+ """
+ Test script to verify the refactored code works correctly.
+
+ This script tests the main functionality to ensure backward compatibility
+ and proper operation of the refactored modules.
+ """
+
+ import sys
+ import os
+ import tempfile
+ import traceback
+
+ # Add the current directory to Python path
+ sys.path.insert(0, '/workspaces/PDF-Insight-Beta')
+
+ def test_imports():
+     """Test that all modules can be imported successfully."""
+     print("Testing imports...")
+
+     try:
+         # Test config import
+         from configs.config import Config, ModelConfig, ErrorMessages
+         print("✓ Config module imported successfully")
+
+         # Test models import
+         from models.models import ChatRequest, UploadResponse
+         print("✓ Models module imported successfully")
+
+         # Test utils import
+         from utils import estimate_tokens, process_pdf_file
+         print("✓ Utils module imported successfully")
+
+         # Test services import
+         from services import create_llm_model, session_manager, rag_service
+         print("✓ Services module imported successfully")
+
+         # Test API import
+         from api import upload_pdf_handler, chat_handler
+         print("✓ API module imported successfully")
+
+         # Test backward compatibility
+         from preprocessing_refactored import model_selection, chunk_text, agentic_rag
+         print("✓ Backward compatibility import successful")
+
+         return True
+
+     except Exception as e:
+         print(f"✗ Import failed: {e}")
+         traceback.print_exc()
+         return False
+
+ def test_basic_functionality():
+     """Test basic functionality of key components."""
+     print("\nTesting basic functionality...")
+
+     try:
+         from configs.config import Config
+         from utils.text_processing import estimate_tokens
+         from services.llm_service import get_available_models
+
+         # Test token estimation
+         tokens = estimate_tokens("This is a test string")
+         assert tokens > 0
+         print(f"✓ Token estimation works: {tokens} tokens")
+
+         # Test model listing
+         models = get_available_models()
+         assert len(models) > 0
+         print(f"✓ Model listing works: {len(models)} models available")
+
+         # Test config access
+         assert Config.DEFAULT_CHUNK_SIZE > 0
+         print(f"✓ Config access works: chunk size = {Config.DEFAULT_CHUNK_SIZE}")
+
+         return True
+
+     except Exception as e:
+         print(f"✗ Basic functionality test failed: {e}")
+         traceback.print_exc()
+         return False
+
+ def test_backward_compatibility():
+     """Test that the original interfaces still work."""
+     print("\nTesting backward compatibility...")
+
+     try:
+         # Test the original preprocessing interface
+         from preprocessing_refactored import model_selection, tools, estimate_tokens
+
+         # These should work without errors
+         assert callable(model_selection)
+         assert isinstance(tools, list)
+         assert callable(estimate_tokens)
+
+         print("✓ Original preprocessing interface preserved")
+
+         # Test that we can still access the original functions
+         from preprocessing_refactored import (
+             process_pdf_file, chunk_text, create_embeddings,
+             build_faiss_index, retrieve_similar_chunks, agentic_rag
+         )
+
+         print("✓ All original functions accessible")
+
+         return True
+
+     except Exception as e:
+         print(f"✗ Backward compatibility test failed: {e}")
+         traceback.print_exc()
+         return False
+
+ def test_app_creation():
+     """Test that the FastAPI app can be created."""
+     print("\nTesting app creation...")
+
+     try:
+         from app_refactored import create_app
+
+         app = create_app()
+         assert app is not None
+         print("✓ FastAPI app created successfully")
+
+         # Check that routes are properly defined
+         routes = [route.path for route in app.routes]
+         expected_routes = ["/", "/upload-pdf", "/chat", "/models"]
+
+         for route in expected_routes:
+             if route in routes:
+                 print(f"✓ Route {route} found")
+             else:
+                 print(f"✗ Route {route} missing")
+                 return False
+
+         return True
+
+     except Exception as e:
+         print(f"✗ App creation test failed: {e}")
+         traceback.print_exc()
+         return False
+
+ def main():
+     """Run all tests."""
+     print("=" * 50)
+     print("Testing Refactored PDF Insight Beta")
+     print("=" * 50)
+
+     tests = [
+         test_imports,
+         test_basic_functionality,
+         test_backward_compatibility,
+         test_app_creation
+     ]
+
+     results = []
+     for test in tests:
+         results.append(test())
+
+     print("\n" + "=" * 50)
+     print("Test Results:")
+     print("=" * 50)
+
+     passed = sum(results)
+     total = len(results)
+
+     print(f"Tests passed: {passed}/{total}")
+
+     if passed == total:
+         print("✓ All tests passed! Refactoring successful.")
+         return 0
+     else:
+         print("✗ Some tests failed. Please check the issues above.")
+         return 1
+
+ if __name__ == "__main__":
+     exit_code = main()
+     sys.exit(exit_code)
utils/__init__.py ADDED
@@ -0,0 +1,62 @@
+ """
+ Utility modules initialization.
+
+ This module provides easy imports for all utility functions.
+ """
+
+ from .text_processing import (
+     estimate_tokens,
+     process_pdf_file,
+     chunk_text,
+     create_embeddings,
+     filter_relevant_chunks,
+     prepare_context_from_chunks,
+     validate_chunk_data
+ )
+
+ from .faiss_utils import (
+     build_faiss_index,
+     retrieve_similar_chunks,
+     search_index_with_validation,
+     get_index_stats
+ )
+
+ from .session_utils import (
+     create_session_file_path,
+     create_upload_file_path,
+     prepare_pickle_safe_data,
+     save_session_to_file,
+     load_session_from_file,
+     reconstruct_session_objects,
+     cleanup_session_files,
+     validate_session_data,
+     session_exists
+ )
+
+ __all__ = [
+     # Text processing
+     "estimate_tokens",
+     "process_pdf_file",
+     "chunk_text",
+     "create_embeddings",
+     "filter_relevant_chunks",
+     "prepare_context_from_chunks",
+     "validate_chunk_data",
+
+     # FAISS utilities
+     "build_faiss_index",
+     "retrieve_similar_chunks",
+     "search_index_with_validation",
+     "get_index_stats",
+
+     # Session utilities
+     "create_session_file_path",
+     "create_upload_file_path",
+     "prepare_pickle_safe_data",
+     "save_session_to_file",
+     "load_session_from_file",
+     "reconstruct_session_objects",
+     "cleanup_session_files",
+     "validate_session_data",
+     "session_exists"
+ ]
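These re-exports let callers use flat imports instead of reaching into submodules, which is what the api and services packages rely on. A small sketch (not part of the commit):

    from utils import estimate_tokens, session_exists

    print(estimate_tokens("This is a test string"))  # rough token count
    print(session_exists("nonexistent-id"))          # False unless a session pickle exists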
utils/session_utils.py ADDED
@@ -0,0 +1,219 @@
+ """
+ Session management utilities.
+
+ This module provides utilities for session data persistence and management.
+ """
+
+ import os
+ import pickle
+ from typing import Dict, Any, Tuple, Optional, List
+
+ from configs.config import Config
+
+
+ def create_session_file_path(session_id: str) -> str:
+     """
+     Create the file path for a session pickle file.
+
+     Args:
+         session_id: Session identifier
+
+     Returns:
+         File path for the session data
+     """
+     return f"{Config.UPLOAD_DIR}/{session_id}_session.pkl"
+
+
+ def create_upload_file_path(session_id: str, filename: str) -> str:
+     """
+     Create the file path for an uploaded file.
+
+     Args:
+         session_id: Session identifier
+         filename: Original filename
+
+     Returns:
+         File path for the uploaded file
+     """
+     return f"{Config.UPLOAD_DIR}/{session_id}_{filename}"
+
+
+ def prepare_pickle_safe_data(session_data: Dict[str, Any]) -> Dict[str, Any]:
+     """
+     Prepare session data for pickling by removing non-serializable objects.
+
+     Args:
+         session_data: Full session data
+
+     Returns:
+         Pickle-safe session data
+     """
+     return {
+         "file_path": session_data.get("file_path"),
+         "file_name": session_data.get("file_name"),
+         "chunks": session_data.get("chunks"),  # Chunks with metadata (list of dicts)
+         "chat_history": session_data.get("chat_history", [])
+         # The FAISS index, embedding model, and LLM are not pickled
+     }
+
+
+ def save_session_to_file(session_id: str, session_data: Dict[str, Any]) -> bool:
+     """
+     Save session data to a pickle file.
+
+     Args:
+         session_id: Session identifier
+         session_data: Session data to save
+
+     Returns:
+         True if successful, False otherwise
+     """
+     try:
+         pickle_safe_data = prepare_pickle_safe_data(session_data)
+         file_path = create_session_file_path(session_id)
+
+         with open(file_path, "wb") as f:
+             pickle.dump(pickle_safe_data, f)
+
+         return True
+     except Exception as e:
+         print(f"Error saving session {session_id}: {str(e)}")
+         return False
+
+
+ def load_session_from_file(session_id: str) -> Tuple[Optional[Dict[str, Any]], bool]:
+     """
+     Load session data from a pickle file.
+
+     Args:
+         session_id: Session identifier
+
+     Returns:
+         Tuple of (session_data, success)
+     """
+     try:
+         file_path = create_session_file_path(session_id)
+
+         if not os.path.exists(file_path):
+             return None, False
+
+         with open(file_path, "rb") as f:
+             data = pickle.load(f)
+
+         return data, True
+     except Exception as e:
+         print(f"Error loading session {session_id}: {str(e)}")
+         return None, False
+
+
+ def reconstruct_session_objects(
+     session_data: Dict[str, Any],
+     model_name: str,
+     embedding_model
+ ) -> Dict[str, Any]:
+     """
+     Reconstruct non-serializable objects in session data.
+
+     Args:
+         session_data: Basic session data from pickle
+         model_name: LLM model name
+         embedding_model: SentenceTransformer instance
+
+     Returns:
+         Complete session data with reconstructed objects
+     """
+     # Import here to avoid circular imports
+     from langchain_groq import ChatGroq
+
+     # Create the LLM model
+     llm = ChatGroq(
+         model=model_name,
+         api_key=os.getenv("GROQ_API_KEY"),
+         temperature=Config.LLM_TEMPERATURE,
+         max_tokens=Config.MAX_TOKENS
+     )
+
+     # Rebuild the embeddings and FAISS index
+     if session_data.get("chunks"):
+         # Import here to avoid circular imports
+         from utils.text_processing import create_embeddings
+         from utils.faiss_utils import build_faiss_index
+
+         embeddings, _ = create_embeddings(session_data["chunks"], embedding_model)
+         faiss_index = build_faiss_index(embeddings)
+     else:
+         embeddings, faiss_index = None, None
+
+     return {
+         **session_data,
+         "model": embedding_model,
+         "index": faiss_index,
+         "llm": llm
+     }
+
+
+ def cleanup_session_files(session_id: str) -> bool:
+     """
+     Clean up all files associated with a session.
+
+     Args:
+         session_id: Session identifier
+
+     Returns:
+         True if successful, False otherwise
+     """
+     try:
+         session_file = create_session_file_path(session_id)
+
+         # Load the session data to find the uploaded PDF's path
+         if os.path.exists(session_file):
+             try:
+                 with open(session_file, "rb") as f:
+                     data = pickle.load(f)
+
+                 # Delete the PDF file if it exists
+                 pdf_path = data.get("file_path")
+                 if pdf_path and os.path.exists(pdf_path):
+                     os.remove(pdf_path)
+             except Exception as e:
+                 print(f"Error reading session file for cleanup: {e}")
+
+             # Remove the session file itself
+             os.remove(session_file)
+
+         return True
+     except Exception as e:
+         print(f"Error cleaning up session {session_id}: {str(e)}")
+         return False
+
+
+ def validate_session_data(session_data: Dict[str, Any]) -> Tuple[bool, List[str]]:
+     """
+     Validate session data integrity.
+
+     Args:
+         session_data: Session data to validate
+
+     Returns:
+         Tuple of (is_valid, missing_keys)
+     """
+     required_keys = ["index", "chunks", "model", "llm"]
+     missing_keys = [key for key in required_keys if key not in session_data]
+
+     return len(missing_keys) == 0, missing_keys
+
+
+ def session_exists(session_id: str) -> bool:
+     """
+     Check if a session exists.
+
+     Args:
+         session_id: Session identifier
+
+     Returns:
+         True if the session exists, False otherwise
+     """
+     session_file = create_session_file_path(session_id)
+     return os.path.exists(session_file)
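A persistence round-trip sketch (not part of the commit): only the pickle-safe keys survive save_session_to_file, so the model, index, and llm entries must be rebuilt on load. The session ID and data below are hypothetical, and reconstruct_session_objects needs GROQ_API_KEY set in the environment:

    from sentence_transformers import SentenceTransformer
    from configs.config import Config
    from utils.session_utils import (
        save_session_to_file, load_session_from_file, reconstruct_session_objects
    )

    session_id = "demo-session"  # hypothetical identifier
    session_data = {
        "file_path": "uploads/demo-session_demo.pdf",  # hypothetical
        "file_name": "demo.pdf",
        "chunks": [],  # chunk dicts from preprocessing
        "chat_history": []
    }

    save_session_to_file(session_id, session_data)  # writes <UPLOAD_DIR>/demo-session_session.pkl

    data, ok = load_session_from_file(session_id)
    if ok:
        embedder = SentenceTransformer(Config.EMBEDDING_MODEL)
        full = reconstruct_session_objects(data, Config.DEFAULT_MODEL, embedder)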