""" Configuration module for PDF Insight Beta application. This module centralizes all configuration settings, constants, and environment variables. """ import os from typing import List, Dict, Any import dotenv # Load environment variables dotenv.load_dotenv() class Config: """Application configuration class.""" # API Configuration GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "") TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "") # Application Settings UPLOAD_DIR: str = "uploads" MAX_FILE_SIZE: int = 50 * 1024 * 1024 # 50MB # Model Configuration DEFAULT_MODEL: str = "llama-3.1-8b-instant" EMBEDDING_MODEL: str = "BAAI/bge-large-en-v1.5" # Text Processing Settings DEFAULT_CHUNK_SIZE: int = 1000 MIN_CHUNK_LENGTH: int = 20 MIN_PARAGRAPH_LENGTH: int = 10 # RAG Configuration DEFAULT_K_CHUNKS: int = 10 INITIAL_CONTEXT_CHUNKS: int = 5 MAX_CONTEXT_TOKENS: int = 7000 SIMILARITY_THRESHOLD: float = 1.5 # LLM Settings LLM_TEMPERATURE: float = 0.1 MAX_TOKENS: int = 4500 # FAISS Index Configuration FAISS_NEIGHBORS: int = 32 FAISS_EF_CONSTRUCTION: int = 200 FAISS_EF_SEARCH: int = 50 # Agent Configuration AGENT_MAX_ITERATIONS: int = 2 AGENT_VERBOSE: bool = False # Tavily Search Configuration TAVILY_MAX_RESULTS: int = 5 TAVILY_SEARCH_DEPTH: str = "advanced" TAVILY_INCLUDE_ANSWER: bool = True TAVILY_INCLUDE_RAW_CONTENT: bool = False # CORS Configuration CORS_ORIGINS: List[str] = ["*"] CORS_CREDENTIALS: bool = True CORS_METHODS: List[str] = ["*"] CORS_HEADERS: List[str] = ["*"] class ModelConfig: """Model configuration and metadata.""" AVAILABLE_MODELS: List[Dict[str, str]] = [ {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17B"}, {"id": "llama-3.1-8b-instant", "name": "Llama 3.1 8B Instant"}, {"id": "llama-3.3-70b-versatile", "name": "Llama 3.3 70b Versatile"}, {"id": "openai/gpt-oss-120b", "name": "GPT-OSS 120B"} ] @classmethod def get_model_ids(cls) -> List[str]: """Get list of available model IDs.""" return [model["id"] for model in cls.AVAILABLE_MODELS] @classmethod def is_valid_model(cls, model_id: str) -> bool: """Check if a model ID is valid.""" return model_id in cls.get_model_ids() class ErrorMessages: """Centralized error messages.""" # Validation Errors EMPTY_QUERY = "Query cannot be empty" QUERY_TOO_SHORT = "Query must be at least 3 characters long" # Session Errors SESSION_NOT_FOUND = "Session not found" SESSION_EXPIRED = "Session not found or expired. Please upload a document first." SESSION_INCOMPLETE = "Session data is incomplete. Please upload the document again." SESSION_REMOVAL_FAILED = "Session not found or could not be removed" # File Errors FILE_NOT_FOUND = "The file {file_path} does not exist." PDF_PROCESSING_ERROR = "Error processing PDF: {error}" # API Key Errors GROQ_API_KEY_MISSING = "GROQ_API_KEY is not set for Groq Llama models." TAVILY_API_KEY_MISSING = "TAVILY_API_KEY is not set. Web search will not function." # Processing Errors PROCESSING_ERROR = "Error processing query: {error}" RESPONSE_GENERATION_ERROR = "Sorry, I could not generate a response." class SuccessMessages: """Centralized success messages.""" PDF_PROCESSED = "Processed {filename}" PDF_REMOVED = "PDF file and session removed successfully" CHAT_HISTORY_CLEARED = "Chat history cleared" # Initialize directories def initialize_directories(): """Create necessary directories if they don't exist.""" if not os.path.exists(Config.UPLOAD_DIR): os.makedirs(Config.UPLOAD_DIR) # Initialize on import initialize_directories()