File size: 3,947 Bytes
ba907cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a2ad34
ba907cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1862c3b
ba907cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Configuration module for PDF Insight Beta application.

This module centralizes all configuration settings, constants, and environment variables.
"""

import os
from typing import List, Dict, Any
import dotenv

# Load variables from a .env file (if one is found) into the process
# environment. This has to run before the Config class body below, which reads
# the API keys from the environment at class-definition time.
dotenv.load_dotenv()


class Config:
    """Central holder for the application's settings.

    All values are class attributes, so they are read as ``Config.NAME``
    without creating an instance. The two API keys are looked up in the
    process environment once, when this class body executes; every other
    value is a fixed default defined here.
    """

    # --- External service credentials (empty string when the variable is unset) ---
    GROQ_API_KEY: str = os.environ.get("GROQ_API_KEY", "")
    TAVILY_API_KEY: str = os.environ.get("TAVILY_API_KEY", "")

    # --- Upload handling ---
    UPLOAD_DIR: str = "uploads"
    MAX_FILE_SIZE: int = 50 * 1024 ** 2  # 50 MiB, expressed in bytes

    # --- Model selection ---
    DEFAULT_MODEL: str = "llama-3.1-8b-instant"
    EMBEDDING_MODEL: str = "BAAI/bge-large-en-v1.5"

    # --- Text chunking ---
    DEFAULT_CHUNK_SIZE: int = 1000
    MIN_CHUNK_LENGTH: int = 20
    MIN_PARAGRAPH_LENGTH: int = 10

    # --- Retrieval (RAG) ---
    DEFAULT_K_CHUNKS: int = 10
    INITIAL_CONTEXT_CHUNKS: int = 5
    MAX_CONTEXT_TOKENS: int = 7000
    SIMILARITY_THRESHOLD: float = 1.5

    # --- LLM generation ---
    LLM_TEMPERATURE: float = 0.1
    MAX_TOKENS: int = 4500

    # --- FAISS index parameters ---
    FAISS_NEIGHBORS: int = 32
    FAISS_EF_CONSTRUCTION: int = 200
    FAISS_EF_SEARCH: int = 50

    # --- Agent behaviour ---
    AGENT_MAX_ITERATIONS: int = 2
    AGENT_VERBOSE: bool = False

    # --- Tavily web search ---
    TAVILY_MAX_RESULTS: int = 5
    TAVILY_SEARCH_DEPTH: str = "advanced"
    TAVILY_INCLUDE_ANSWER: bool = True
    TAVILY_INCLUDE_RAW_CONTENT: bool = False

    # --- CORS ---
    # NOTE(review): wildcard origins combined with credentials enabled is very
    # permissive - confirm this is intended outside of development.
    CORS_ORIGINS: List[str] = ["*"]
    CORS_CREDENTIALS: bool = True
    CORS_METHODS: List[str] = ["*"]
    CORS_HEADERS: List[str] = ["*"]


class ModelConfig:
    """Catalogue of selectable models together with lookup helpers."""

    # Each entry pairs a model identifier ("id") with a human-readable "name".
    AVAILABLE_MODELS: List[Dict[str, str]] = [
        {
            "id": "meta-llama/llama-4-scout-17b-16e-instruct",
            "name": "Llama 4 Scout 17B",
        },
        {
            "id": "llama-3.1-8b-instant",
            "name": "Llama 3.1 8B Instant",
        },
        {
            "id": "llama-3.3-70b-versatile",
            "name": "Llama 3.3 70b Versatile",
        },
        {
            "id": "openai/gpt-oss-120b",
            "name": "GPT-OSS 120B",
        },
    ]

    @classmethod
    def get_model_ids(cls) -> List[str]:
        """Return the ``"id"`` of every entry in ``AVAILABLE_MODELS``, in order."""
        return [entry["id"] for entry in cls.AVAILABLE_MODELS]

    @classmethod
    def is_valid_model(cls, model_id: str) -> bool:
        """Return ``True`` when *model_id* is one of the catalogued model IDs."""
        known_ids = cls.get_model_ids()
        return model_id in known_ids


class ErrorMessages:
    """Error message strings shared across the application.

    Messages containing a ``{...}`` field are templates: the named field has
    to be filled in by the caller (e.g. with ``str.format``) before use.
    """

    # --- Query validation ---
    EMPTY_QUERY: str = "Query cannot be empty"
    QUERY_TOO_SHORT: str = "Query must be at least 3 characters long"

    # --- Session handling ---
    SESSION_NOT_FOUND: str = "Session not found"
    SESSION_EXPIRED: str = "Session not found or expired. Please upload a document first."
    SESSION_INCOMPLETE: str = "Session data is incomplete. Please upload the document again."
    SESSION_REMOVAL_FAILED: str = "Session not found or could not be removed"

    # --- Files (templates: {file_path}, {error}) ---
    FILE_NOT_FOUND: str = "The file {file_path} does not exist."
    PDF_PROCESSING_ERROR: str = "Error processing PDF: {error}"

    # --- Missing API keys ---
    GROQ_API_KEY_MISSING: str = "GROQ_API_KEY is not set for Groq Llama models."
    TAVILY_API_KEY_MISSING: str = "TAVILY_API_KEY is not set. Web search will not function."

    # --- Query processing (template: {error}) ---
    PROCESSING_ERROR: str = "Error processing query: {error}"
    RESPONSE_GENERATION_ERROR: str = "Sorry, I could not generate a response."


class SuccessMessages:
    """Success message strings shared across the application.

    ``PDF_PROCESSED`` is a template whose ``{filename}`` field has to be
    filled in by the caller (e.g. with ``str.format``).
    """

    PDF_PROCESSED: str = "Processed {filename}"
    PDF_REMOVED: str = "PDF file and session removed successfully"
    CHAT_HISTORY_CLEARED: str = "Chat history cleared"


# Initialize directories
def initialize_directories() -> None:
    """Create the upload directory if it does not already exist.

    The path is taken from ``Config.UPLOAD_DIR``. Nothing happens when the
    path is already present.
    """
    # Attempt the creation directly rather than checking os.path.exists()
    # first: a separate check leaves a window in which another process can
    # create the directory, which would make os.makedirs() raise
    # FileExistsError.
    try:
        os.makedirs(Config.UPLOAD_DIR)
    except FileExistsError:
        # The path is already there (possibly created concurrently). This is
        # the same "do nothing" outcome the earlier existence check produced.
        pass


# Import-time side effect: importing this module calls
# initialize_directories(), which creates Config.UPLOAD_DIR when it is missing.
initialize_directories()