"""
Configuration module for PDF Insight Beta application.
This module centralizes all configuration settings, constants, and environment variables.
"""
import os
from typing import List, Dict, Any
import dotenv
# Load environment variables
dotenv.load_dotenv()


class Config:
    """Application configuration class."""

    # API Configuration
    GROQ_API_KEY: str = os.getenv("GROQ_API_KEY", "")
    TAVILY_API_KEY: str = os.getenv("TAVILY_API_KEY", "")

    # Application Settings
    UPLOAD_DIR: str = "uploads"
    MAX_FILE_SIZE: int = 50 * 1024 * 1024  # 50MB

    # Model Configuration
    DEFAULT_MODEL: str = "llama-3.1-8b-instant"
    EMBEDDING_MODEL: str = "BAAI/bge-large-en-v1.5"

    # Text Processing Settings
    DEFAULT_CHUNK_SIZE: int = 1000
    MIN_CHUNK_LENGTH: int = 20
    MIN_PARAGRAPH_LENGTH: int = 10

    # RAG Configuration
    DEFAULT_K_CHUNKS: int = 10
    INITIAL_CONTEXT_CHUNKS: int = 5
    MAX_CONTEXT_TOKENS: int = 7000
    SIMILARITY_THRESHOLD: float = 1.5

    # LLM Settings
    LLM_TEMPERATURE: float = 0.1
    MAX_TOKENS: int = 4500

    # FAISS Index Configuration
    FAISS_NEIGHBORS: int = 32
    FAISS_EF_CONSTRUCTION: int = 200
    FAISS_EF_SEARCH: int = 50

    # Agent Configuration
    AGENT_MAX_ITERATIONS: int = 2
    AGENT_VERBOSE: bool = False

    # Tavily Search Configuration
    TAVILY_MAX_RESULTS: int = 5
    TAVILY_SEARCH_DEPTH: str = "advanced"
    TAVILY_INCLUDE_ANSWER: bool = True
    TAVILY_INCLUDE_RAW_CONTENT: bool = False

    # CORS Configuration
    CORS_ORIGINS: List[str] = ["*"]
    CORS_CREDENTIALS: bool = True
    CORS_METHODS: List[str] = ["*"]
    CORS_HEADERS: List[str] = ["*"]


class ModelConfig:
    """Model configuration and metadata."""

    AVAILABLE_MODELS: List[Dict[str, str]] = [
        {"id": "meta-llama/llama-4-scout-17b-16e-instruct", "name": "Llama 4 Scout 17B"},
        {"id": "llama-3.1-8b-instant", "name": "Llama 3.1 8B Instant"},
        {"id": "llama-3.3-70b-versatile", "name": "Llama 3.3 70b Versatile"},
        {"id": "openai/gpt-oss-120b", "name": "GPT-OSS 120B"}
    ]

    @classmethod
    def get_model_ids(cls) -> List[str]:
        """Get list of available model IDs."""
        return [model["id"] for model in cls.AVAILABLE_MODELS]

    @classmethod
    def is_valid_model(cls, model_id: str) -> bool:
        """Check if a model ID is valid."""
        return model_id in cls.get_model_ids()
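

# Illustrative helper (added for clarity, not part of the original module):
# shows how callers would typically use is_valid_model, falling back to the
# configured default when a requested model id is unknown.
def resolve_model_id(requested_model: str) -> str:
    """Return requested_model if it is known, otherwise Config.DEFAULT_MODEL."""
    if ModelConfig.is_valid_model(requested_model):
        return requested_model
    return Config.DEFAULT_MODEL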


class ErrorMessages:
    """Centralized error messages."""

    # Validation Errors
    EMPTY_QUERY = "Query cannot be empty"
    QUERY_TOO_SHORT = "Query must be at least 3 characters long"

    # Session Errors
    SESSION_NOT_FOUND = "Session not found"
    SESSION_EXPIRED = "Session not found or expired. Please upload a document first."
    SESSION_INCOMPLETE = "Session data is incomplete. Please upload the document again."
    SESSION_REMOVAL_FAILED = "Session not found or could not be removed"

    # File Errors
    FILE_NOT_FOUND = "The file {file_path} does not exist."
    PDF_PROCESSING_ERROR = "Error processing PDF: {error}"

    # API Key Errors
    GROQ_API_KEY_MISSING = "GROQ_API_KEY is not set for Groq Llama models."
    TAVILY_API_KEY_MISSING = "TAVILY_API_KEY is not set. Web search will not function."

    # Processing Errors
    PROCESSING_ERROR = "Error processing query: {error}"
    RESPONSE_GENERATION_ERROR = "Sorry, I could not generate a response."
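
# The messages containing placeholders such as {file_path} and {error} are
# meant to be filled with str.format at the call site, e.g. (hypothetical
# call site, not from the original code; `path` is an assumed variable):
#
#     raise FileNotFoundError(ErrorMessages.FILE_NOT_FOUND.format(file_path=path))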


class SuccessMessages:
    """Centralized success messages."""

    PDF_PROCESSED = "Processed {filename}"
    PDF_REMOVED = "PDF file and session removed successfully"
    CHAT_HISTORY_CLEARED = "Chat history cleared"


# Initialize directories
def initialize_directories():
    """Create necessary directories if they don't exist."""
    if not os.path.exists(Config.UPLOAD_DIR):
        os.makedirs(Config.UPLOAD_DIR)


# Initialize on import
initialize_directories()
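

# Illustrative sketch, not part of the original file: a minimal smoke check
# that reports missing API keys when this module is run directly, using only
# the settings and messages defined above.
if __name__ == "__main__":
    if not Config.GROQ_API_KEY:
        print(ErrorMessages.GROQ_API_KEY_MISSING)
    if not Config.TAVILY_API_KEY:
        print(ErrorMessages.TAVILY_API_KEY_MISSING)
    print(f"Upload directory: {Config.UPLOAD_DIR}")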