# Page-header residue from the hosting site's file view (KingZack; 1.87 kB).
# Commit 23f9974: adding vectorization variables to config like chunk size and model name
from functools import lru_cache
from typing import Literal, Optional

from pydantic import Field, SecretStr, field_validator, validator
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """
    Application settings loaded from environment variables.

    Values are read from the process environment and from a UTF-8 encoded
    ``.env`` file (see ``model_config``). Variable names are matched
    case-sensitively, so they must be spelled exactly like the field names.
    Secret-bearing fields are typed as ``SecretStr``; call
    ``get_secret_value()`` on them to obtain the underlying string.
    """

    # API Configuration
    API_HOST: str = "0.0.0.0"
    API_PORT: int = 8000
    DEBUG: bool = False

    # Vectorization Configuration
    EMBEDDING_MODEL: str = "text-embedding-3-small"
    # Presumably must match the output size of EMBEDDING_MODEL -- confirm
    # whenever the model name is changed.
    VECTOR_DIMENSION: int = 1536
    CHUNK_SIZE: int = 1000
    CHUNK_OVERLAP: int = 200
    TOP_K_MATCHES: int = 5

    # MongoDB Configuration
    MONGODB_URI: Optional[SecretStr] = None  # TODO: Remove optionality
    MONGODB_DB_NAME: str = "ctp_slack_bot"

    # Slack Configuration
    SLACK_BOT_TOKEN: Optional[SecretStr] = None  # TODO: Remove optionality
    SLACK_SIGNING_SECRET: Optional[SecretStr] = None  # TODO: Remove optionality
    SLACK_APP_TOKEN: Optional[SecretStr] = None

    # Hugging Face Configuration
    HF_API_TOKEN: Optional[SecretStr] = None

    # Logging Configuration
    LOG_LEVEL: Literal["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"] = "INFO"
    LOG_FORMAT: Literal["text", "json"] = "json"

    # APScheduler Configuration
    SCHEDULER_TIMEZONE: str = "UTC"

    # ``field_validator`` is the Pydantic v2 replacement for the deprecated
    # v1-style ``validator`` decorator; v2 expects it to wrap a classmethod.
    @field_validator("MONGODB_URI")
    @classmethod
    def validate_mongodb_uri(cls, v: Optional[SecretStr]) -> Optional[SecretStr]:
        """Validate MongoDB URI format.

        The format check is currently disabled, so the value is returned
        unchanged.

        Args:
            v: The parsed ``MONGODB_URI`` value, or ``None`` if explicitly
                passed as such.

        Returns:
            The same value that was passed in.
        """
        # TODO: Re-enable the check that rejects values whose secret does not
        # start with "mongodb" (raising ValueError("MONGODB_URI must be a valid
        # MongoDB connection string")); it must skip ``None`` while the field
        # remains optional.
        return v

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=True,
    )
@lru_cache
def get_settings() -> Settings:
    """
    Return the cached Settings object, constructing it on the first call.

    The function takes no arguments, so ``lru_cache`` stores a single result
    and every later call returns that same instance.
    """
    instance = Settings()
    return instance
# Module-level settings instance, built at import time through the cached
# get_settings() factory.
settings = get_settings()