Spaces:
Runtime error
Runtime error
import os | |
import sys | |
from typing import Dict, Any | |
class Config:
    """Application settings: built-in defaults plus environment overrides.

    Every ``get_*`` method is a classmethod that builds a fresh dict on each
    call, reading the relevant environment variable and falling back to the
    class-level default when the variable is unset.  No instance is needed:
    call ``Config.get_all_configs()`` directly (calling through an instance
    works as well).

    Numeric settings are converted with ``int()`` / ``float()``, so a
    non-numeric value in the environment raises ``ValueError``.
    """

    # Document Processing
    DEFAULT_CHUNK_SIZE = 1000
    DEFAULT_CHUNK_OVERLAP = 200
    DEFAULT_ENCODING = 'utf-8'

    # Embedding Model
    DEFAULT_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'

    # Vector Store
    DEFAULT_PERSIST_DIRECTORY = "./chroma_db"
    DEFAULT_RETRIEVAL_K = 5

    # LLM Settings
    DEFAULT_TEMPERATURE = 0.3
    DEFAULT_CHAIN_TYPE = "stuff"

    # File Settings
    SUPPORTED_FILE_TYPES = ["txt"]
    MAX_FILE_SIZE_MB = 100

    @classmethod
    def get_doc_processing_config(cls) -> Dict[str, Any]:
        """Return chunking and encoding settings.

        Reads ``CHUNK_SIZE``, ``CHUNK_OVERLAP`` and ``ENCODING`` from the
        environment.

        Returns:
            Dict with keys ``chunk_size`` (int), ``chunk_overlap`` (int)
            and ``encoding`` (str).

        Raises:
            ValueError: If ``CHUNK_SIZE`` or ``CHUNK_OVERLAP`` is set to a
                value ``int()`` cannot parse.
        """
        return {
            'chunk_size': int(os.getenv('CHUNK_SIZE', cls.DEFAULT_CHUNK_SIZE)),
            'chunk_overlap': int(os.getenv('CHUNK_OVERLAP', cls.DEFAULT_CHUNK_OVERLAP)),
            'encoding': os.getenv('ENCODING', cls.DEFAULT_ENCODING),
        }

    @classmethod
    def get_embedding_config(cls) -> Dict[str, Any]:
        """Return the embedding model settings.

        Reads ``EMBEDDING_MODEL`` from the environment.

        Returns:
            Dict with the single key ``model_name`` (str).
        """
        return {
            'model_name': os.getenv('EMBEDDING_MODEL', cls.DEFAULT_EMBEDDING_MODEL),
        }

    @classmethod
    def get_vector_store_config(cls) -> Dict[str, Any]:
        """Return the vector store settings.

        Reads ``PERSIST_DIRECTORY`` and ``RETRIEVAL_K`` from the environment.

        Returns:
            Dict with keys ``persist_directory`` (str) and ``retrieval_k``
            (int).

        Raises:
            ValueError: If ``RETRIEVAL_K`` is set to a value ``int()`` cannot
                parse.
        """
        return {
            'persist_directory': os.getenv('PERSIST_DIRECTORY', cls.DEFAULT_PERSIST_DIRECTORY),
            'retrieval_k': int(os.getenv('RETRIEVAL_K', cls.DEFAULT_RETRIEVAL_K)),
        }

    @classmethod
    def get_llm_config(cls) -> Dict[str, Any]:
        """Return the LLM settings.

        Reads ``LLM_TEMPERATURE``, ``LLM_CHAIN_TYPE`` and ``GOOGLE_API_KEY``
        from the environment.  The API key has no default, so ``api_key`` is
        ``None`` when ``GOOGLE_API_KEY`` is unset.

        Returns:
            Dict with keys ``temperature`` (float), ``chain_type`` (str) and
            ``api_key`` (str or None).

        Raises:
            ValueError: If ``LLM_TEMPERATURE`` is set to a value ``float()``
                cannot parse.
        """
        return {
            'temperature': float(os.getenv('LLM_TEMPERATURE', cls.DEFAULT_TEMPERATURE)),
            'chain_type': os.getenv('LLM_CHAIN_TYPE', cls.DEFAULT_CHAIN_TYPE),
            'api_key': os.getenv('GOOGLE_API_KEY'),
        }

    @classmethod
    def get_file_settings(cls) -> Dict[str, Any]:
        """Return the file upload settings.

        Reads ``MAX_FILE_SIZE_MB`` from the environment; the supported file
        types are not overridable and come from ``SUPPORTED_FILE_TYPES``.

        Returns:
            Dict with keys ``supported_types`` (the class-level list itself,
            not a copy) and ``max_size_mb`` (int).

        Raises:
            ValueError: If ``MAX_FILE_SIZE_MB`` is set to a value ``int()``
                cannot parse.
        """
        return {
            'supported_types': cls.SUPPORTED_FILE_TYPES,
            'max_size_mb': int(os.getenv('MAX_FILE_SIZE_MB', cls.MAX_FILE_SIZE_MB)),
        }

    @classmethod
    def get_all_configs(cls) -> Dict[str, Any]:
        """Return every settings group in one nested dict.

        Returns:
            Dict keyed by ``document_processing``, ``embedding``,
            ``vector_store``, ``llm`` and ``file_settings``, each holding the
            dict produced by the corresponding ``get_*`` method.

        Raises:
            ValueError: Propagated from any group whose numeric environment
                variable cannot be parsed.
        """
        return {
            'document_processing': cls.get_doc_processing_config(),
            'embedding': cls.get_embedding_config(),
            'vector_store': cls.get_vector_store_config(),
            'llm': cls.get_llm_config(),
            'file_settings': cls.get_file_settings(),
        }

    @classmethod
    def validate_config(cls) -> bool:
        """Report whether an API key is configured.

        Returns:
            ``True`` when ``GOOGLE_API_KEY`` is set to a non-empty value,
            ``False`` otherwise.  Nothing else is validated.
        """
        # An unset variable yields None and an empty one yields ''; both are
        # falsy and count as "not configured".
        return bool(cls.get_llm_config()['api_key'])

    @classmethod
    def get_environment_info(cls) -> Dict[str, Any]:
        """Return a diagnostic snapshot of the runtime environment.

        The API key value itself is never included; only ``'SET'`` or
        ``'NOT SET'`` is reported.  The other variables are reported with
        their raw environment value, or the literal string ``'DEFAULT'`` when
        unset.

        Returns:
            Dict with keys ``python_version`` (``sys.version``) and
            ``environment_variables`` (dict of variable name to status).
        """
        return {
            'python_version': sys.version,
            'environment_variables': {
                'GOOGLE_API_KEY': 'SET' if os.getenv('GOOGLE_API_KEY') else 'NOT SET',
                'CHUNK_SIZE': os.getenv('CHUNK_SIZE', 'DEFAULT'),
                'EMBEDDING_MODEL': os.getenv('EMBEDDING_MODEL', 'DEFAULT'),
                'PERSIST_DIRECTORY': os.getenv('PERSIST_DIRECTORY', 'DEFAULT'),
            },
        }