File size: 3,005 Bytes
816825a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import sys
from typing import Dict, Any

class Config:
    """Central place for application defaults and their environment overrides.

    Every ``get_*_config`` method reads the process environment at call time,
    so changes to ``os.environ`` are picked up on the next call. Nothing is
    cached on the class.
    """

    # Document Processing
    DEFAULT_CHUNK_SIZE = 1000
    DEFAULT_CHUNK_OVERLAP = 200
    DEFAULT_ENCODING = 'utf-8'

    # Embedding Model
    DEFAULT_EMBEDDING_MODEL = 'sentence-transformers/all-MiniLM-L6-v2'

    # Vector Store
    DEFAULT_PERSIST_DIRECTORY = "./chroma_db"
    DEFAULT_RETRIEVAL_K = 5

    # LLM Settings
    DEFAULT_TEMPERATURE = 0.3
    DEFAULT_CHAIN_TYPE = "stuff"

    # File Settings
    SUPPORTED_FILE_TYPES = ["txt"]
    MAX_FILE_SIZE_MB = 100

    @staticmethod
    def _read_number(name: str, default: Any, cast: Any) -> Any:
        """Read a numeric environment variable and convert it with ``cast``.

        Args:
            name: Environment variable to look up.
            default: Value used when the variable is unset or blank.
            cast: Conversion callable, ``int`` or ``float``.

        Returns:
            ``cast(value)`` for a non-blank variable, otherwise ``cast(default)``.

        Raises:
            ValueError: If the variable holds a non-blank value that ``cast``
                rejects. The message names the offending variable.
        """
        raw = os.getenv(name)
        # A variable that is present but blank (e.g. ``CHUNK_SIZE=`` in a .env
        # file) is treated the same as an unset one instead of crashing.
        if raw is None or not raw.strip():
            return cast(default)
        try:
            return cast(raw)
        except ValueError as exc:
            raise ValueError(
                "Environment variable {} must be a valid {}, got {!r}".format(
                    name, cast.__name__, raw
                )
            ) from exc

    @classmethod
    def get_doc_processing_config(cls) -> Dict[str, Any]:
        """Return chunking and encoding settings.

        Returns:
            Dict with ``chunk_size`` (int, from ``CHUNK_SIZE``),
            ``chunk_overlap`` (int, from ``CHUNK_OVERLAP``) and ``encoding``
            (str, from ``ENCODING``), each falling back to its class default.

        Raises:
            ValueError: If ``CHUNK_SIZE`` or ``CHUNK_OVERLAP`` is not an integer.
        """
        return {
            'chunk_size': cls._read_number('CHUNK_SIZE', cls.DEFAULT_CHUNK_SIZE, int),
            'chunk_overlap': cls._read_number('CHUNK_OVERLAP', cls.DEFAULT_CHUNK_OVERLAP, int),
            'encoding': os.getenv('ENCODING', cls.DEFAULT_ENCODING)
        }

    @classmethod
    def get_embedding_config(cls) -> Dict[str, Any]:
        """Return the embedding settings.

        Returns:
            Dict with ``model_name`` taken from ``EMBEDDING_MODEL`` or the
            class default.
        """
        return {
            'model_name': os.getenv('EMBEDDING_MODEL', cls.DEFAULT_EMBEDDING_MODEL),
        }

    @classmethod
    def get_vector_store_config(cls) -> Dict[str, Any]:
        """Return the vector store settings.

        Returns:
            Dict with ``persist_directory`` (str, from ``PERSIST_DIRECTORY``)
            and ``retrieval_k`` (int, from ``RETRIEVAL_K``).

        Raises:
            ValueError: If ``RETRIEVAL_K`` is not an integer.
        """
        return {
            'persist_directory': os.getenv('PERSIST_DIRECTORY', cls.DEFAULT_PERSIST_DIRECTORY),
            'retrieval_k': cls._read_number('RETRIEVAL_K', cls.DEFAULT_RETRIEVAL_K, int)
        }

    @classmethod
    def get_llm_config(cls) -> Dict[str, Any]:
        """Return the LLM settings.

        Returns:
            Dict with ``temperature`` (float, from ``LLM_TEMPERATURE``),
            ``chain_type`` (str, from ``LLM_CHAIN_TYPE``) and ``api_key``
            (the raw ``GOOGLE_API_KEY`` value, or ``None`` when unset).

        Raises:
            ValueError: If ``LLM_TEMPERATURE`` is not a number.
        """
        return {
            'temperature': cls._read_number('LLM_TEMPERATURE', cls.DEFAULT_TEMPERATURE, float),
            'chain_type': os.getenv('LLM_CHAIN_TYPE', cls.DEFAULT_CHAIN_TYPE),
            'api_key': os.getenv('GOOGLE_API_KEY')
        }

    @classmethod
    def get_file_settings(cls) -> Dict[str, Any]:
        """Return the file-handling settings.

        Returns:
            Dict with ``supported_types`` (a fresh list copied from
            ``SUPPORTED_FILE_TYPES``) and ``max_size_mb`` (int, from
            ``MAX_FILE_SIZE_MB``).

        Raises:
            ValueError: If ``MAX_FILE_SIZE_MB`` is not an integer.
        """
        return {
            # Copy so that callers mutating the result cannot alter the
            # class-level list shared by every other caller.
            'supported_types': list(cls.SUPPORTED_FILE_TYPES),
            'max_size_mb': cls._read_number('MAX_FILE_SIZE_MB', cls.MAX_FILE_SIZE_MB, int)
        }

    @classmethod
    def get_all_configs(cls) -> Dict[str, Any]:
        """Return every configuration section keyed by section name.

        Returns:
            Dict with the keys ``document_processing``, ``embedding``,
            ``vector_store``, ``llm`` and ``file_settings``, each holding the
            result of the matching ``get_*`` method.

        Raises:
            ValueError: If any numeric environment override is malformed.
        """
        return {
            'document_processing': cls.get_doc_processing_config(),
            'embedding': cls.get_embedding_config(),
            'vector_store': cls.get_vector_store_config(),
            'llm': cls.get_llm_config(),
            'file_settings': cls.get_file_settings()
        }

    @classmethod
    def validate_config(cls) -> bool:
        """Check that an API key is configured.

        Returns:
            ``True`` when ``GOOGLE_API_KEY`` holds a non-blank value,
            ``False`` otherwise.

        Raises:
            ValueError: Propagated from ``get_llm_config`` when
                ``LLM_TEMPERATURE`` is malformed.
        """
        api_key = cls.get_llm_config()['api_key']
        # A key made only of whitespace is as unusable as a missing one.
        return bool(api_key and api_key.strip())

    @classmethod
    def get_environment_info(cls) -> Dict[str, Any]:
        """Return a diagnostic snapshot of the runtime environment.

        The API key itself is never included; only whether it is set.

        Returns:
            Dict with ``python_version`` (``sys.version``) and
            ``environment_variables``, which maps ``GOOGLE_API_KEY`` to
            ``'SET'``/``'NOT SET'`` and ``CHUNK_SIZE``, ``EMBEDDING_MODEL`` and
            ``PERSIST_DIRECTORY`` to their raw values, or ``'DEFAULT'`` when
            unset.
        """
        return {
            'python_version': sys.version,
            'environment_variables': {
                'GOOGLE_API_KEY': 'SET' if os.getenv('GOOGLE_API_KEY') else 'NOT SET',
                'CHUNK_SIZE': os.getenv('CHUNK_SIZE', 'DEFAULT'),
                'EMBEDDING_MODEL': os.getenv('EMBEDDING_MODEL', 'DEFAULT'),
                'PERSIST_DIRECTORY': os.getenv('PERSIST_DIRECTORY', 'DEFAULT'),
            }
        }