File size: 3,403 Bytes
71fa0c7
103c97e
 
 
71fa0c7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103c97e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369993f
 
 
 
 
 
 
 
103c97e
 
 
 
 
 
 
 
 
 
 
 
 
 
aad4327
 
 
 
103c97e
 
 
 
 
 
 
 
 
 
71fa0c7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
from typing import Optional, Dict, Any, List
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field
from pathlib import Path
import fnmatch

class PathPatterns(BaseSettings):
    include: List[str] = Field(
        default=["**/*"],
        description="Glob patterns for paths to include"
    )
    exclude: List[str] = Field(
        default=["**/venv/**", "**/.git/**", "**/__pycache__/**"],
        description="Glob patterns for paths to exclude"
    )

    def should_process_path(self, path: str) -> bool:
        """Check if a path should be processed based on include/exclude patterns"""
        path_str = str(path)
        
        # First check exclusions
        for pattern in self.exclude:
            if fnmatch.fnmatch(path_str, pattern):
                return False
        
        # Then check inclusions
        for pattern in self.include:
            if fnmatch.fnmatch(path_str, pattern):
                return True
        
        return False

class LanguageConfig(BaseSettings):
    enabled: bool = True
    file_extensions: List[str]
    tree_sitter_language: str
    chunk_types: List[str]
    max_file_size: int = Field(
        default=1_000_000,  # 1MB
        description="Maximum file size to process in bytes"
    )

class ParserConfig(BaseSettings):
    languages: Dict[str, LanguageConfig] = Field(
        default={
            "python": LanguageConfig(
                file_extensions=[".py"],
                tree_sitter_language="python",
                chunk_types=["class_definition", "function_definition"]
            )
        }
    )
    path_patterns: PathPatterns = Field(default_factory=PathPatterns)


class LLMConfig(BaseSettings):
    model_name: str = Field(
        default="llama3.2",
        description="Name of the LLM model to use"
    )
    model_provider: str = Field(
        default="ollama",
        description="Model provider (anthropic, openai, ollama, etc)"
    )
    api_key: Optional[str] = Field(
        default=None,
        description="API key for the model provider"
    )
    model_settings: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional model settings"
    )
    embedding_model: str = Field(
        default="mxbai-embed-large",
        description="Name of the embedding model to use"
    )
    embedding_provider: str = Field(
        default="ollama",
        description="Provider for embeddings (ollama, openai, etc)"
    )

class DBConfig(BaseSettings):
    persist_directory: Path = Field(
        default=Path("./chroma_db"),
        description="Directory to store ChromaDB files"
    )
    collection_name: str = Field(
        default="code_chunks",
        description="Name of the ChromaDB collection"
    )
    embedding_model: str = Field(
        default="sentence-transformers/all-mpnet-base-v2",
        description="Embedding model to use"
    )
    codebase_directory: Path = Field(
        default=Path("./"),
        description="Root directory of the codebase to analyze"
    )

class AppConfig(BaseSettings):
    model_config = SettingsConfigDict(
        env_file='.env',
        env_file_encoding='utf-8',
        env_nested_delimiter='__'
    )
    
    llm: LLMConfig = Field(default_factory=LLMConfig)
    db: DBConfig = Field(default_factory=DBConfig)
    parser: ParserConfig = Field(default_factory=ParserConfig)