"""Application configuration models (pydantic-settings based)."""
from typing import Optional, Dict, Any, List
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field
from pathlib import Path
import fnmatch
class PathPatterns(BaseSettings):
    """Include/exclude glob patterns controlling which paths get processed."""

    include: List[str] = Field(
        default=["**/*"],
        description="Glob patterns for paths to include"
    )
    exclude: List[str] = Field(
        default=["**/venv/**", "**/.git/**", "**/__pycache__/**"],
        description="Glob patterns for paths to exclude"
    )

    @staticmethod
    def _matches(path_str: str, pattern: str) -> bool:
        """fnmatch test with a '**/'-prefix fallback.

        fnmatch gives '**' no special meaning, so a pattern such as
        '**/*' never matches a slash-free top-level path like 'main.py',
        and '**/venv/**' never matches a root-level 'venv/...'. Retrying
        with the leading '**/' stripped lets those patterns also match
        paths at the root.
        """
        if fnmatch.fnmatch(path_str, pattern):
            return True
        return pattern.startswith("**/") and fnmatch.fnmatch(path_str, pattern[3:])

    def should_process_path(self, path: str) -> bool:
        """Return True if *path* is not excluded and matches an include pattern.

        Exclusions take precedence over inclusions. Backslashes are
        normalized to '/' so Windows-style paths match the POSIX-style
        default patterns.
        """
        path_str = str(path).replace("\\", "/")
        # First check exclusions — an exclusion short-circuits everything.
        for pattern in self.exclude:
            if self._matches(path_str, pattern):
                return False
        # Then check inclusions.
        for pattern in self.include:
            if self._matches(path_str, pattern):
                return True
        return False
class LanguageConfig(BaseSettings):
    """Per-language parsing configuration (extensions, grammar, chunking)."""

    # Whether this language is parsed at all.
    enabled: bool = True
    # File extensions (with leading dot, e.g. ".py") handled by this language.
    file_extensions: List[str]
    # Grammar name passed to tree-sitter for this language.
    tree_sitter_language: str
    # tree-sitter node types extracted as chunks (e.g. "function_definition").
    chunk_types: List[str]
    max_file_size: int = Field(
        default=1_000_000,  # 1MB
        description="Maximum file size to process in bytes"
    )
class ParserConfig(BaseSettings):
    """Parser settings: per-language configuration plus path filtering."""

    # default_factory (rather than a literal default={...}) guarantees each
    # ParserConfig instance gets its own dict and its own LanguageConfig —
    # a shared mutable default could leak mutations across instances.
    languages: Dict[str, LanguageConfig] = Field(
        default_factory=lambda: {
            "python": LanguageConfig(
                file_extensions=[".py"],
                tree_sitter_language="python",
                chunk_types=["class_definition", "function_definition"]
            )
        }
    )
    # Include/exclude filters applied before parsing any file.
    path_patterns: PathPatterns = Field(default_factory=PathPatterns)
class LLMConfig(BaseSettings):
    """Settings for the chat model and the embedding model it pairs with."""

    model_name: str = Field(
        default="llama3.2",
        description="Name of the LLM model to use"
    )
    model_provider: str = Field(
        default="ollama",
        description="Model provider (anthropic, openai, ollama, etc)"
    )
    # Optional because local providers (e.g. ollama) need no key.
    api_key: Optional[str] = Field(
        default=None,
        description="API key for the model provider"
    )
    # Free-form provider-specific options (temperature, context size, ...).
    model_settings: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional model settings"
    )
    embedding_model: str = Field(
        default="mxbai-embed-large",
        description="Name of the embedding model to use"
    )
    embedding_provider: str = Field(
        default="ollama",
        description="Provider for embeddings (ollama, openai, etc)"
    )
class DBConfig(BaseSettings):
    """ChromaDB storage settings and the codebase root to index."""

    persist_directory: Path = Field(
        default=Path("./chroma_db"),
        description="Directory to store ChromaDB files"
    )
    collection_name: str = Field(
        default="code_chunks",
        description="Name of the ChromaDB collection"
    )
    # NOTE(review): this duplicates LLMConfig.embedding_model with a
    # different default — confirm which one the indexing path actually uses.
    embedding_model: str = Field(
        default="sentence-transformers/all-mpnet-base-v2",
        description="Embedding model to use"
    )
    codebase_directory: Path = Field(
        default=Path("./"),
        description="Root directory of the codebase to analyze"
    )
class AppConfig(BaseSettings):
    """Top-level application settings aggregating LLM, DB, and parser config.

    Values may be overridden via a .env file; nested fields use '__' as the
    delimiter (e.g. LLM__MODEL_NAME, DB__COLLECTION_NAME).
    """

    model_config = SettingsConfigDict(
        env_file='.env',
        env_file_encoding='utf-8',
        env_nested_delimiter='__'
    )

    llm: LLMConfig = Field(default_factory=LLMConfig)
    db: DBConfig = Field(default_factory=DBConfig)
    # Fix: removed the stray trailing '|' (scrape artifact) that made this
    # line a syntax error.
    parser: ParserConfig = Field(default_factory=ParserConfig)