gabykim committed on
Commit
ad887b7
·
1 Parent(s): 3f04a5e

knowlang cli support

Browse files
pyproject.toml CHANGED
@@ -29,6 +29,8 @@ packages = [
29
  { include = "knowlang", from="src" },
30
  ]
31
 
 
 
32
 
33
  [tool.poetry.group.dev.dependencies]
34
  pytest = "^8.3.4"
 
29
  { include = "knowlang", from="src" },
30
  ]
31
 
32
+ [tool.poetry.scripts]
33
+ knowlang = "knowlang.cli:cli_main"
34
 
35
  [tool.poetry.group.dev.dependencies]
36
  pytest = "^8.3.4"
src/knowlang/__main__.py DELETED
@@ -1,136 +0,0 @@
1
- import argparse
2
- import asyncio
3
- import sys
4
- from pathlib import Path
5
- from typing import List, Optional
6
- from rich.console import Console
7
- from rich.table import Table
8
-
9
- from knowlang.core.types import CodeChunk
10
- from knowlang.configs.config import AppConfig
11
- from knowlang.parser.factory import CodeParserFactory
12
- from knowlang.parser.providers.git import GitProvider
13
- from knowlang.parser.providers.filesystem import FilesystemProvider
14
- from knowlang.summarizer.summarizer import CodeSummarizer
15
- from knowlang.utils.fancy_log import FancyLogger
16
-
17
- LOG = FancyLogger(__name__)
18
- console = Console()
19
-
20
- def parse_args():
21
- parser = argparse.ArgumentParser(
22
- description="Know Lang Bot - Code Analysis Tool",
23
- formatter_class=argparse.RawDescriptionHelpFormatter
24
- )
25
-
26
- parser.add_argument(
27
- "--config",
28
- type=str,
29
- help="Path to custom configuration file",
30
- default=None
31
- )
32
-
33
- parser.add_argument(
34
- "--verbose", "-v",
35
- action="store_true",
36
- help="Enable verbose output"
37
- )
38
-
39
- parser.add_argument(
40
- "--output",
41
- type=str,
42
- choices=["table", "json"],
43
- default="table",
44
- help="Output format (default: table)"
45
- )
46
-
47
- return parser.parse_args()
48
-
49
- def create_config(config_path: Optional[str] = None) -> AppConfig:
50
- """Create configuration, optionally from a file"""
51
- if config_path:
52
- with open(config_path, 'r') as file:
53
- config_data = file.read()
54
- return AppConfig.model_validate_json(config_data)
55
- return AppConfig()
56
-
57
- def display_results_table(chunks : List[CodeChunk]):
58
- """Display parsed chunks in a rich table"""
59
- table = Table(show_header=True, header_style="bold magenta")
60
- table.add_column("Type")
61
- table.add_column("Name")
62
- table.add_column("File")
63
- table.add_column("Lines")
64
- table.add_column("Parent")
65
-
66
- for chunk in chunks:
67
- table.add_row(
68
- chunk.type.value,
69
- chunk.name or "N/A",
70
- str(chunk.file_path),
71
- f"{chunk.start_line}-{chunk.end_line}",
72
- chunk.parent_name or "N/A"
73
- )
74
-
75
- console.print(table)
76
-
77
- def display_results_json(chunks: List[CodeChunk]):
78
- """Display parsed chunks as JSON"""
79
- import json
80
- print(json.dumps([chunk.model_dump() for chunk in chunks], indent=2))
81
-
82
- async def main():
83
- args = parse_args()
84
-
85
- # Setup logging
86
- if args.verbose:
87
- LOG.setLevel("DEBUG")
88
-
89
- try:
90
- # Load configuration
91
- config = create_config(args.config)
92
- # Create parser factory
93
- factory = CodeParserFactory(config)
94
-
95
- # Determine provider
96
- source_path = Path(config.db.codebase_directory)
97
- if (source_path/ '.git').exists():
98
- LOG.info(f"Detected Git repository at {source_path}")
99
- provider = GitProvider(source_path, config)
100
- else:
101
- LOG.info(f"Using filesystem provider for {source_path}")
102
- provider = FilesystemProvider(source_path, config)
103
-
104
- # Process files
105
- total_chunks = []
106
- with console.status("[bold green]Parsing files...") as status:
107
- for idx, file_path in enumerate(provider.get_files()):
108
- status.update(f"[bold yellow] Processed {idx+1} files, [bold green]processing {file_path}...\n")
109
-
110
- parser = factory.get_parser(file_path)
111
- if parser:
112
- chunks = parser.parse_file(file_path)
113
- total_chunks.extend(chunks)
114
-
115
- # Display results
116
- if total_chunks:
117
- LOG.info(f"\nFound {len(total_chunks)} code chunks")
118
- if args.output == "table":
119
- display_results_table(total_chunks)
120
- else:
121
- display_results_json(total_chunks)
122
- else:
123
- LOG.warning("No code chunks found")
124
-
125
- summarizer = CodeSummarizer(config)
126
- await summarizer.process_chunks(total_chunks)
127
-
128
- except Exception as e:
129
- LOG.error(f"Error: {str(e)}")
130
- if args.verbose:
131
- import traceback
132
- traceback.print_exc()
133
- sys.exit(1)
134
-
135
- if __name__ == "__main__":
136
- asyncio.run(main())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/knowlang/chat_bot/__main__.py DELETED
@@ -1,24 +0,0 @@
1
- from knowlang.configs.config import AppConfig
2
- from knowlang.chat_bot.chat_graph import process_chat
3
- import chromadb
4
- import asyncio
5
-
6
- async def test_chat_processing():
7
- config = AppConfig()
8
- db_client = chromadb.PersistentClient(
9
- path=str(config.db.persist_directory)
10
- )
11
- collection = db_client.get_collection(
12
- name=config.db.collection_name
13
- )
14
-
15
- result = await process_chat(
16
- "How are different quantization methods implemented in the transformers library, and what are the key components required to implement a new quantization method?",
17
- collection,
18
- config
19
- )
20
-
21
- print(f"Answer: {result.answer}")
22
-
23
- if __name__ == "__main__":
24
- asyncio.run(test_chat_processing())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/knowlang/cli/__init__.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI entry points for KnowLang."""
2
+ import asyncio
3
+ from typing import Optional, Sequence
4
+
5
+ from knowlang.cli.parser import create_parser
6
+ from knowlang.utils.fancy_log import FancyLogger
7
+
8
+ LOG = FancyLogger(__name__)
9
+
10
async def main(args: Optional[Sequence[str]] = None) -> int:
    """Parse the command line and run the selected sub-command.

    Args:
        args: Argument strings to parse. ``None`` is handed to ``argparse``
            unchanged, which then falls back to ``sys.argv[1:]``.

    Returns:
        ``0`` when the command coroutine completes, ``1`` when it raises an
        ``Exception`` (the error is logged instead of being propagated).
    """
    namespace = create_parser().parse_args(args)

    # Verbose mode lowers this module's logger to DEBUG.
    if namespace.verbose:
        LOG.setLevel("DEBUG")

    try:
        # Every sub-parser registers its coroutine under ``func``.
        await namespace.func(namespace)
    except Exception as exc:
        LOG.error(f"Error: {str(exc)}")
        if namespace.verbose:
            # The full traceback is only shown when the user asked for detail.
            import traceback
            traceback.print_exc()
        return 1
    return 0
36
+
37
def cli_main() -> None:
    """Synchronous entry point used by the ``knowlang`` console script.

    Runs the async ``main`` coroutine to completion and terminates the
    process with the exit code it returns.

    Raises:
        SystemExit: Always, carrying the exit code returned by ``main``.
    """
    # Imported locally so this block does not depend on a module-level import.
    import sys

    exit_code = asyncio.run(main())
    # ``sys.exit`` is used instead of the ``exit`` builtin: the latter is
    # injected by the ``site`` module for interactive use and is not
    # available when the interpreter runs without ``site`` (e.g. ``-S``).
    sys.exit(exit_code)
src/knowlang/cli/commands/parse.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Command implementation for parsing codebases."""
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from knowlang.configs.config import AppConfig
6
+ from knowlang.parser.factory import CodeParserFactory
7
+ from knowlang.parser.providers.git import GitProvider
8
+ from knowlang.parser.providers.filesystem import FilesystemProvider
9
+ from knowlang.summarizer.summarizer import CodeSummarizer
10
+ from knowlang.cli.display.formatters import get_formatter
11
+ from knowlang.cli.display.progress import ProgressTracker
12
+ from knowlang.utils.fancy_log import FancyLogger
13
+ from knowlang.cli.types import ParseCommandArgs
14
+
15
+ LOG = FancyLogger(__name__)
16
+
17
def create_config(config_path: Optional[Path] = None) -> AppConfig:
    """Build the application configuration from a file or from defaults.

    Args:
        config_path: Path to a JSON configuration file. When falsy (the
            default ``None``), ``AppConfig()`` is constructed with no
            arguments.

    Returns:
        The ``AppConfig`` produced by ``AppConfig.model_validate_json`` on the
        file contents, or a default-constructed ``AppConfig``.

    Raises:
        OSError: If the configuration file cannot be opened or read.
    """
    if not config_path:
        return AppConfig()

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    with open(config_path, 'r', encoding="utf-8") as file:
        config_data = file.read()
    return AppConfig.model_validate_json(config_data)
24
+
25
async def parse_command(args: ParseCommandArgs) -> None:
    """Run the ``parse`` sub-command: parse files, show chunks, summarize.

    Args:
        args: Parsed command-line arguments; ``config``, ``path`` and
            ``output`` are read.
    """
    config = create_config(args.config)
    # The path given on the command line replaces the configured directory.
    config.db.codebase_directory = str(args.path)

    parser_factory = CodeParserFactory(config)

    # A ``.git`` entry under the root selects the Git provider.
    root = args.path
    is_git_repo = (root / '.git').exists()
    if not is_git_repo:
        LOG.info(f"Using filesystem provider for {root}")
        provider = FilesystemProvider(root, config)
    else:
        LOG.info(f"Detected Git repository at {root}")
        provider = GitProvider(root, config)

    # Collect the chunks of every file for which the factory has a parser.
    collected = []
    tracker = ProgressTracker("Parsing files...")
    with tracker.progress():
        for source_file in provider.get_files():
            tracker.update(f"processing {source_file}...")

            file_parser = parser_factory.get_parser(source_file)
            if not file_parser:
                continue
            collected.extend(file_parser.parse_file(source_file))

    if not collected:
        LOG.warning("No code chunks found")
    else:
        LOG.info(f"\nFound {len(collected)} code chunks")
        renderer = get_formatter(args.output)
        renderer.display_chunks(collected)

    # Summaries are requested even when the chunk list is empty.
    summarizer = CodeSummarizer(config)
    await summarizer.process_chunks(collected)
src/knowlang/cli/display/formatters.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Output formatters for CLI results."""
2
+ from typing import List, Protocol
3
+ import json
4
+ from rich.console import Console
5
+ from rich.table import Table
6
+
7
+ from knowlang.core.types import CodeChunk
8
+
9
+ console = Console()
10
+
11
class OutputFormatter(Protocol):
    """Structural interface implemented by every CLI output formatter."""

    def display_chunks(self, chunks: List[CodeChunk]) -> None:
        """Present the given code chunks to the user.

        Args:
            chunks: Code chunks to display.
        """
        ...
17
+
18
class TableFormatter:
    """Render code chunks as a rich table on the module-level console."""

    def display_chunks(self, chunks: List[CodeChunk]) -> None:
        """Print one table row per chunk.

        Args:
            chunks: Code chunks to display; missing names and parent names
                are shown as ``"N/A"``.
        """
        grid = Table(show_header=True, header_style="bold magenta")
        for heading in ("Type", "Name", "File", "Lines", "Parent"):
            grid.add_column(heading)

        for item in chunks:
            row = (
                item.type.value,
                item.name or "N/A",
                str(item.file_path),
                f"{item.start_line}-{item.end_line}",
                item.parent_name or "N/A",
            )
            grid.add_row(*row)

        console.print(grid)
39
+
40
class JsonFormatter:
    """Format output as JSON."""

    def display_chunks(self, chunks: List[CodeChunk]) -> None:
        """Print all chunks to stdout as a single indented JSON array.

        Args:
            chunks: Code chunks to serialize; each is converted with its
                ``model_dump`` method.
        """
        # mode="json" asks the model to emit JSON-compatible primitives, so
        # values such as enum members or paths cannot make json.dumps fail.
        # NOTE(review): assumes CodeChunk is a pydantic v2 model (it exposes
        # model_dump) -- confirm against knowlang.core.types.
        payload = [chunk.model_dump(mode="json") for chunk in chunks]
        print(json.dumps(payload, indent=2))
45
+
46
def get_formatter(format_type: str) -> OutputFormatter:
    """Instantiate the formatter registered under ``format_type``.

    Args:
        format_type: Name of the output format, ``"table"`` or ``"json"``.

    Returns:
        A new formatter instance for the requested format.

    Raises:
        ValueError: If ``format_type`` is not a registered format name.
    """
    registry = {
        "table": TableFormatter,
        "json": JsonFormatter
    }

    formatter_cls = registry.get(format_type)
    if formatter_cls is None:
        raise ValueError(f"Unknown format type: {format_type}")
    return formatter_cls()
src/knowlang/cli/display/progress.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Progress tracking for CLI operations."""
2
+ from contextlib import contextmanager
3
+ from typing import Iterator
4
+ from rich.console import Console
5
+
6
+ console = Console()
7
+
8
class ProgressTracker:
    """Show a live status line while files are being processed.

    Wraps the module-level console's ``status`` display and counts how many
    times :meth:`update` is called while that display is active.
    """

    def __init__(self, description: str = "Processing..."):
        """Store the initial status text; nothing is displayed yet.

        Args:
            description: Text shown when the status display first appears.
        """
        self.description = description
        # Live status handle; only set while inside ``progress()``.
        self.status = None
        self.processed_files = 0

    @contextmanager
    def progress(self) -> Iterator[None]:
        """Context manager that keeps the status display active.

        The status handle is stored on the instance for the duration of the
        ``with`` body so that :meth:`update` can refresh it.
        """
        with console.status(f"[bold green]{self.description}") as status:
            self.status = status
            try:
                yield
            finally:
                # Reset even when the body raises; otherwise the tracker
                # would keep a handle to a status display that has exited.
                self.status = None

    def update(self, message: str) -> None:
        """Count one more file and refresh the status text.

        Does nothing when called outside of :meth:`progress`.

        Args:
            message: Current status message
        """
        if not self.status:
            return

        self.processed_files += 1
        self.status.update(
            f"[bold yellow]Processed {self.processed_files} files, "
            f"[bold green]{message}"
        )
src/knowlang/cli/parser.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Argument parsing for KnowLang CLI."""
2
+ import argparse
3
+ from pathlib import Path
4
+ from typing import Union
5
+
6
+ from knowlang.cli.types import ParseCommandArgs, BaseCommandArgs
7
+ from knowlang.cli.commands.parse import parse_command
8
+
9
def _convert_to_args(parsed_args: argparse.Namespace) -> Union[ParseCommandArgs, BaseCommandArgs]:
    """Turn an ``argparse`` namespace into the matching typed dataclass.

    Args:
        parsed_args: Namespace produced by the parser from ``create_parser``.

    Returns:
        A ``ParseCommandArgs`` when the selected command is ``"parse"``,
        otherwise a ``BaseCommandArgs`` holding only the shared options.
    """
    shared = {
        "verbose": parsed_args.verbose,
        # ``config`` is optional on the namespace; fall back to None.
        "config": getattr(parsed_args, "config", None),
    }

    if parsed_args.command != "parse":
        return BaseCommandArgs(**shared)

    return ParseCommandArgs(
        **shared,
        path=parsed_args.path,
        output=parsed_args.output,
        command="parse"
    )
25
+
26
def create_parser() -> argparse.ArgumentParser:
    """Build the top-level ``argparse`` parser for the KnowLang CLI.

    Returns:
        A parser with the global ``--verbose``/``--config`` options and a
        required ``parse`` sub-command whose ``func`` default is
        ``parse_command``.
    """
    root = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="KnowLang - Code Understanding Assistant",
    )

    # Options shared by every sub-command.
    root.add_argument(
        "--verbose", "-v",
        help="Enable verbose output",
        action="store_true",
    )
    root.add_argument(
        "--config",
        help="Path to custom configuration file",
        type=Path,
        default=None,
    )

    # A sub-command must always be given; its name is stored in ``command``.
    commands = root.add_subparsers(
        title="commands",
        description="Available commands",
        dest="command",
        required=True,
    )

    # ``parse`` sub-command.
    parse_cmd = commands.add_parser("parse", help="Parse and index a codebase")
    parse_cmd.add_argument(
        "--output",
        help="Output format (default: table)",
        type=str,
        choices=["table", "json"],
        default="table",
    )
    parse_cmd.add_argument(
        "path",
        help="Path to codebase directory or repository URL",
        type=Path,
    )
    parse_cmd.set_defaults(func=parse_command)

    return root
75
+
76
def parse_args() -> Union[ParseCommandArgs, BaseCommandArgs]:
    """Parse the process command line and return typed argument objects.

    Returns:
        The dataclass produced by ``_convert_to_args`` for the namespace
        parsed by the parser from ``create_parser``.
    """
    namespace = create_parser().parse_args()
    return _convert_to_args(namespace)
src/knowlang/cli/types.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Type definitions for CLI arguments."""
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import Literal, Optional
5
+
6
@dataclass
class BaseCommandArgs:
    """Options shared by every CLI command."""

    # True when verbose output was requested.
    verbose: bool
    # Path to a custom configuration file, or None when not given.
    config: Optional[Path]
11
+
12
@dataclass
class ParseCommandArgs(BaseCommandArgs):
    """Typed arguments of the ``parse`` command."""

    # Location of the codebase to parse.
    path: Path
    # Selected output format.
    output: Literal["table", "json"]
    # Fixed tag identifying which command these arguments belong to.
    command: Literal["parse"]