Spaces:
Sleeping
Sleeping
python parser testing refactoring
Browse files
src/know_lang_bot/parser/languages/python/parser.py
CHANGED
@@ -109,12 +109,12 @@ class PythonParser(LanguageParser):
|
|
109 |
LOG.debug(f"Skipping file {file_path}: unsupported extension")
|
110 |
return []
|
111 |
|
112 |
-
# Check file size limit
|
113 |
-
if file_path.stat().st_size > self.language_config.max_file_size:
|
114 |
-
LOG.warning(f"Skipping file {file_path}: exceeds size limit of {self.language_config.max_file_size} bytes")
|
115 |
-
return []
|
116 |
-
|
117 |
try:
|
|
|
|
|
|
|
|
|
|
|
118 |
with open(file_path, 'rb') as f:
|
119 |
source_code = f.read()
|
120 |
|
|
|
109 |
LOG.debug(f"Skipping file {file_path}: unsupported extension")
|
110 |
return []
|
111 |
|
|
|
|
|
|
|
|
|
|
|
112 |
try:
|
113 |
+
# Check file size limit
|
114 |
+
if file_path.stat().st_size > self.language_config.max_file_size:
|
115 |
+
LOG.warning(f"Skipping file {file_path}: exceeds size limit of {self.language_config.max_file_size} bytes")
|
116 |
+
return []
|
117 |
+
|
118 |
with open(file_path, 'rb') as f:
|
119 |
source_code = f.read()
|
120 |
|
tests/conftest.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
import tempfile
|
3 |
+
import git
|
4 |
+
from pathlib import Path
|
5 |
+
from typing import Dict
|
6 |
+
from know_lang_bot.config import AppConfig, ParserConfig, LanguageConfig
|
7 |
+
from know_lang_bot.parser.languages.python.parser import PythonParser
|
8 |
+
from tests.test_data.python_files import TEST_FILES
|
9 |
+
|
10 |
+
@pytest.fixture
def test_config() -> AppConfig:
    """Build an ``AppConfig`` whose parser section registers only Python.

    The Python entry accepts ``.py`` files, names ``python`` as its
    tree-sitter language, sets ``max_file_size`` to 1,000,000 and lists class
    and function definitions as chunk types.
    """
    python_settings = LanguageConfig(
        file_extensions=[".py"],
        tree_sitter_language="python",
        max_file_size=1_000_000,
        chunk_types=["class_definition", "function_definition"],
    )
    parser_settings = ParserConfig(languages={"python": python_settings})
    return AppConfig(parser=parser_settings)
|
25 |
+
|
26 |
+
@pytest.fixture
def temp_repo():
    """Create a temporary git repository with sample Python files.

    Every entry of ``TEST_FILES`` (filename -> source text) is written into a
    fresh temporary directory, staged, and recorded in a single
    "Initial commit".

    Yields:
        The path of the temporary directory as a ``str``. The directory is
        removed once the test using the fixture has finished.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        root = Path(temp_dir)
        repo = git.Repo.init(temp_dir)
        try:
            for filename, content in TEST_FILES.items():
                file_path = root / filename
                # The parser opens files in binary mode, so pin the encoding
                # instead of letting the platform locale decide the bytes.
                file_path.write_text(content, encoding="utf-8")
                repo.index.add([str(file_path)])

            repo.index.commit("Initial commit")
        finally:
            # Tests only need the files on disk; release the handles GitPython
            # keeps so they cannot get in the way of directory cleanup.
            repo.close()

        yield temp_dir
|
40 |
+
|
41 |
+
@pytest.fixture
def python_parser(test_config):
    """Return a ``PythonParser`` built from ``test_config`` with ``setup()`` already called."""
    configured = PythonParser(test_config)
    configured.setup()
    return configured
|
tests/parser/test_python_parser.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pytest
|
2 |
+
from pathlib import Path
|
3 |
+
from know_lang_bot.core.types import ChunkType
|
4 |
+
from tests.test_data.python_files import (
|
5 |
+
TEST_FILES,
|
6 |
+
INVALID_SYNTAX,
|
7 |
+
SIMPLE_FILE_EXPECTATIONS,
|
8 |
+
NESTED_CLASS_EXPECTATIONS,
|
9 |
+
COMPLEX_FILE_EXPECTATIONS
|
10 |
+
)
|
11 |
+
from know_lang_bot.core.types import CodeChunk
|
12 |
+
from know_lang_bot.parser.languages.python.parser import PythonParser
|
13 |
+
from typing import List, Optional
|
14 |
+
import tempfile
|
15 |
+
|
16 |
+
def find_chunk_by_criteria(chunks: List[CodeChunk], **criteria) -> Optional[CodeChunk]:
    """Return the first chunk whose attributes equal all given criteria.

    Args:
        chunks: Chunks to search, in order.
        **criteria: Attribute-name/value pairs. A chunk matches when
            ``getattr(chunk, name) == value`` holds for every pair. With no
            criteria every chunk matches, so the first one is returned.

    Returns:
        The first matching chunk, or ``None`` when nothing matches (which
        includes an empty ``chunks``).

    Raises:
        AttributeError: If an inspected chunk lacks one of the named
            attributes.
    """
    matches = (
        chunk
        for chunk in chunks
        if all(getattr(chunk, attr) == wanted for attr, wanted in criteria.items())
    )
    # next() with a default gives the "first match or None" contract directly.
    return next(matches, None)
|
22 |
+
|
23 |
+
def verify_chunk_matches_expectation(
    chunk: CodeChunk,
    expected_name: str,
    expected_docstring: str,
    expected_content_snippet: str
) -> bool:
    """Report whether ``chunk`` carries the expected name, content and docstring.

    The checks run in order and stop at the first failure: the name must be
    equal, the snippet must occur in ``chunk.content``, the docstring must be
    present, and the expected docstring text must occur in it.
    """
    if chunk.name != expected_name:
        return False
    if expected_content_snippet not in chunk.content:
        return False
    # A missing docstring never matches; checking first avoids ``in None``.
    if chunk.docstring is None:
        return False
    return expected_docstring in chunk.docstring
|
36 |
+
|
37 |
+
|
38 |
+
class TestPythonParser:
    """Test suite for PythonParser.

    ``python_parser`` and ``temp_repo`` are pytest fixtures; ``temp_repo`` is
    the path (a ``str``) of a temporary directory holding the ``TEST_FILES``
    sources.
    """

    @staticmethod
    def _assert_expected_chunk(chunks, chunk_type, name, expectations) -> None:
        """Assert ``chunks`` holds a ``chunk_type`` chunk called ``name`` matching ``expectations[name]``."""
        chunk = find_chunk_by_criteria(chunks, type=chunk_type, name=name)
        assert chunk is not None, f"no {chunk_type} chunk named {name!r} was produced"
        expected = expectations[name]
        assert verify_chunk_matches_expectation(
            chunk,
            expected.name,
            expected.docstring,
            expected.content_snippet
        ), f"chunk {name!r} does not match its expectation"

    def test_parser_initialization(self, python_parser: PythonParser):
        """Test parser initialization"""
        assert python_parser.parser is not None
        assert python_parser.language is not None

    def test_simple_file_parsing(self, python_parser: PythonParser, temp_repo: str):
        """Test parsing a simple Python file with function and class"""
        chunks = python_parser.parse_file(Path(temp_repo) / "simple.py")

        self._assert_expected_chunk(
            chunks, ChunkType.FUNCTION, "hello_world", SIMPLE_FILE_EXPECTATIONS
        )
        self._assert_expected_chunk(
            chunks, ChunkType.CLASS, "SimpleClass", SIMPLE_FILE_EXPECTATIONS
        )

    def test_complex_file_parsing(self, python_parser: PythonParser, temp_repo: str):
        """Test parsing a complex Python file"""
        chunks = python_parser.parse_file(Path(temp_repo) / "complex.py")

        self._assert_expected_chunk(
            chunks, ChunkType.FUNCTION, "complex_function", COMPLEX_FILE_EXPECTATIONS
        )
        self._assert_expected_chunk(
            chunks, ChunkType.CLASS, "ComplexClass", COMPLEX_FILE_EXPECTATIONS
        )

    def test_error_handling(self, python_parser: PythonParser, temp_repo: str):
        """Test error handling for various error cases"""
        # Test invalid syntax
        invalid_file = Path(temp_repo) / "invalid.py"
        invalid_file.write_text(INVALID_SYNTAX)
        chunks = python_parser.parse_file(invalid_file)
        assert chunks == []

        # Test non-existent file
        nonexistent = Path(temp_repo) / "nonexistent.py"
        chunks = python_parser.parse_file(nonexistent)
        assert chunks == []

        # Test non-Python file
        non_python = Path(temp_repo) / "readme.md"
        non_python.write_text("# README")
        chunks = python_parser.parse_file(non_python)
        assert chunks == []

    def test_file_size_limits(self, python_parser: PythonParser, temp_repo: str):
        """Test file size limit enforcement"""
        large_file = Path(temp_repo) / "large.py"
        # Size the file from the configured limit so the test keeps working if
        # the limit changes. The file starts with a real function definition:
        # presumably the parser would emit a chunk for it if the size check
        # were skipped, so an empty result is evidence the limit was applied
        # rather than "nothing to extract".
        limit = python_parser.language_config.max_file_size
        large_file.write_text("def oversized():\n    pass\n" + "#" * limit)
        assert large_file.stat().st_size > limit

        chunks = python_parser.parse_file(large_file)
        assert chunks == []

    @pytest.mark.parametrize("test_file", TEST_FILES.keys())
    def test_supported_extensions(self, python_parser: PythonParser, test_file: str):
        """Test file extension support"""
        assert any(test_file.endswith(ext) for ext in python_parser.language_config.file_extensions)
|
tests/{test_constants.py → test_data/python_files.py}
RENAMED
File without changes
|
tests/test_parser.py
DELETED
@@ -1,152 +0,0 @@
|
|
1 |
-
from know_lang_bot.code_parser.parser import CodeChunk, CodeParser, ChunkType
|
2 |
-
from pathlib import Path
|
3 |
-
from tests.test_constants import (
|
4 |
-
SIMPLE_FILE_EXPECTATIONS,
|
5 |
-
NESTED_CLASS_EXPECTATIONS,
|
6 |
-
COMPLEX_FILE_EXPECTATIONS,
|
7 |
-
INVALID_SYNTAX,
|
8 |
-
TEST_FILES,
|
9 |
-
)
|
10 |
-
import pytest
|
11 |
-
import tempfile
|
12 |
-
import git
|
13 |
-
|
14 |
-
|
15 |
-
@pytest.fixture
|
16 |
-
def temp_repo():
|
17 |
-
"""Create a temporary git repository with sample Python files"""
|
18 |
-
with tempfile.TemporaryDirectory() as temp_dir:
|
19 |
-
# Initialize git repo
|
20 |
-
repo = git.Repo.init(temp_dir)
|
21 |
-
|
22 |
-
# Create sample Python files
|
23 |
-
for filename, content in TEST_FILES.items():
|
24 |
-
file_path = Path(temp_dir) / filename
|
25 |
-
file_path.write_text(content)
|
26 |
-
repo.index.add([str(file_path)])
|
27 |
-
|
28 |
-
repo.index.commit("Initial commit")
|
29 |
-
|
30 |
-
yield temp_dir
|
31 |
-
|
32 |
-
def find_chunk_by_criteria(chunks: list[CodeChunk], **criteria) -> CodeChunk:
|
33 |
-
"""Helper function to find a chunk matching given criteria"""
|
34 |
-
for chunk in chunks:
|
35 |
-
if all(getattr(chunk, k) == v for k, v in criteria.items()):
|
36 |
-
return chunk
|
37 |
-
return None
|
38 |
-
|
39 |
-
def test_init_parser(temp_repo):
|
40 |
-
"""Test parser initialization"""
|
41 |
-
parser = CodeParser(temp_repo)
|
42 |
-
assert parser.repo_path == Path(temp_repo)
|
43 |
-
assert parser.language is not None
|
44 |
-
assert parser.parser is not None
|
45 |
-
|
46 |
-
def test_parse_simple_file(temp_repo):
|
47 |
-
"""Test parsing a simple Python file with function and class"""
|
48 |
-
parser = CodeParser(temp_repo)
|
49 |
-
chunks = parser.parse_file(Path(temp_repo) / "simple.py")
|
50 |
-
|
51 |
-
# Test function
|
52 |
-
function_chunk = find_chunk_by_criteria(chunks, type=ChunkType.FUNCTION, name="hello_world")
|
53 |
-
assert function_chunk is not None
|
54 |
-
expected = SIMPLE_FILE_EXPECTATIONS['hello_world']
|
55 |
-
assert expected.content_snippet in function_chunk.content
|
56 |
-
assert function_chunk.docstring is not None
|
57 |
-
assert function_chunk.docstring in expected.docstring
|
58 |
-
|
59 |
-
# Test class
|
60 |
-
class_chunk = find_chunk_by_criteria(chunks, type=ChunkType.CLASS, name="SimpleClass")
|
61 |
-
assert class_chunk is not None
|
62 |
-
expected = SIMPLE_FILE_EXPECTATIONS['SimpleClass']
|
63 |
-
assert expected.content_snippet in class_chunk.content
|
64 |
-
assert class_chunk.docstring is not None
|
65 |
-
assert class_chunk.docstring in expected.docstring
|
66 |
-
|
67 |
-
|
68 |
-
def test_parse_nested_classes(temp_repo):
|
69 |
-
"""Test parsing nested class definitions"""
|
70 |
-
parser = CodeParser(temp_repo)
|
71 |
-
chunks = parser.parse_file(Path(temp_repo) / "nested.py")
|
72 |
-
|
73 |
-
# Test outer class
|
74 |
-
outer_class = find_chunk_by_criteria(chunks, type=ChunkType.CLASS, name="OuterClass")
|
75 |
-
assert outer_class is not None
|
76 |
-
expected = NESTED_CLASS_EXPECTATIONS['OuterClass']
|
77 |
-
assert expected.content_snippet in outer_class.content
|
78 |
-
assert outer_class.docstring is not None
|
79 |
-
assert outer_class.docstring in expected.docstring
|
80 |
-
|
81 |
-
# Verify inner class: Not implemented yet
|
82 |
-
pass
|
83 |
-
|
84 |
-
def test_parse_complex_file(temp_repo):
|
85 |
-
"""Test parsing a complex Python file"""
|
86 |
-
parser = CodeParser(temp_repo)
|
87 |
-
chunks = parser.parse_file(Path(temp_repo) / "complex.py")
|
88 |
-
|
89 |
-
# Test function with type hints
|
90 |
-
complex_func = find_chunk_by_criteria(
|
91 |
-
chunks,
|
92 |
-
type=ChunkType.FUNCTION,
|
93 |
-
name="complex_function"
|
94 |
-
)
|
95 |
-
assert complex_func is not None
|
96 |
-
expected = COMPLEX_FILE_EXPECTATIONS['complex_function']
|
97 |
-
assert expected.content_snippet in complex_func.content
|
98 |
-
assert complex_func.docstring is not None
|
99 |
-
assert complex_func.docstring in expected.docstring
|
100 |
-
|
101 |
-
# Test complex class
|
102 |
-
complex_class = find_chunk_by_criteria(
|
103 |
-
chunks,
|
104 |
-
type=ChunkType.CLASS,
|
105 |
-
name="ComplexClass"
|
106 |
-
)
|
107 |
-
assert complex_class is not None
|
108 |
-
expected = COMPLEX_FILE_EXPECTATIONS['ComplexClass']
|
109 |
-
assert expected.content_snippet in complex_class.content
|
110 |
-
assert complex_class.docstring is not None
|
111 |
-
assert complex_class.docstring in expected.docstring
|
112 |
-
|
113 |
-
|
114 |
-
def test_parse_repository(temp_repo):
|
115 |
-
"""Test parsing entire repository"""
|
116 |
-
parser = CodeParser(temp_repo)
|
117 |
-
chunks = parser.parse_repository()
|
118 |
-
|
119 |
-
file_paths = {chunk.file_path for chunk in chunks}
|
120 |
-
assert len(file_paths) == 3
|
121 |
-
|
122 |
-
# Verify we can find chunks from each test file
|
123 |
-
for filename in TEST_FILES.keys():
|
124 |
-
file_chunks = [c for c in chunks if Path(c.file_path).name == filename]
|
125 |
-
assert len(file_chunks) > 0
|
126 |
-
|
127 |
-
def test_error_handling(temp_repo):
|
128 |
-
"""Test error handling for invalid files"""
|
129 |
-
parser = CodeParser(temp_repo)
|
130 |
-
|
131 |
-
# Test invalid syntax
|
132 |
-
invalid_file = Path(temp_repo) / "invalid.py"
|
133 |
-
invalid_file.write_text(INVALID_SYNTAX)
|
134 |
-
chunks = parser.parse_file(invalid_file)
|
135 |
-
assert chunks == []
|
136 |
-
|
137 |
-
# Test non-existent file
|
138 |
-
nonexistent = Path(temp_repo) / "nonexistent.py"
|
139 |
-
chunks = parser.parse_file(nonexistent)
|
140 |
-
assert chunks == []
|
141 |
-
|
142 |
-
def test_non_python_files(temp_repo):
|
143 |
-
"""Test handling of non-Python files"""
|
144 |
-
parser = CodeParser(temp_repo)
|
145 |
-
|
146 |
-
# Create a non-Python file
|
147 |
-
non_python = Path(temp_repo) / "readme.md"
|
148 |
-
non_python.write_text("# README")
|
149 |
-
|
150 |
-
# Should skip non-Python files
|
151 |
-
chunks = parser.parse_file(non_python)
|
152 |
-
assert chunks == []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|