gabykim committed on
Commit
2182a08
·
1 Parent(s): 604ef86

python parser testing refactoring

Browse files
src/know_lang_bot/parser/languages/python/parser.py CHANGED
@@ -109,12 +109,12 @@ class PythonParser(LanguageParser):
109
  LOG.debug(f"Skipping file {file_path}: unsupported extension")
110
  return []
111
 
112
- # Check file size limit
113
- if file_path.stat().st_size > self.language_config.max_file_size:
114
- LOG.warning(f"Skipping file {file_path}: exceeds size limit of {self.language_config.max_file_size} bytes")
115
- return []
116
-
117
  try:
 
 
 
 
 
118
  with open(file_path, 'rb') as f:
119
  source_code = f.read()
120
 
 
109
  LOG.debug(f"Skipping file {file_path}: unsupported extension")
110
  return []
111
 
 
 
 
 
 
112
  try:
113
+ # Check file size limit
114
+ if file_path.stat().st_size > self.language_config.max_file_size:
115
+ LOG.warning(f"Skipping file {file_path}: exceeds size limit of {self.language_config.max_file_size} bytes")
116
+ return []
117
+
118
  with open(file_path, 'rb') as f:
119
  source_code = f.read()
120
 
tests/conftest.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ import tempfile
3
+ import git
4
+ from pathlib import Path
5
+ from typing import Dict
6
+ from know_lang_bot.config import AppConfig, ParserConfig, LanguageConfig
7
+ from know_lang_bot.parser.languages.python.parser import PythonParser
8
+ from tests.test_data.python_files import TEST_FILES
9
+
10
+ @pytest.fixture
11
+ def test_config() -> AppConfig:
12
+ """Provides test configuration"""
13
+ return AppConfig(
14
+ parser=ParserConfig(
15
+ languages={
16
+ "python": LanguageConfig(
17
+ file_extensions=[".py"],
18
+ tree_sitter_language="python",
19
+ max_file_size=1_000_000,
20
+ chunk_types=["class_definition", "function_definition"]
21
+ )
22
+ }
23
+ )
24
+ )
25
+
26
+ @pytest.fixture
27
+ def temp_repo():
28
+ """Create a temporary git repository with sample Python files"""
29
+ with tempfile.TemporaryDirectory() as temp_dir:
30
+ repo = git.Repo.init(temp_dir)
31
+
32
+ for filename, content in TEST_FILES.items():
33
+ file_path = Path(temp_dir) / filename
34
+ file_path.write_text(content)
35
+ repo.index.add([str(file_path)])
36
+
37
+ repo.index.commit("Initial commit")
38
+
39
+ yield temp_dir
40
+
41
+ @pytest.fixture
42
+ def python_parser(test_config):
43
+ """Provides initialized Python parser"""
44
+ parser = PythonParser(test_config)
45
+ parser.setup()
46
+ return parser
tests/parser/test_python_parser.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+ from pathlib import Path
3
+ from know_lang_bot.core.types import ChunkType
4
+ from tests.test_data.python_files import (
5
+ TEST_FILES,
6
+ INVALID_SYNTAX,
7
+ SIMPLE_FILE_EXPECTATIONS,
8
+ NESTED_CLASS_EXPECTATIONS,
9
+ COMPLEX_FILE_EXPECTATIONS
10
+ )
11
+ from know_lang_bot.core.types import CodeChunk
12
+ from know_lang_bot.parser.languages.python.parser import PythonParser
13
+ from typing import List
14
+ import tempfile
15
+
16
+ def find_chunk_by_criteria(chunks: List[CodeChunk], **criteria) -> CodeChunk:
17
+ """Helper function to find a chunk matching given criteria"""
18
+ for chunk in chunks:
19
+ if all(getattr(chunk, k) == v for k, v in criteria.items()):
20
+ return chunk
21
+ return None
22
+
23
+ def verify_chunk_matches_expectation(
24
+ chunk: CodeChunk,
25
+ expected_name: str,
26
+ expected_docstring: str,
27
+ expected_content_snippet: str
28
+ ) -> bool:
29
+ """Verify that a chunk matches expected values"""
30
+ return (
31
+ chunk.name == expected_name and
32
+ expected_content_snippet in chunk.content and
33
+ chunk.docstring is not None and
34
+ expected_docstring in chunk.docstring
35
+ )
36
+
37
+
38
+ class TestPythonParser:
39
+ """Test suite for PythonParser"""
40
+
41
+ def test_parser_initialization(self, python_parser: PythonParser):
42
+ """Test parser initialization"""
43
+ assert python_parser.parser is not None
44
+ assert python_parser.language is not None
45
+
46
+ def test_simple_file_parsing(self, python_parser: PythonParser, temp_repo: tempfile.TemporaryDirectory):
47
+ """Test parsing a simple Python file with function and class"""
48
+ chunks = python_parser.parse_file(Path(temp_repo) / "simple.py")
49
+
50
+ # Test function
51
+ function_chunk = find_chunk_by_criteria(
52
+ chunks,
53
+ type=ChunkType.FUNCTION,
54
+ name="hello_world"
55
+ )
56
+ assert function_chunk is not None
57
+ expected = SIMPLE_FILE_EXPECTATIONS['hello_world']
58
+ assert verify_chunk_matches_expectation(
59
+ function_chunk,
60
+ expected.name,
61
+ expected.docstring,
62
+ expected.content_snippet
63
+ )
64
+
65
+ # Test class
66
+ class_chunk = find_chunk_by_criteria(
67
+ chunks,
68
+ type=ChunkType.CLASS,
69
+ name="SimpleClass"
70
+ )
71
+ assert class_chunk is not None
72
+ expected = SIMPLE_FILE_EXPECTATIONS['SimpleClass']
73
+ assert verify_chunk_matches_expectation(
74
+ class_chunk,
75
+ expected.name,
76
+ expected.docstring,
77
+ expected.content_snippet
78
+ )
79
+
80
+ def test_complex_file_parsing(self, python_parser: PythonParser, temp_repo: tempfile.TemporaryDirectory):
81
+ """Test parsing a complex Python file"""
82
+ chunks = python_parser.parse_file(Path(temp_repo) / "complex.py")
83
+
84
+ # Test complex function
85
+ complex_func = find_chunk_by_criteria(
86
+ chunks,
87
+ type=ChunkType.FUNCTION,
88
+ name="complex_function"
89
+ )
90
+ assert complex_func is not None
91
+ expected = COMPLEX_FILE_EXPECTATIONS['complex_function']
92
+ assert verify_chunk_matches_expectation(
93
+ complex_func,
94
+ expected.name,
95
+ expected.docstring,
96
+ expected.content_snippet
97
+ )
98
+
99
+ # Test complex class
100
+ complex_class = find_chunk_by_criteria(
101
+ chunks,
102
+ type=ChunkType.CLASS,
103
+ name="ComplexClass"
104
+ )
105
+ assert complex_class is not None
106
+ expected = COMPLEX_FILE_EXPECTATIONS['ComplexClass']
107
+ assert verify_chunk_matches_expectation(
108
+ complex_class,
109
+ expected.name,
110
+ expected.docstring,
111
+ expected.content_snippet
112
+ )
113
+
114
+ def test_error_handling(self, python_parser: PythonParser, temp_repo: tempfile.TemporaryDirectory):
115
+ """Test error handling for various error cases"""
116
+ # Test invalid syntax
117
+ invalid_file = Path(temp_repo) / "invalid.py"
118
+ invalid_file.write_text(INVALID_SYNTAX)
119
+ chunks = python_parser.parse_file(invalid_file)
120
+ assert chunks == []
121
+
122
+ # Test non-existent file
123
+ nonexistent = Path(temp_repo) / "nonexistent.py"
124
+ chunks = python_parser.parse_file(nonexistent)
125
+ assert chunks == []
126
+
127
+ # Test non-Python file
128
+ non_python = Path(temp_repo) / "readme.md"
129
+ non_python.write_text("# README")
130
+ chunks = python_parser.parse_file(non_python)
131
+ assert chunks == []
132
+
133
+ def test_file_size_limits(self, python_parser: PythonParser, temp_repo: tempfile.TemporaryDirectory):
134
+ """Test file size limit enforcement"""
135
+ large_file = Path(temp_repo) / "large.py"
136
+ # Create a file larger than the limit
137
+ large_file.write_text("x = 1\n" * 1_000_000)
138
+
139
+ chunks = python_parser.parse_file(large_file)
140
+ assert chunks == []
141
+
142
+ @pytest.mark.parametrize("test_file", TEST_FILES.keys())
143
+ def test_supported_extensions(self, python_parser: PythonParser, test_file: str):
144
+ """Test file extension support"""
145
+ assert any(test_file.endswith(ext) for ext in python_parser.language_config.file_extensions)
tests/{test_constants.py → test_data/python_files.py} RENAMED
File without changes
tests/test_parser.py DELETED
@@ -1,152 +0,0 @@
1
- from know_lang_bot.code_parser.parser import CodeChunk, CodeParser, ChunkType
2
- from pathlib import Path
3
- from tests.test_constants import (
4
- SIMPLE_FILE_EXPECTATIONS,
5
- NESTED_CLASS_EXPECTATIONS,
6
- COMPLEX_FILE_EXPECTATIONS,
7
- INVALID_SYNTAX,
8
- TEST_FILES,
9
- )
10
- import pytest
11
- import tempfile
12
- import git
13
-
14
-
15
- @pytest.fixture
16
- def temp_repo():
17
- """Create a temporary git repository with sample Python files"""
18
- with tempfile.TemporaryDirectory() as temp_dir:
19
- # Initialize git repo
20
- repo = git.Repo.init(temp_dir)
21
-
22
- # Create sample Python files
23
- for filename, content in TEST_FILES.items():
24
- file_path = Path(temp_dir) / filename
25
- file_path.write_text(content)
26
- repo.index.add([str(file_path)])
27
-
28
- repo.index.commit("Initial commit")
29
-
30
- yield temp_dir
31
-
32
- def find_chunk_by_criteria(chunks: list[CodeChunk], **criteria) -> CodeChunk:
33
- """Helper function to find a chunk matching given criteria"""
34
- for chunk in chunks:
35
- if all(getattr(chunk, k) == v for k, v in criteria.items()):
36
- return chunk
37
- return None
38
-
39
- def test_init_parser(temp_repo):
40
- """Test parser initialization"""
41
- parser = CodeParser(temp_repo)
42
- assert parser.repo_path == Path(temp_repo)
43
- assert parser.language is not None
44
- assert parser.parser is not None
45
-
46
- def test_parse_simple_file(temp_repo):
47
- """Test parsing a simple Python file with function and class"""
48
- parser = CodeParser(temp_repo)
49
- chunks = parser.parse_file(Path(temp_repo) / "simple.py")
50
-
51
- # Test function
52
- function_chunk = find_chunk_by_criteria(chunks, type=ChunkType.FUNCTION, name="hello_world")
53
- assert function_chunk is not None
54
- expected = SIMPLE_FILE_EXPECTATIONS['hello_world']
55
- assert expected.content_snippet in function_chunk.content
56
- assert function_chunk.docstring is not None
57
- assert function_chunk.docstring in expected.docstring
58
-
59
- # Test class
60
- class_chunk = find_chunk_by_criteria(chunks, type=ChunkType.CLASS, name="SimpleClass")
61
- assert class_chunk is not None
62
- expected = SIMPLE_FILE_EXPECTATIONS['SimpleClass']
63
- assert expected.content_snippet in class_chunk.content
64
- assert class_chunk.docstring is not None
65
- assert class_chunk.docstring in expected.docstring
66
-
67
-
68
- def test_parse_nested_classes(temp_repo):
69
- """Test parsing nested class definitions"""
70
- parser = CodeParser(temp_repo)
71
- chunks = parser.parse_file(Path(temp_repo) / "nested.py")
72
-
73
- # Test outer class
74
- outer_class = find_chunk_by_criteria(chunks, type=ChunkType.CLASS, name="OuterClass")
75
- assert outer_class is not None
76
- expected = NESTED_CLASS_EXPECTATIONS['OuterClass']
77
- assert expected.content_snippet in outer_class.content
78
- assert outer_class.docstring is not None
79
- assert outer_class.docstring in expected.docstring
80
-
81
- # Verify inner class: Not implemented yet
82
- pass
83
-
84
- def test_parse_complex_file(temp_repo):
85
- """Test parsing a complex Python file"""
86
- parser = CodeParser(temp_repo)
87
- chunks = parser.parse_file(Path(temp_repo) / "complex.py")
88
-
89
- # Test function with type hints
90
- complex_func = find_chunk_by_criteria(
91
- chunks,
92
- type=ChunkType.FUNCTION,
93
- name="complex_function"
94
- )
95
- assert complex_func is not None
96
- expected = COMPLEX_FILE_EXPECTATIONS['complex_function']
97
- assert expected.content_snippet in complex_func.content
98
- assert complex_func.docstring is not None
99
- assert complex_func.docstring in expected.docstring
100
-
101
- # Test complex class
102
- complex_class = find_chunk_by_criteria(
103
- chunks,
104
- type=ChunkType.CLASS,
105
- name="ComplexClass"
106
- )
107
- assert complex_class is not None
108
- expected = COMPLEX_FILE_EXPECTATIONS['ComplexClass']
109
- assert expected.content_snippet in complex_class.content
110
- assert complex_class.docstring is not None
111
- assert complex_class.docstring in expected.docstring
112
-
113
-
114
- def test_parse_repository(temp_repo):
115
- """Test parsing entire repository"""
116
- parser = CodeParser(temp_repo)
117
- chunks = parser.parse_repository()
118
-
119
- file_paths = {chunk.file_path for chunk in chunks}
120
- assert len(file_paths) == 3
121
-
122
- # Verify we can find chunks from each test file
123
- for filename in TEST_FILES.keys():
124
- file_chunks = [c for c in chunks if Path(c.file_path).name == filename]
125
- assert len(file_chunks) > 0
126
-
127
- def test_error_handling(temp_repo):
128
- """Test error handling for invalid files"""
129
- parser = CodeParser(temp_repo)
130
-
131
- # Test invalid syntax
132
- invalid_file = Path(temp_repo) / "invalid.py"
133
- invalid_file.write_text(INVALID_SYNTAX)
134
- chunks = parser.parse_file(invalid_file)
135
- assert chunks == []
136
-
137
- # Test non-existent file
138
- nonexistent = Path(temp_repo) / "nonexistent.py"
139
- chunks = parser.parse_file(nonexistent)
140
- assert chunks == []
141
-
142
- def test_non_python_files(temp_repo):
143
- """Test handling of non-Python files"""
144
- parser = CodeParser(temp_repo)
145
-
146
- # Create a non-Python file
147
- non_python = Path(temp_repo) / "readme.md"
148
- non_python.write_text("# README")
149
-
150
- # Should skip non-Python files
151
- chunks = parser.parse_file(non_python)
152
- assert chunks == []