Spaces:
Sleeping
Sleeping
""" | |
Unit tests for the chunking strategy registry ``_ChunkingRegistry``.
""" | |
import pytest | |
from ntr_text_fragmentation.chunking import (ChunkingStrategy, | |
_ChunkingRegistry, | |
chunking_registry, | |
register_chunking_strategy) | |
# Fixtures
class MockStrategy(ChunkingStrategy):
    """Do-nothing strategy used as a registrable placeholder in the tests.

    The tests only register and look up the class itself; neither method is
    called by the tests visible in this module, so both are empty stubs.
    """

    def chunk(self, document, doc_entity):
        """Stub implementation; does nothing and returns ``None``."""

    # The first parameter is ``cls``, so the method has to be bound as a
    # classmethod. NOTE(review): presumably mirrors the base-class
    # signature in ``ChunkingStrategy`` -- confirm.
    @classmethod
    def dechunk(cls, repository, filtered_entities):
        """Stub implementation; does nothing and returns ``None``."""
@pytest.fixture
def clean_registry() -> _ChunkingRegistry:
    """Provide a freshly constructed registry instance.

    A new ``_ChunkingRegistry`` is created instead of reusing the
    module-level ``chunking_registry`` so that tests do not affect the
    global registry.

    Returns:
        A new ``_ChunkingRegistry`` object.
    """
    # Without the ``pytest.fixture`` marker pytest cannot inject this
    # function into tests that request ``clean_registry`` by name.
    return _ChunkingRegistry()
@pytest.fixture
def populated_registry(clean_registry: _ChunkingRegistry) -> _ChunkingRegistry:
    """Provide a registry with ``MockStrategy`` registered under two names.

    Args:
        clean_registry: The registry instance to fill (injected by pytest).

    Returns:
        The same registry object, now holding the entries ``"mock1"`` and
        ``"mock2"``, both mapped to ``MockStrategy``.
    """
    for strategy_name in ("mock1", "mock2"):
        clean_registry.register(strategy_name, MockStrategy)
    return clean_registry
# Tests
def test_register(clean_registry: _ChunkingRegistry):
    """Registering a strategy makes it countable, discoverable and retrievable."""
    strategy_name = "test_strategy"

    # Precondition: the registry starts out empty.
    assert len(clean_registry) == 0

    clean_registry.register(strategy_name, MockStrategy)

    assert len(clean_registry) == 1
    assert strategy_name in clean_registry
    assert clean_registry.get(strategy_name) is MockStrategy
def test_get(populated_registry: _ChunkingRegistry):
    """``get`` returns the registered class and raises ``KeyError`` otherwise."""
    assert populated_registry.get("mock1") is MockStrategy

    # Asking for a name that was never registered must raise.
    with pytest.raises(KeyError):
        populated_registry.get("nonexistent")
def test_getitem(populated_registry: _ChunkingRegistry):
    """Subscript access returns the registered class or raises ``KeyError``."""
    assert populated_registry["mock1"] is MockStrategy

    # Subscripting with a name that was never registered must raise.
    with pytest.raises(KeyError):
        populated_registry["nonexistent"]
def test_get_names(populated_registry: _ChunkingRegistry):
    """``get_names`` returns a list containing both registered names."""
    registered = populated_registry.get_names()

    assert isinstance(registered, list)
    assert len(registered) == 2
    for expected_name in ("mock1", "mock2"):
        assert expected_name in registered
def test_len(populated_registry: _ChunkingRegistry):
    """``len`` reports the number of registered strategies."""
    registered_count = len(populated_registry)
    assert registered_count == 2
def test_contains(populated_registry: _ChunkingRegistry):
    """Membership checks work for strategy names and for strategy classes."""
    assert "mock1" in populated_registry
    assert "nonexistent" not in populated_registry

    # Membership by the strategy class itself (instances are not stored).
    assert MockStrategy in populated_registry

    class AnotherStrategy(ChunkingStrategy):  # type: ignore
        """Strategy class that is deliberately never registered."""

        def chunk(self, document, doc_entity):
            pass

        # ``cls`` as the first parameter requires classmethod binding.
        @classmethod
        def dechunk(cls, repository, filtered_entities):
            pass

    assert AnotherStrategy not in populated_registry
def test_decorator_register():
    """Check that ``register_chunking_strategy`` adds classes to the global registry.

    Two cases are covered: registration under an explicit name and
    registration under the default name (the class name). The test works on
    the module-level ``chunking_registry``, so its contents are snapshotted
    first and put back afterwards.
    """
    # Snapshot the global registry so this test can undo its registrations.
    original_registry_state = chunking_registry._chunking_strategies.copy()
    original_len = len(chunking_registry)

    try:
        @register_chunking_strategy("decorated_strategy")
        class DecoratedStrategy(ChunkingStrategy):
            def chunk(self, document, doc_entity):
                pass

            @classmethod
            def dechunk(cls, repository, filtered_entities):
                pass

        assert len(chunking_registry) == original_len + 1
        assert "decorated_strategy" in chunking_registry
        assert chunking_registry.get("decorated_strategy") is DecoratedStrategy

        # Registration under the default name (the class name).
        # NOTE(review): assumes the decorator is a factory that is called
        # with no arguments for the default name -- confirm against the
        # definition of ``register_chunking_strategy``.
        @register_chunking_strategy()
        class DefaultNameStrategy(ChunkingStrategy):
            def chunk(self, document, doc_entity):
                pass

            @classmethod
            def dechunk(cls, repository, filtered_entities):
                pass

        assert len(chunking_registry) == original_len + 2
        assert "DefaultNameStrategy" in chunking_registry
        assert chunking_registry.get("DefaultNameStrategy") is DefaultNameStrategy
    finally:
        # Restore the global registry even when an assertion above fails,
        # so the classes registered here never leak into other tests.
        chunking_registry._chunking_strategies = original_registry_state