Spaces:
Sleeping
Sleeping
File size: 5,079 Bytes
744a170 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
"""
Unit-тесты для реестра стратегий чанкинга _ChunkingRegistry.
"""
import pytest
from ntr_text_fragmentation.chunking import (ChunkingStrategy,
_ChunkingRegistry,
chunking_registry,
register_chunking_strategy)
# Фикстуры
class MockStrategy(ChunkingStrategy):
    """Stand-in chunking strategy whose methods intentionally do nothing.

    It exists only so the tests have a `ChunkingStrategy` subclass to
    register and look up.
    """

    def chunk(self, document, doc_entity):
        """Do nothing and return None."""
        return None

    @classmethod
    def dechunk(cls, repository, filtered_entities):
        """Do nothing and return None."""
        return None
@pytest.fixture
def clean_registry() -> _ChunkingRegistry:
    """Provide a newly constructed `_ChunkingRegistry` instance.

    Returns:
        A registry object created for the requesting test.
    """
    # A dedicated instance keeps tests from touching the global
    # `chunking_registry`.
    isolated_registry = _ChunkingRegistry()
    return isolated_registry
@pytest.fixture
def populated_registry(clean_registry: _ChunkingRegistry) -> _ChunkingRegistry:
    """Provide a registry with `MockStrategy` registered as "mock1" and "mock2".

    Args:
        clean_registry: The registry instance supplied by the
            `clean_registry` fixture; it is populated in place.

    Returns:
        The same registry instance after both registrations.
    """
    for strategy_name in ("mock1", "mock2"):
        clean_registry.register(strategy_name, MockStrategy)
    return clean_registry
# Тесты
def test_register(clean_registry: _ChunkingRegistry):
    """Check that `register` adds a strategy that can then be found and fetched."""
    strategy_name = "test_strategy"

    # The registry starts out empty.
    assert len(clean_registry) == 0

    clean_registry.register(strategy_name, MockStrategy)

    assert len(clean_registry) == 1
    assert strategy_name in clean_registry
    assert clean_registry.get(strategy_name) is MockStrategy
def test_get(populated_registry: _ChunkingRegistry):
    """Check `get` for a registered name and for an unknown name."""
    assert populated_registry.get("mock1") is MockStrategy

    # Looking up a name that was never registered must raise KeyError.
    with pytest.raises(KeyError):
        populated_registry.get("nonexistent")
def test_getitem(populated_registry: _ChunkingRegistry):
    """Check subscript access (`registry[name]`) for known and unknown names."""
    assert populated_registry["mock1"] is MockStrategy

    # Subscripting with a name that was never registered must raise KeyError.
    with pytest.raises(KeyError):
        populated_registry["nonexistent"]
def test_get_names(populated_registry: _ChunkingRegistry):
    """Check that `get_names` returns a list holding both registered names."""
    registered_names = populated_registry.get_names()

    assert isinstance(registered_names, list)
    assert len(registered_names) == 2
    for expected_name in ("mock1", "mock2"):
        assert expected_name in registered_names
def test_len(populated_registry: _ChunkingRegistry):
    """Check that `len()` reports the number of registered strategies."""
    strategy_count = len(populated_registry)
    assert strategy_count == 2
def test_contains(populated_registry: _ChunkingRegistry):
    """Check the `in` operator with strategy names and with strategy classes."""

    # A second strategy class that is never registered in this registry.
    class AnotherStrategy(ChunkingStrategy):  # type: ignore
        def chunk(self, document, doc_entity):
            pass

        @classmethod
        def dechunk(cls, repository, filtered_entities):
            pass

    # Membership by registered name.
    assert "mock1" in populated_registry
    assert "nonexistent" not in populated_registry

    # Membership by the strategy class itself (instances are not stored).
    assert MockStrategy in populated_registry
    assert AnotherStrategy not in populated_registry
def test_decorator_register():
    """Check the `register_chunking_strategy` decorator on the global registry.

    Covers two forms: an explicit name passed to the decorator, and the
    no-argument form, which is expected to register under the class name.

    The test mutates the global `chunking_registry`, so its internal
    mapping is snapshotted first and restored in a `finally` block. This
    keeps a failing assertion from leaving the test strategies registered
    for later tests.
    """
    # Snapshot the current state of the global registry.
    original_registry_state = chunking_registry._chunking_strategies.copy()
    original_len = len(chunking_registry)

    try:
        @register_chunking_strategy("decorated_strategy")
        class DecoratedStrategy(ChunkingStrategy):
            def chunk(self, document, doc_entity):
                pass

            @classmethod
            def dechunk(cls, repository, filtered_entities):
                pass

        assert len(chunking_registry) == original_len + 1
        assert "decorated_strategy" in chunking_registry
        assert chunking_registry.get("decorated_strategy") is DecoratedStrategy

        # Registration under the default name (the class name).
        @register_chunking_strategy()
        class DefaultNameStrategy(ChunkingStrategy):
            def chunk(self, document, doc_entity):
                pass

            @classmethod
            def dechunk(cls, repository, filtered_entities):
                pass

        assert len(chunking_registry) == original_len + 2
        assert "DefaultNameStrategy" in chunking_registry
        assert chunking_registry.get("DefaultNameStrategy") is DefaultNameStrategy
    finally:
        # Restore the global registry whether or not the assertions passed.
        chunking_registry._chunking_strategies = original_registry_state