import logging
import os
from logging import Logger
from typing import Annotated

from fastapi import Depends
from ntr_text_fragmentation import InjectionBuilder
from sqlalchemy.orm import sessionmaker

from common.configuration import Configuration
from common.db import session_factory
from components.dbo.chunk_repository import ChunkRepository
from components.embedding_extraction import EmbeddingExtractor
from components.llm.common import LlmParams
from components.llm.deepinfra_api import DeepInfraApi
from components.services.dataset import DatasetService
from components.services.document import DocumentService
from components.services.entity import EntityService
from components.services.llm_config import LLMConfigService
from components.services.llm_prompt import LlmPromptService

def get_config() -> Configuration:
    """Load the application configuration from CONFIG_PATH (defaults to config_dev.yaml)."""
    return Configuration(os.environ.get('CONFIG_PATH', 'config_dev.yaml'))

def get_db() -> sessionmaker:
    """Provide the SQLAlchemy session factory."""
    return session_factory

def get_logger() -> Logger:
    """Provide a module-level logger."""
    return logging.getLogger(__name__)

def get_embedding_extractor(
    config: Annotated[Configuration, Depends(get_config)],
) -> EmbeddingExtractor:
    """Build the embedding extractor from the FAISS model path and device in the config."""
    return EmbeddingExtractor(
        config.db_config.faiss.model_embedding_path,
        config.db_config.faiss.device,
    )

def get_chunk_repository(db: Annotated[sessionmaker, Depends(get_db)]) -> ChunkRepository:
    """Provide the chunk repository backed by the session factory."""
    return ChunkRepository(db)

def get_injection_builder(
    chunk_repository: Annotated[ChunkRepository, Depends(get_chunk_repository)],
) -> InjectionBuilder:
    """Provide the injection builder on top of the chunk repository."""
    return InjectionBuilder(chunk_repository)

def get_entity_service(
    vectorizer: Annotated[EmbeddingExtractor, Depends(get_embedding_extractor)],
    chunk_repository: Annotated[ChunkRepository, Depends(get_chunk_repository)],
    config: Annotated[Configuration, Depends(get_config)],
) -> EntityService:
    """Provide the entity service via DI."""
    return EntityService(vectorizer, chunk_repository, config)

def get_dataset_service(
    entity_service: Annotated[EntityService, Depends(get_entity_service)],
    config: Annotated[Configuration, Depends(get_config)],
    db: Annotated[sessionmaker, Depends(get_db)],
) -> DatasetService:
    """Provide the dataset service via DI."""
    return DatasetService(entity_service, config, db)

def get_document_service(
    dataset_service: Annotated[DatasetService, Depends(get_dataset_service)],
    config: Annotated[Configuration, Depends(get_config)],
    db: Annotated[sessionmaker, Depends(get_db)],
) -> DocumentService:
    """Provide the document service via DI."""
    return DocumentService(dataset_service, config, db)

def get_llm_config_service(db: Annotated[sessionmaker, Depends(get_db)]) -> LLMConfigService:
    """Provide the LLM configuration service."""
    return LLMConfigService(db)

def get_llm_service(
    config: Annotated[Configuration, Depends(get_config)],
) -> DeepInfraApi:
    """Build the DeepInfra LLM client from the configured model, tokenizer and API key."""
    llm_params = LlmParams(
        url=config.llm_config.base_url,
        model=config.llm_config.model,
        tokenizer=config.llm_config.tokenizer,
        type="deepinfra",
        default=True,
        predict_params=None,  # must be supplied with each request
        api_key=os.environ.get(config.llm_config.api_key_env),
        context_length=128000,
    )
    return DeepInfraApi(params=llm_params)

def get_llm_prompt_service(db: Annotated[sessionmaker, Depends(get_db)]) -> LlmPromptService:
    """Provide the LLM prompt service."""
    return LlmPromptService(db)
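

# Minimal usage sketch (not part of the original module): the providers above are meant
# to be consumed in path operations via Depends. The router, path, and response shape
# below are hypothetical and only illustrate how FastAPI resolves this wiring.
from fastapi import APIRouter

example_router = APIRouter()

@example_router.get("/healthcheck/di")
def di_healthcheck(
    config: Annotated[Configuration, Depends(get_config)],
    logger: Annotated[Logger, Depends(get_logger)],
) -> dict:
    # FastAPI resolves both dependencies before the handler body runs.
    logger.info("dependency wiring resolved")
    return {"config_loaded": config is not None}

# In tests, any provider can be swapped via app.dependency_overrides, e.g.
# app.dependency_overrides[get_config] = lambda: Configuration('config_test.yaml')
# (the config path shown here is hypothetical).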