"""This module includes classes to define configurations.""" from typing import Any, Dict, List, Optional from pyaml_env import parse_config from pydantic import BaseModel class Query(BaseModel): query: str query_abbreviation: str abbreviations_replaced: Optional[List] = None userName: Optional[str] = None class SemanticChunk(BaseModel): index_answer: int doc_name: str title: str text_answer: str # doc_number: str # TODO Потом поменять название переменной на doc_id везде с чем это будет связанно other_info: List start_index_paragraph: int class FilterChunks(BaseModel): id: str filename: str title: str chunks: List[SemanticChunk] class BusinessProcess(BaseModel): production_activities_section: Optional[str] processes_name: Optional[str] level_process: Optional[str] class Lead(BaseModel): person: Optional[str] leads: Optional[str] class Subordinate(BaseModel): person_name: Optional[str] position: Optional[str] class OrganizationalStructure(BaseModel): position: Optional[str] = None leads: Optional[List[Lead]] = None subordinates: Optional[Subordinate] = None class RocksNN(BaseModel): division: Optional[str] company_name: Optional[str] class RocksNNSearch(BaseModel): division: Optional[str] company_name: Optional[List] class SegmentationSearch(BaseModel): segmentation_model: Optional[str] company_name: Optional[List] class Group(BaseModel): group_name: Optional[str] position_in_group: Optional[str] block: Optional[str] class GroupComposition(BaseModel): person_name: Optional[str] position_in_group: Optional[str] class SearchGroupComposition(BaseModel): group_name: Optional[str] group_composition: Optional[List[GroupComposition]] class PeopleChunks(BaseModel): business_processes: Optional[List[BusinessProcess]] = None organizatinal_structure: Optional[List[OrganizationalStructure]] = None business_curator: Optional[List[RocksNN]] = None groups: Optional[List[Group]] = None person_name: str class SummaryChunks(BaseModel): doc_chunks: Optional[List[FilterChunks]] = None people_search: Optional[List[PeopleChunks]] = None groups_search: Optional[SearchGroupComposition] = None rocks_nn_search: Optional[RocksNNSearch] = None segmentation_search: Optional[SegmentationSearch] = None query_type: str = '[3]' class ElasticConfiguration: def __init__(self, config_data): self.es_host = str(config_data['es_host']) self.es_port = int(config_data['es_port']) self.use_elastic = bool(config_data['use_elastic']) self.people_path = str(config_data['people_path']) class FaissDataConfiguration: def __init__(self, config_data): self.model_embedding_path = str(config_data['model_embedding_path']) self.device = str(config_data['device']) self.path_to_metadata = str(config_data['path_to_metadata']) class ChunksElasticSearchConfiguration: def __init__(self, config_data): self.use_chunks_search = bool(config_data['use_chunks_search']) self.index_name = str(config_data['index_name']) self.k_neighbors = int(config_data['k_neighbors']) class PeopleSearchConfiguration: def __init__(self, config_data): self.use_people_search = bool(config_data['use_people_search']) self.index_name = str(config_data['index_name']) self.k_neighbors = int(config_data['k_neighbors']) class VectorSearchConfiguration: def __init__(self, config_data): self.use_vector_search = bool(config_data['use_vector_search']) self.k_neighbors = int(config_data['k_neighbors']) class GroupsSearchConfiguration: def __init__(self, config_data): self.use_groups_search = bool(config_data['use_groups_search']) self.index_name = str(config_data['index_name']) self.k_neighbors = int(config_data['k_neighbors']) class RocksNNSearchConfiguration: def __init__(self, config_data): self.use_rocks_nn_search = bool(config_data['use_rocks_nn_search']) self.index_name = str(config_data['index_name']) self.k_neighbors = int(config_data['k_neighbors']) class AbbreviationSearchConfiguration: def __init__(self, config_data): self.use_abbreviation_search = bool(config_data['use_abbreviation_search']) self.index_name = str(config_data['index_name']) self.k_neighbors = int(config_data['k_neighbors']) class SegmentationSearchConfiguration: def __init__(self, config_data): self.use_segmentation_search = bool(config_data['use_segmentation_search']) self.index_name = str(config_data['index_name']) self.k_neighbors = int(config_data['k_neighbors']) class SearchConfiguration: def __init__(self, config_data): self.vector_search = VectorSearchConfiguration(config_data['vector_search']) self.people_elastic_search = PeopleSearchConfiguration( config_data['people_elastic_search'] ) self.chunks_elastic_search = ChunksElasticSearchConfiguration( config_data['chunks_elastic_search'] ) self.groups_elastic_search = GroupsSearchConfiguration( config_data['groups_elastic_search'] ) self.rocks_nn_elastic_search = RocksNNSearchConfiguration( config_data['rocks_nn_elastic_search'] ) self.segmentation_elastic_search = SegmentationSearchConfiguration( config_data['segmentation_elastic_search'] ) self.stop_index_names = list(config_data['stop_index_names']) self.abbreviation_search = AbbreviationSearchConfiguration( config_data['abbreviation_search'] ) class FilesConfiguration: def __init__(self, config_data): self.empty_start = bool(config_data['empty_start']) self.regulations_path = str(config_data['regulations_path']) self.default_regulations_path = str(config_data['default_regulations_path']) self.documents_path = str(config_data['documents_path']) class RankingConfiguration: def __init__(self, config_data): self.use_ranging = bool(config_data['use_ranging']) self.alpha = float(config_data['alpha']) self.beta = float(config_data['beta']) self.k_neighbors = int(config_data['k_neighbors']) class DataBaseConfiguration: def __init__(self, config_data): self.elastic = ElasticConfiguration(config_data['elastic']) self.faiss = FaissDataConfiguration(config_data['faiss']) self.search = SearchConfiguration(config_data['search']) self.files = FilesConfiguration(config_data['files']) self.ranker = RankingConfiguration(config_data['ranging']) class LLMConfiguration: def __init__(self, config_data): self.base_url = str(config_data['base_url']) if config_data['base_url'] not in ("", "null", "None") else None self.api_key_env = ( str(config_data['api_key_env']) if config_data['api_key_env'] not in ("", "null", "None") else None ) self.model = str(config_data['model']) self.tokenizer = str(config_data['tokenizer_name']) self.temperature = float(config_data['temperature']) self.top_p = float(config_data['top_p']) self.min_p = float(config_data['min_p']) self.frequency_penalty = float(config_data['frequency_penalty']) self.presence_penalty = float(config_data['presence_penalty']) self.seed = int(config_data['seed']) class CommonConfiguration: def __init__(self, config_data): self.log_file_path = str(config_data['log_file_path']) self.log_sql_path = str(config_data['log_sql_path']) class Configuration: """Encapsulates all configuration parameters.""" def __init__(self, config_file_path: Optional[str] = None): """Creates an instance of the class. There is 1 possibility to load configuration data: - from configuration file using a path; If attribute is not None, the configuration file is used. Args: config_file_path: A path to config file to load configuration data from. """ if config_file_path is not None: self._load_from_config(config_file_path) else: raise ValueError('At least one of config_path must be not None.') def _load_data(self, data: Dict[str, Any]): """Loads configuration data from dictionary. Args: data: A configuration dictionary to load configuration data from. """ self.common_config = CommonConfiguration(data['common']) self.db_config = DataBaseConfiguration(data['bd']) self.llm_config = LLMConfiguration(data['llm']) def _load_from_config(self, config_file_path: str): """Reads configuration file and form configuration dictionary. Args: config_file_path: A configuration dictionary to load configuration data from. """ data = parse_config(config_file_path) self._load_data(data)