Spaces:
Sleeping
Sleeping
"""This module includes classes to define configurations.""" | |
from typing import Any, Dict, List, Optional | |
from pyaml_env import parse_config | |
from pydantic import BaseModel | |
class Query(BaseModel):
    """Pydantic model carrying a query and its accompanying metadata.

    Attributes:
        query: Required query string.
        query_abbreviation: Required string; presumably the query with
            abbreviations processed -- confirm against the caller.
        abbreviations_replaced: Optional untyped list, ``None`` by default.
        userName: Optional user name, ``None`` by default.
    """

    # Required fields.
    query: str
    query_abbreviation: str

    # Optional fields; both fall back to None when omitted.
    abbreviations_replaced: Optional[List] = None
    userName: Optional[str] = None
class SemanticChunk(BaseModel):
    """Pydantic model for one chunk of a document; every field is required.

    Attributes:
        index_answer: Integer index of the chunk.
        doc_name: Name of the document the chunk is associated with.
        title: Title string.
        text_answer: Text of the chunk.
        other_info: Untyped list with additional information.
        start_index_paragraph: Integer start index of the paragraph.
    """

    index_answer: int
    doc_name: str
    title: str
    text_answer: str
    # doc_number: str  # TODO: later rename this variable to doc_id everywhere it is involved
    other_info: List
    start_index_paragraph: int
class FilterChunks(BaseModel):
    """Pydantic model grouping ``SemanticChunk`` items under one document entry.

    All fields are required.

    Attributes:
        id: String identifier of the entry.
        filename: File name string.
        title: Title string.
        chunks: List of ``SemanticChunk`` models belonging to the entry.
    """

    id: str
    filename: str
    title: str
    chunks: List[SemanticChunk]
class BusinessProcess(BaseModel):
    """Pydantic model with three nullable string fields describing a process.

    NOTE(review): none of the fields declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    production_activities_section: Optional[str]
    processes_name: Optional[str]
    level_process: Optional[str]
class Lead(BaseModel):
    """Pydantic model with two nullable string fields, ``person`` and ``leads``.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    person: Optional[str]
    leads: Optional[str]
class Subordinate(BaseModel):
    """Pydantic model holding a person's name and position as nullable strings.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    person_name: Optional[str]
    position: Optional[str]
class OrganizationalStructure(BaseModel):
    """Pydantic model tying a position to ``Lead`` and ``Subordinate`` models.

    Every field is optional and defaults to ``None``.

    Attributes:
        position: Position string.
        leads: List of ``Lead`` models.
        subordinates: A single ``Subordinate`` model.
    """

    position: Optional[str] = None
    leads: Optional[List[Lead]] = None
    # NOTE(review): despite the plural name this holds one Subordinate, not a
    # list -- confirm that this is intended.
    subordinates: Optional[Subordinate] = None
class RocksNN(BaseModel):
    """Pydantic model pairing a division with a single company name.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    division: Optional[str]
    company_name: Optional[str]
class RocksNNSearch(BaseModel):
    """Pydantic model pairing a division with a list under ``company_name``.

    Unlike ``RocksNN``, ``company_name`` here is an untyped list rather than a
    single string.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    division: Optional[str]
    company_name: Optional[List]
class SegmentationSearch(BaseModel):
    """Pydantic model pairing a segmentation model name with a company list.

    Attributes:
        segmentation_model: Nullable string.
        company_name: Nullable untyped list.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    segmentation_model: Optional[str]
    company_name: Optional[List]
class Group(BaseModel):
    """Pydantic model with a group name, a position in the group and a block.

    All three fields are nullable strings.

    NOTE(review): none of the fields declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    group_name: Optional[str]
    position_in_group: Optional[str]
    block: Optional[str]
class GroupComposition(BaseModel):
    """Pydantic model with a person's name and their position in a group.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    person_name: Optional[str]
    position_in_group: Optional[str]
class SearchGroupComposition(BaseModel):
    """Pydantic model pairing a group name with ``GroupComposition`` entries.

    Attributes:
        group_name: Nullable string.
        group_composition: Nullable list of ``GroupComposition`` models.

    NOTE(review): neither field declares a default. Under pydantic v1 an
    ``Optional[...]`` field without a default implicitly defaults to ``None``;
    under pydantic v2 it is required (but may be ``None``). Confirm which
    behaviour is intended for the installed pydantic version.
    """

    group_name: Optional[str]
    group_composition: Optional[List[GroupComposition]]
class PeopleChunks(BaseModel):
    """Pydantic model collecting the records attached to one person.

    ``person_name`` is the only required field; each list field is optional
    and defaults to ``None``.

    Attributes:
        business_processes: List of ``BusinessProcess`` models.
        organizatinal_structure: List of ``OrganizationalStructure`` models.
        business_curator: List of ``RocksNN`` models.
        groups: List of ``Group`` models.
        person_name: Name of the person.
    """

    business_processes: Optional[List[BusinessProcess]] = None
    # NOTE: the field name is misspelled ("organizatinal"). It is part of the
    # model's public schema, so it is deliberately left unchanged here.
    organizatinal_structure: Optional[List[OrganizationalStructure]] = None
    business_curator: Optional[List[RocksNN]] = None
    groups: Optional[List[Group]] = None
    person_name: str
class SummaryChunks(BaseModel):
    """Pydantic model bundling the results of the individual searches.

    Every result field is optional and defaults to ``None``.

    Attributes:
        doc_chunks: List of ``FilterChunks`` models.
        people_search: List of ``PeopleChunks`` models.
        groups_search: A ``SearchGroupComposition`` model.
        rocks_nn_search: A ``RocksNNSearch`` model.
        segmentation_search: A ``SegmentationSearch`` model.
        query_type: String defaulting to ``'[3]'``.
    """

    doc_chunks: Optional[List[FilterChunks]] = None
    people_search: Optional[List[PeopleChunks]] = None
    groups_search: Optional[SearchGroupComposition] = None
    rocks_nn_search: Optional[RocksNNSearch] = None
    segmentation_search: Optional[SegmentationSearch] = None
    # NOTE(review): the default is the literal string '[3]' (brackets
    # included); its meaning is not visible in this module.
    query_type: str = '[3]'
class ElasticConfiguration:
    """Elasticsearch connection settings read from a configuration mapping.

    Attributes:
        es_host: Value of ``es_host`` as a string.
        es_port: Value of ``es_port`` as an integer.
        use_elastic: Value of ``use_elastic`` as a boolean flag.
        people_path: Value of ``people_path`` as a string.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``es_host``, ``es_port``,
                ``use_elastic`` and ``people_path``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``es_port`` cannot be converted to ``int``.
        """
        self.es_host = str(config_data['es_host'])
        self.es_port = int(config_data['es_port'])
        self.use_elastic = self._as_bool(config_data['use_elastic'])
        self.people_path = str(config_data['people_path'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class FaissDataConfiguration:
    """FAISS-related settings read from a configuration mapping.

    Attributes:
        model_embedding_path: Value of ``model_embedding_path`` as a string.
        device: Value of ``device`` as a string.
        path_to_metadata: Value of ``path_to_metadata`` as a string.
    """

    def __init__(self, config_data):
        """Copy the settings from ``config_data``, coercing each to ``str``.

        Args:
            config_data: Mapping with the keys ``model_embedding_path``,
                ``device`` and ``path_to_metadata``.

        Raises:
            KeyError: If a key is missing.
        """
        # Each attribute carries the same name as the key it is read from.
        for option in ('model_embedding_path', 'device', 'path_to_metadata'):
            setattr(self, option, str(config_data[option]))
class ChunksElasticSearchConfiguration:
    """Settings of the chunk search read from a configuration mapping.

    Attributes:
        use_chunks_search: Value of ``use_chunks_search`` as a boolean flag.
        index_name: Value of ``index_name`` as a string.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_chunks_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_chunks_search = self._as_bool(config_data['use_chunks_search'])
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class PeopleSearchConfiguration:
    """Settings of the people search read from a configuration mapping.

    Attributes:
        use_people_search: Value of ``use_people_search`` as a boolean flag.
        index_name: Value of ``index_name`` as a string.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_people_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_people_search = self._as_bool(config_data['use_people_search'])
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class VectorSearchConfiguration:
    """Settings of the vector search read from a configuration mapping.

    Attributes:
        use_vector_search: Value of ``use_vector_search`` as a boolean flag.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_vector_search`` and
                ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_vector_search = self._as_bool(config_data['use_vector_search'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class GroupsSearchConfiguration:
    """Settings of the groups search read from a configuration mapping.

    Attributes:
        use_groups_search: Value of ``use_groups_search`` as a boolean flag.
        index_name: Value of ``index_name`` as a string.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_groups_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_groups_search = self._as_bool(config_data['use_groups_search'])
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class RocksNNSearchConfiguration:
    """Settings of the "rocks NN" search read from a configuration mapping.

    Attributes:
        use_rocks_nn_search: Value of ``use_rocks_nn_search`` as a boolean flag.
        index_name: Value of ``index_name`` as a string.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_rocks_nn_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_rocks_nn_search = self._as_bool(config_data['use_rocks_nn_search'])
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class AbbreviationSearchConfiguration:
    """Settings of the abbreviation search read from a configuration mapping.

    Attributes:
        use_abbreviation_search: Value of ``use_abbreviation_search`` as a
            boolean flag.
        index_name: Value of ``index_name`` as a string.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_abbreviation_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_abbreviation_search = self._as_bool(config_data['use_abbreviation_search'])
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class SegmentationSearchConfiguration:
    """Settings of the segmentation search read from a configuration mapping.

    Attributes:
        use_segmentation_search: Value of ``use_segmentation_search`` as a
            boolean flag.
        index_name: Value of ``index_name`` as a string.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_segmentation_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If ``k_neighbors`` cannot be converted to ``int``.
        """
        self.use_segmentation_search = self._as_bool(config_data['use_segmentation_search'])
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class SearchConfiguration:
    """Aggregate of the per-search settings found in one configuration mapping.

    Attributes:
        vector_search: ``VectorSearchConfiguration`` built from the
            ``vector_search`` section.
        people_elastic_search: ``PeopleSearchConfiguration`` built from the
            ``people_elastic_search`` section.
        chunks_elastic_search: ``ChunksElasticSearchConfiguration`` built from
            the ``chunks_elastic_search`` section.
        groups_elastic_search: ``GroupsSearchConfiguration`` built from the
            ``groups_elastic_search`` section.
        rocks_nn_elastic_search: ``RocksNNSearchConfiguration`` built from the
            ``rocks_nn_elastic_search`` section.
        segmentation_elastic_search: ``SegmentationSearchConfiguration`` built
            from the ``segmentation_elastic_search`` section.
        stop_index_names: List copy of the ``stop_index_names`` value.
        abbreviation_search: ``AbbreviationSearchConfiguration`` built from the
            ``abbreviation_search`` section.
    """

    def __init__(self, config_data):
        """Build every sub-configuration from its section of ``config_data``.

        Args:
            config_data: Mapping holding one sub-mapping per search section
                plus the iterable ``stop_index_names``.

        Raises:
            KeyError: If a section (or a key inside a section) is missing.
        """
        # For these sections the attribute name equals the configuration key.
        section_factories = (
            ('vector_search', VectorSearchConfiguration),
            ('people_elastic_search', PeopleSearchConfiguration),
            ('chunks_elastic_search', ChunksElasticSearchConfiguration),
            ('groups_elastic_search', GroupsSearchConfiguration),
            ('rocks_nn_elastic_search', RocksNNSearchConfiguration),
            ('segmentation_elastic_search', SegmentationSearchConfiguration),
        )
        for section, factory in section_factories:
            setattr(self, section, factory(config_data[section]))

        self.stop_index_names = list(config_data['stop_index_names'])

        abbreviation_section = config_data['abbreviation_search']
        self.abbreviation_search = AbbreviationSearchConfiguration(abbreviation_section)
class FilesConfiguration:
    """File-location settings read from a configuration mapping.

    Attributes:
        empty_start: Value of ``empty_start`` as a boolean flag.
        regulations_path: Value of ``regulations_path`` as a string.
        default_regulations_path: Value of ``default_regulations_path`` as a
            string.
        documents_path: Value of ``documents_path`` as a string.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``empty_start``,
                ``regulations_path``, ``default_regulations_path`` and
                ``documents_path``.

        Raises:
            KeyError: If a key is missing.
        """
        self.empty_start = self._as_bool(config_data['empty_start'])
        self.regulations_path = str(config_data['regulations_path'])
        self.default_regulations_path = str(config_data['default_regulations_path'])
        self.documents_path = str(config_data['documents_path'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class RankingConfiguration:
    """Ranking settings read from a configuration mapping.

    Attributes:
        use_ranging: Value of ``use_ranging`` as a boolean flag.
        alpha: Value of ``alpha`` as a float.
        beta: Value of ``beta`` as a float.
        k_neighbors: Value of ``k_neighbors`` as an integer.
    """

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``use_ranging``, ``alpha``,
                ``beta`` and ``k_neighbors``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If a numeric value cannot be converted.
        """
        self.use_ranging = self._as_bool(config_data['use_ranging'])
        self.alpha = float(config_data['alpha'])
        self.beta = float(config_data['beta'])
        self.k_neighbors = int(config_data['k_neighbors'])

    @staticmethod
    def _as_bool(value):
        """Convert a raw configuration value to ``bool``.

        ``bool("false")`` is ``True``, so a textual flag (as can arrive from
        environment-variable substitution) is compared against the usual
        "off" spellings instead of merely being tested for emptiness.
        """
        if isinstance(value, str):
            return value.strip().lower() not in ('', '0', 'false', 'no', 'off')
        return bool(value)
class DataBaseConfiguration:
    """Aggregate of the storage, search, file and ranking settings.

    Attributes:
        elastic: ``ElasticConfiguration`` built from the ``elastic`` section.
        faiss: ``FaissDataConfiguration`` built from the ``faiss`` section.
        search: ``SearchConfiguration`` built from the ``search`` section.
        files: ``FilesConfiguration`` built from the ``files`` section.
        ranker: ``RankingConfiguration`` built from the ``ranging`` section.
    """

    def __init__(self, config_data):
        """Build every sub-configuration from its section of ``config_data``.

        Args:
            config_data: Mapping with the sections ``elastic``, ``faiss``,
                ``search``, ``files`` and ``ranging``.

        Raises:
            KeyError: If a section (or a key inside a section) is missing.
        """
        # (attribute name, configuration key, class) -- note that the
        # ``ranker`` attribute is fed from the key ``ranging``.
        layout = (
            ('elastic', 'elastic', ElasticConfiguration),
            ('faiss', 'faiss', FaissDataConfiguration),
            ('search', 'search', SearchConfiguration),
            ('files', 'files', FilesConfiguration),
            ('ranker', 'ranging', RankingConfiguration),
        )
        for attribute, section, factory in layout:
            setattr(self, attribute, factory(config_data[section]))
class LLMConfiguration:
    """Language-model settings read from a configuration mapping.

    Attributes:
        base_url: Value of ``base_url`` as a string, or ``None`` when unset.
        api_key_env: Value of ``api_key_env`` as a string, or ``None`` when
            unset.
        model: Value of ``model`` as a string.
        tokenizer: Value of ``tokenizer_name`` as a string.
        temperature: Value of ``temperature`` as a float.
        top_p: Value of ``top_p`` as a float.
        min_p: Value of ``min_p`` as a float.
        frequency_penalty: Value of ``frequency_penalty`` as a float.
        presence_penalty: Value of ``presence_penalty`` as a float.
        seed: Value of ``seed`` as an integer.
    """

    # Raw values that mean "not configured" for the optional string settings.
    _UNSET_VALUES = ("", "null", "None")

    def __init__(self, config_data):
        """Read the settings from ``config_data``.

        Args:
            config_data: Mapping with the keys ``base_url``, ``api_key_env``,
                ``model``, ``tokenizer_name``, ``temperature``, ``top_p``,
                ``min_p``, ``frequency_penalty``, ``presence_penalty`` and
                ``seed``.

        Raises:
            KeyError: If a key is missing.
            ValueError: If a numeric value cannot be converted.
        """
        self.base_url = self._optional_str(config_data['base_url'])
        self.api_key_env = self._optional_str(config_data['api_key_env'])
        self.model = str(config_data['model'])
        # The attribute is ``tokenizer`` while the key is ``tokenizer_name``.
        self.tokenizer = str(config_data['tokenizer_name'])
        self.temperature = float(config_data['temperature'])
        self.top_p = float(config_data['top_p'])
        self.min_p = float(config_data['min_p'])
        self.frequency_penalty = float(config_data['frequency_penalty'])
        self.presence_penalty = float(config_data['presence_penalty'])
        self.seed = int(config_data['seed'])

    @classmethod
    def _optional_str(cls, value):
        """Return ``value`` as a string, or ``None`` when it is unset.

        A real ``None`` (for example a YAML ``null``) is treated as unset as
        well; without this check it would be stored as the string ``"None"``.
        """
        if value is None or value in cls._UNSET_VALUES:
            return None
        return str(value)
class CommonConfiguration:
    """Logging-path settings read from a configuration mapping.

    Attributes:
        log_file_path: Value of ``log_file_path`` as a string.
        log_sql_path: Value of ``log_sql_path`` as a string.
    """

    def __init__(self, config_data):
        """Copy the settings from ``config_data``, coercing each to ``str``.

        Args:
            config_data: Mapping with the keys ``log_file_path`` and
                ``log_sql_path``.

        Raises:
            KeyError: If a key is missing.
        """
        # Each attribute carries the same name as the key it is read from.
        for option in ('log_file_path', 'log_sql_path'):
            setattr(self, option, str(config_data[option]))
class Configuration:
    """Top-level container for all configuration sections.

    After construction the instance exposes ``common_config``, ``db_config``
    and ``llm_config``.
    """

    def __init__(self, config_file_path: Optional[str] = None):
        """Load the configuration from a file.

        Args:
            config_file_path: Path to the configuration file. The parameter
                defaults to ``None``, but a path is mandatory.

        Raises:
            ValueError: If ``config_file_path`` is ``None``.
        """
        if config_file_path is None:
            raise ValueError('At least one of config_path must be not None.')
        self._load_from_config(config_file_path)

    def _load_data(self, data: Dict[str, Any]):
        """Populate the section attributes from a parsed configuration mapping.

        Args:
            data: Mapping with the sections ``common``, ``bd`` and ``llm``.
        """
        self.common_config = CommonConfiguration(data['common'])
        # The database section is stored under the key 'bd'.
        self.db_config = DataBaseConfiguration(data['bd'])
        self.llm_config = LLMConfiguration(data['llm'])

    def _load_from_config(self, config_file_path: str):
        """Parse the configuration file and load the resulting mapping.

        Args:
            config_file_path: Path to the configuration file, handed to
                ``parse_config``.
        """
        self._load_data(parse_config(config_file_path))