generic-chatbot-backend / common /configuration.py
muryshev's picture
init
57cf043
raw
history blame
9.27 kB
"""This module includes classes to define configurations."""
from typing import Any, Dict, List, Optional
from pyaml_env import parse_config
from pydantic import BaseModel
class Query(BaseModel):
    """Pydantic model with the fields of one query.

    ``query`` and ``query_abbreviation`` are required; the remaining two
    fields default to ``None``.
    """

    query: str
    # NOTE(review): presumably the query text after abbreviation handling --
    # confirm against the code that fills this field.
    query_abbreviation: str
    # Untyped list; the element type is not visible in this module.
    abbreviations_replaced: Optional[List] = None
    # The camelCase spelling is the field name callers use; do not rename.
    userName: Optional[str] = None
class SemanticChunk(BaseModel):
    """Pydantic model for a single chunk entry; every field is required."""

    index_answer: int
    doc_name: str
    title: str
    text_answer: str
    # doc_number: str  # TODO: later rename this variable to doc_id everywhere it is used
    # Untyped list; the element type is not visible in this module.
    other_info: List
    start_index_paragraph: int
class FilterChunks(BaseModel):
    """Pydantic model tying ``id``, ``filename`` and ``title`` to a chunk list.

    All four fields are required.
    """

    id: str
    filename: str
    title: str
    # Each element is validated as a SemanticChunk.
    chunks: List[SemanticChunk]
class BusinessProcess(BaseModel):
    """Pydantic model with three optional string fields.

    Each field defaults to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    production_activities_section: Optional[str] = None
    processes_name: Optional[str] = None
    level_process: Optional[str] = None
class Lead(BaseModel):
    """Pydantic model with the optional string fields ``person`` and ``leads``.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    person: Optional[str] = None
    leads: Optional[str] = None
class Subordinate(BaseModel):
    """Pydantic model with the optional fields ``person_name`` and ``position``.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    person_name: Optional[str] = None
    position: Optional[str] = None
class OrganizationalStructure(BaseModel):
    """Pydantic model with a position, a list of leads and a subordinate.

    Every field is optional and defaults to ``None``.
    """

    position: Optional[str] = None
    leads: Optional[List[Lead]] = None
    # NOTE: despite the plural name this holds a single Subordinate, not a list.
    subordinates: Optional[Subordinate] = None
class RocksNN(BaseModel):
    """Pydantic model with the optional fields ``division`` and ``company_name``.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    division: Optional[str] = None
    company_name: Optional[str] = None
class RocksNNSearch(BaseModel):
    """Pydantic model with an optional ``division`` and ``company_name`` list.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    division: Optional[str] = None
    # Untyped list here, whereas RocksNN.company_name is a single string.
    company_name: Optional[List] = None
class SegmentationSearch(BaseModel):
    """Pydantic model with an optional model name and ``company_name`` list.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    segmentation_model: Optional[str] = None
    # Untyped list; the element type is not visible in this module.
    company_name: Optional[List] = None
class Group(BaseModel):
    """Pydantic model with three optional string fields describing a group entry.

    Each field defaults to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    group_name: Optional[str] = None
    position_in_group: Optional[str] = None
    block: Optional[str] = None
class GroupComposition(BaseModel):
    """Pydantic model with optional ``person_name`` and ``position_in_group``.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    person_name: Optional[str] = None
    position_in_group: Optional[str] = None
class SearchGroupComposition(BaseModel):
    """Pydantic model with an optional group name and list of members.

    Both fields default to ``None`` explicitly. A bare ``Optional[...]``
    annotation is implicitly optional in pydantic v1 but required in
    pydantic v2, so the explicit default keeps the fields omittable under
    either version.
    """

    group_name: Optional[str] = None
    # Each element is validated as a GroupComposition.
    group_composition: Optional[List[GroupComposition]] = None
class PeopleChunks(BaseModel):
    """Pydantic model with a required ``person_name`` and four optional lists.

    Every list field defaults to ``None``.
    """

    business_processes: Optional[List[BusinessProcess]] = None
    # NOTE: "organizatinal" is misspelled, but the spelling is the field name
    # that callers and serialized data use, so it has to stay as it is.
    organizatinal_structure: Optional[List[OrganizationalStructure]] = None
    # Elements are RocksNN models even though the field is named business_curator.
    business_curator: Optional[List[RocksNN]] = None
    groups: Optional[List[Group]] = None
    person_name: str
class SummaryChunks(BaseModel):
    """Pydantic model bundling the optional results of the individual searches.

    Every result field defaults to ``None``; ``query_type`` defaults to the
    string ``'[3]'``.
    """

    doc_chunks: Optional[List[FilterChunks]] = None
    people_search: Optional[List[PeopleChunks]] = None
    # A single SearchGroupComposition, not a list.
    groups_search: Optional[SearchGroupComposition] = None
    rocks_nn_search: Optional[RocksNNSearch] = None
    segmentation_search: Optional[SegmentationSearch] = None
    # NOTE(review): the default is the literal text '[3]', not a list; what the
    # value encodes is not visible in this module -- confirm with the producer.
    query_type: str = '[3]'
class ElasticConfiguration:
    """Holds the ``es_host``, ``es_port``, ``use_elastic`` and ``people_path`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``es_host``, ``es_port``,
                ``use_elastic`` and ``people_path``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``es_port`` is text that is not an integer.
        """
        self.es_host = str(config_data['es_host'])
        self.es_port = int(config_data['es_port'])
        # bool('false') is True, so a flag that arrives as text (for example
        # substituted from an environment variable) is parsed explicitly:
        # only the listed words mean True, any other text means False.
        flag = config_data['use_elastic']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_elastic = bool(flag)
        self.people_path = str(config_data['people_path'])
class FaissDataConfiguration:
    """Holds the ``model_embedding_path``, ``device`` and ``path_to_metadata`` settings."""

    def __init__(self, config_data):
        """Store the three settings from ``config_data`` as strings.

        Args:
            config_data: Mapping with the keys ``model_embedding_path``,
                ``device`` and ``path_to_metadata``.

        Raises:
            KeyError: If one of the keys is missing.
        """

        def as_text(key):
            # Look the key up and convert in one step so a missing key
            # surfaces as soon as that setting is reached.
            return str(config_data[key])

        self.model_embedding_path = as_text('model_embedding_path')
        self.device = as_text('device')
        self.path_to_metadata = as_text('path_to_metadata')
class ChunksElasticSearchConfiguration:
    """Holds the ``use_chunks_search``, ``index_name`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_chunks_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_chunks_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_chunks_search = bool(flag)
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])
class PeopleSearchConfiguration:
    """Holds the ``use_people_search``, ``index_name`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_people_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_people_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_people_search = bool(flag)
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])
class VectorSearchConfiguration:
    """Holds the ``use_vector_search`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_vector_search`` and
                ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_vector_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_vector_search = bool(flag)
        self.k_neighbors = int(config_data['k_neighbors'])
class GroupsSearchConfiguration:
    """Holds the ``use_groups_search``, ``index_name`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_groups_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_groups_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_groups_search = bool(flag)
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])
class RocksNNSearchConfiguration:
    """Holds the ``use_rocks_nn_search``, ``index_name`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_rocks_nn_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_rocks_nn_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_rocks_nn_search = bool(flag)
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])
class AbbreviationSearchConfiguration:
    """Holds the ``use_abbreviation_search``, ``index_name`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_abbreviation_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_abbreviation_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_abbreviation_search = bool(flag)
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])
class SegmentationSearchConfiguration:
    """Holds the ``use_segmentation_search``, ``index_name`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_segmentation_search``,
                ``index_name`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If ``k_neighbors`` is text that is not an integer.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_segmentation_search']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_segmentation_search = bool(flag)
        self.index_name = str(config_data['index_name'])
        self.k_neighbors = int(config_data['k_neighbors'])
class SearchConfiguration:
    """Builds one settings object per search section of the configuration."""

    def __init__(self, config_data):
        """Create the per-section settings objects.

        Args:
            config_data: Mapping with one sub-mapping per search section
                (``vector_search``, ``people_elastic_search``,
                ``chunks_elastic_search``, ``groups_elastic_search``,
                ``rocks_nn_elastic_search``, ``segmentation_elastic_search``,
                ``abbreviation_search``) plus the iterable
                ``stop_index_names``.

        Raises:
            KeyError: If a section or ``stop_index_names`` is missing.
        """
        # Attribute name and configuration key coincide for these sections;
        # the tuple order is the construction order.
        section_classes = (
            ('vector_search', VectorSearchConfiguration),
            ('people_elastic_search', PeopleSearchConfiguration),
            ('chunks_elastic_search', ChunksElasticSearchConfiguration),
            ('groups_elastic_search', GroupsSearchConfiguration),
            ('rocks_nn_elastic_search', RocksNNSearchConfiguration),
            ('segmentation_elastic_search', SegmentationSearchConfiguration),
        )
        for section_name, settings_cls in section_classes:
            setattr(self, section_name, settings_cls(config_data[section_name]))

        # Copied into a fresh list so later changes to the source mapping do
        # not affect this object.
        self.stop_index_names = list(config_data['stop_index_names'])

        abbreviation_section = config_data['abbreviation_search']
        self.abbreviation_search = AbbreviationSearchConfiguration(abbreviation_section)
class FilesConfiguration:
    """Holds the ``empty_start`` flag and the three path settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``empty_start``,
                ``regulations_path``, ``default_regulations_path`` and
                ``documents_path``.

        Raises:
            KeyError: If one of the keys is missing.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['empty_start']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.empty_start = bool(flag)
        self.regulations_path = str(config_data['regulations_path'])
        self.default_regulations_path = str(config_data['default_regulations_path'])
        self.documents_path = str(config_data['documents_path'])
class RankingConfiguration:
    """Holds the ``use_ranging``, ``alpha``, ``beta`` and ``k_neighbors`` settings."""

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        Args:
            config_data: Mapping with the keys ``use_ranging``, ``alpha``,
                ``beta`` and ``k_neighbors``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If a numeric setting is text that cannot be parsed.
        """
        # bool('false') is True, so a flag that arrives as text is parsed
        # explicitly: only the listed words mean True, other text means False.
        flag = config_data['use_ranging']
        if isinstance(flag, str):
            flag = flag.strip().lower() in ('1', 'true', 'yes', 'y', 'on')
        self.use_ranging = bool(flag)
        self.alpha = float(config_data['alpha'])
        self.beta = float(config_data['beta'])
        self.k_neighbors = int(config_data['k_neighbors'])
class DataBaseConfiguration:
    """Builds the settings objects for the data-store part of the configuration."""

    def __init__(self, config_data):
        """Create one settings object per sub-section.

        Args:
            config_data: Mapping with the sub-mappings ``elastic``,
                ``faiss``, ``search``, ``files`` and ``ranging``.

        Raises:
            KeyError: If one of the sub-mappings is missing.
        """
        elastic_section = config_data['elastic']
        self.elastic = ElasticConfiguration(elastic_section)

        faiss_section = config_data['faiss']
        self.faiss = FaissDataConfiguration(faiss_section)

        search_section = config_data['search']
        self.search = SearchConfiguration(search_section)

        files_section = config_data['files']
        self.files = FilesConfiguration(files_section)

        # The section key is 'ranging' while the attribute is called 'ranker'.
        ranging_section = config_data['ranging']
        self.ranker = RankingConfiguration(ranging_section)
class LLMConfiguration:
    """Holds the endpoint, model and sampling settings read from a mapping."""

    # Text values that stand for "not set" in the optional string settings.
    _UNSET_MARKERS = ("", "null", "None")

    def __init__(self, config_data):
        """Read the settings from ``config_data`` and coerce their types.

        ``base_url`` and ``api_key_env`` become ``None`` when the value is
        ``None`` or one of the texts ``""``, ``"null"``, ``"None"``.

        Args:
            config_data: Mapping with the keys ``base_url``, ``api_key_env``,
                ``model``, ``tokenizer_name``, ``temperature``, ``top_p``,
                ``min_p``, ``frequency_penalty``, ``presence_penalty`` and
                ``seed``.

        Raises:
            KeyError: If one of the keys is missing.
            ValueError: If a numeric setting is text that cannot be parsed.
        """
        self.base_url = self._optional_str(config_data['base_url'])
        self.api_key_env = self._optional_str(config_data['api_key_env'])
        self.model = str(config_data['model'])
        # The key is 'tokenizer_name' while the attribute is 'tokenizer'.
        self.tokenizer = str(config_data['tokenizer_name'])
        self.temperature = float(config_data['temperature'])
        self.top_p = float(config_data['top_p'])
        self.min_p = float(config_data['min_p'])
        self.frequency_penalty = float(config_data['frequency_penalty'])
        self.presence_penalty = float(config_data['presence_penalty'])
        self.seed = int(config_data['seed'])

    @classmethod
    def _optional_str(cls, value):
        """Return ``str(value)``, or ``None`` when the value means "not set"."""
        # A real None (for example YAML ``null``) must not be turned into the
        # text 'None' by str(); treat it like the textual markers.
        if value is None or value in cls._UNSET_MARKERS:
            return None
        return str(value)
class CommonConfiguration:
    """Holds the ``log_file_path`` and ``log_sql_path`` settings."""

    def __init__(self, config_data):
        """Store both settings from ``config_data`` as strings.

        Args:
            config_data: Mapping with the keys ``log_file_path`` and
                ``log_sql_path``.

        Raises:
            KeyError: If one of the keys is missing.
        """

        def as_text(key):
            # Look the key up and convert in one step so a missing key
            # surfaces as soon as that setting is reached.
            return str(config_data[key])

        self.log_file_path = as_text('log_file_path')
        self.log_sql_path = as_text('log_sql_path')
class Configuration:
    """Top-level container for all configuration sections."""

    def __init__(self, config_file_path: Optional[str] = None):
        """Load the configuration from a file.

        A configuration file path is the only supported source, so the
        argument is effectively mandatory even though it defaults to None.

        Args:
            config_file_path: Path of the configuration file to read.

        Raises:
            ValueError: If ``config_file_path`` is None.
        """
        if config_file_path is None:
            raise ValueError('At least one of config_path must be not None.')
        self._load_from_config(config_file_path)

    def _load_data(self, data: Dict[str, Any]):
        """Build the section objects from an already parsed dictionary.

        Args:
            data: Configuration dictionary with the top-level keys
                ``common``, ``bd`` and ``llm``.
        """
        common_section = data['common']
        self.common_config = CommonConfiguration(common_section)
        # The data-store section is stored under the key 'bd'.
        database_section = data['bd']
        self.db_config = DataBaseConfiguration(database_section)
        llm_section = data['llm']
        self.llm_config = LLMConfiguration(llm_section)

    def _load_from_config(self, config_file_path: str):
        """Parse the configuration file and load the resulting dictionary.

        Args:
            config_file_path: Path of the configuration file, passed to
                ``pyaml_env.parse_config``.
        """
        self._load_data(parse_config(config_file_path))