import logging
from enum import Enum

# Shared by the console and file handlers so both outputs look identical.
_LOG_FORMAT = "[%(asctime)s.%(msecs)03d] %(module)30s:%(lineno)4d %(levelname)-7s - %(message)s"
_LOG_DATE_FORMAT = "%Y-%m-%d %H:%M:%S"


def configure_logging(level=logging.INFO, config_file_path='./common.log'):
    """Configure the root logger to write to the console and to a log file.

    Args:
        level: Threshold applied to the root logger and to both handlers.
        config_file_path: Path of the log file (despite the name, this is
            the log output file, opened in append mode).

    Like ``logging.basicConfig``, this does nothing when the root logger
    already has handlers attached.
    """
    if logging.getLogger().handlers:
        # basicConfig() ignores its arguments once the root logger is
        # configured; return before opening a log file that would go unused.
        return

    # The date format must live on the Formatter itself: basicConfig() only
    # applies ``datefmt`` to handlers that have no formatter yet.
    formatter = logging.Formatter(_LOG_FORMAT, datefmt=_LOG_DATE_FORMAT)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)

    # basicConfig() rejects ``filename`` combined with ``handlers`` (raises
    # ValueError), so the file output is an explicit handler instead.
    file_handler = logging.FileHandler(config_file_path, mode="a", encoding="utf-8")
    file_handler.setLevel(level)
    file_handler.setFormatter(formatter)

    logging.basicConfig(level=level, handlers=[console_handler, file_handler])


def _fuzzy_multi_match(query, fields, analyzer):
    """Build a ``multi_match`` clause with AUTO fuzziness over ``fields``."""
    return {
        "multi_match": {
            "query": f"{query}",
            "fields": fields,
            "fuzziness": "AUTO",
            "analyzer": analyzer,
        }
    }


def _nested(path, inner_query):
    """Wrap ``inner_query`` in a ``nested`` clause for the given path."""
    return {"nested": {"path": path, "query": inner_query}}


def get_elastic_query(query):
    """Return a fuzzy ``multi_match`` search body over the ``text`` field.

    Args:
        query: Search text; it is formatted into a string.

    Returns:
        dict: Elasticsearch request body using the ``russian`` analyzer.
    """
    return {"query": _fuzzy_multi_match(query, ["text"], "russian")}


def get_elastic_people_query(query):
    """Return the search body used for people lookups.

    The body is a ``bool``/``should`` over the person name and three nested
    paths (business processes, organisational structure, business curator),
    with ``min_score`` 13.0.

    Args:
        query: Search text (must be a ``str``; it is lower-cased to detect
            the curator keyword).

    Returns:
        dict: Elasticsearch request body.
    """
    # "бизнес куратор" and "бизнес-куратор" both contain "куратор", so this
    # single substring check covers every phrase that raises the boost.
    mentions_curator = "куратор" in query.lower()
    curator_boost = 30 if mentions_curator else 15

    should_clauses = [
        _fuzzy_multi_match(query, ["person_name^3"], "standard"),
        _nested(
            "business_processes",
            _fuzzy_multi_match(
                query,
                [
                    "business_processes.production_activities_section",
                    "business_processes.processes_name",
                ],
                "standard",
            ),
        ),
        # NOTE: "organizatinal_structure" is spelled this way on purpose here;
        # it is the field name sent to the index and must not be "corrected".
        _nested(
            "organizatinal_structure",
            _fuzzy_multi_match(
                query, ["organizatinal_structure.position^2"], "standard"
            ),
        ),
        _nested(
            "business_curator",
            _fuzzy_multi_match(
                query,
                [f"business_curator.company_name^{curator_boost}"],
                "standard",
            ),
        ),
    ]
    return {
        "query": {"bool": {"should": should_clauses}},
        "min_score": 13.0,
    }


def get_elastic_group_query(query):
    """Return the search body used for group lookups.

    Combines a fuzzy match of ``query`` on ``group_name`` with a fixed,
    low-boost (0.1) match on a constant phrase; ``min_score`` is 7.5.

    Args:
        query: Search text; it is formatted into a string.

    Returns:
        dict: Elasticsearch request body.
    """
    return {
        "query": {
            "bool": {
                "should": [
                    _fuzzy_multi_match(query, ["group_name"], "standard"),
                    {
                        "multi_match": {
                            "query": "персонального состава Персональный состав Комитета ПАО ГМК Норильский никель Рабочей группы",
                            "fields": ["group_name"],
                            "operator": "or",
                            "boost": 0.1,
                        }
                    },
                ]
            }
        },
        "min_score": 7.5,
    }


def get_elastic_rocks_nn_query(query):
    """Return a ``function_score`` search body over division/company names.

    The inner query is a fuzzy ``multi_match`` using ``custom_analyzer``.
    One scoring function applies weight 0.5 (``boost_mode`` "multiply") to
    the document whose ``_id`` is "3"; ``min_score`` is 0.5.

    Args:
        query: Search text; it is formatted into a string.

    Returns:
        dict: Elasticsearch request body.
    """
    return {
        "query": {
            "function_score": {
                "query": _fuzzy_multi_match(
                    query,
                    ["division_name", "division_name_2", "company_name"],
                    "custom_analyzer",
                ),
                "functions": [{"filter": {"term": {"_id": "3"}}, "weight": 0.5}],
                "boost_mode": "multiply",
            }
        },
        "min_score": 0.5,
    }


def get_elastic_segmentation_query(query):
    """Return the search body used for segmentation-model lookups.

    Combines a fuzzy match of ``query`` (``russian`` analyzer) with a fixed,
    low-boost (0.1) match on a constant phrase; ``min_score`` is 1.0.

    Args:
        query: Search text; it is formatted into a string.

    Returns:
        dict: Elasticsearch request body.
    """
    return {
        "query": {
            "bool": {
                "should": [
                    _fuzzy_multi_match(
                        query,
                        [
                            "segmentation_model",
                            "segmentation_model2",
                            "company_name",
                        ],
                        "russian",
                    ),
                    {
                        "multi_match": {
                            "query": "модели сегментации модель сегментации",
                            "fields": ["segmentation_model", "segmentation_model2"],
                            "operator": "or",
                            "boost": 0.1,
                        }
                    },
                ]
            }
        },
        "min_score": 1.0,
    }


def get_elastic_abbreviation_query(query):
    """Return a fuzzy ``multi_match`` search body over the ``text`` field.

    Args:
        query: Search text; it is formatted into a string.

    Returns:
        dict: Elasticsearch request body using the ``russian`` analyzer.
    """
    return {
        "query": {
            "multi_match": {
                "query": f"{query}",
                "fuzziness": "AUTO",
                "fields": ["text"],
                "analyzer": "russian",
            }
        }
    }


def combine_answer(answer):
    """Group answer chunks by the document they come from.

    Args:
        answer: Mapping of source name to a mapping of chunks. Every chunk
            is a dict with at least ``doc_name``, ``title`` and
            ``index_answer``. The ``people_search`` source is ignored.

    Returns:
        list: One dict per document, in first-seen order, with the keys
        ``filename``, ``title`` (taken from the first chunk of that
        document) and ``chunks``.

    A chunk whose ``index_answer`` was already seen is not added again to a
    document that is already present.
    """
    documents = {}
    seen_indexes = set()
    for source, chunks in answer.items():
        if source == 'people_search':
            continue
        for chunk in chunks.values():
            doc_name = chunk["doc_name"]
            title = chunk["title"]
            index = chunk["index_answer"]
            if doc_name not in documents:
                documents[doc_name] = {
                    "filename": doc_name,
                    "title": title,
                    "chunks": [chunk],
                }
            elif index not in seen_indexes:
                documents[doc_name]["chunks"].append(chunk)
            # else: duplicate chunk for a known document - skip it. Replacing
            # the entry here would throw away the chunks collected so far.
            seen_indexes.add(index)
    return list(documents.values())


class TypeQuestion(Enum):
    """Question type markers; each value is the bracketed tag string."""

    TYPE_ONE = '[1]'
    TYPE_TWO = '[2]'
    TYPE_THREE = '[3]'


def get_source_format(filename: str) -> str:
    """Return the file format derived from the file name, upper-cased.

    The format is the text after the last ``.``. A name without any dot is
    returned whole (upper-cased), because there is nothing to split on.
    """
    extension = filename.rsplit('.', 1)[-1]
    return extension.upper()