Spaces:
Sleeping
Sleeping
from typing import List, Dict, Optional, Tuple | |
import requests | |
from logging import Logger | |
from common.configuration import SemanticChunk | |
from common.configuration import SegmentationSearch | |
from common.configuration import SummaryChunks | |
from common.configuration import FilterChunks | |
from common.configuration import RocksNNSearch | |
from common.configuration import PeopleChunks | |
from common.configuration import SearchGroupComposition | |
def aggregate_answers(vector_answer: Optional[Dict] = None,
                      people_answer: Optional[List] = None,
                      chunks_answer: Optional[List] = None,
                      groups_answer: Optional[List] = None,
                      rocks_nn_answer: Optional[List] = None,
                      segmentation_answer: Optional[List] = None) -> Dict:
    """Merge raw per-backend search responses into one answer dict.

    Args:
        vector_answer: chunk hits from the vector search (keyed dict), or None.
        people_answer: Elasticsearch-style hit list for the people search.
        chunks_answer: chunk hits from the keyword/chunk search (keyed dict).
        groups_answer: hit list for the group-composition search.
        rocks_nn_answer: hit list for the ROCS NN division search.
        segmentation_answer: hit list for the segmentation-model search.

    Returns:
        Dict with any of the keys 'doc_chunks', 'people_search',
        'groups_search', 'rocks_nn_search', 'segmentation_search';
        sources that were not supplied (or are empty hit lists) are omitted.
    """
    answer = {}
    # Either document source alone is enough; combine_answer skips None inputs.
    if vector_answer is not None or chunks_answer is not None:
        answer['doc_chunks'] = combine_answer([vector_answer, chunks_answer])
    if people_answer is not None:
        answer['people_search'] = [PeopleChunks(**hit['_source']) for hit in people_answer]
    # Truthiness guards below (not `is not None`): these branches index [0],
    # which would raise IndexError on an empty hit list.
    if groups_answer:
        answer['groups_search'] = SearchGroupComposition(**groups_answer[0]['_source'])
    if rocks_nn_answer:
        answer['rocks_nn_search'] = RocksNNSearch(
            division=rocks_nn_answer[0]['_source']['division_name'],
            company_name=rocks_nn_answer[0]['_source']['company_name'])
    if segmentation_answer:
        answer['segmentation_search'] = SegmentationSearch(**segmentation_answer[0]['_source'])
    return answer
def combine_answer(answers):
    """Merge chunk hits from several sources, grouping them by document.

    Args:
        answers: iterable of keyed hit dicts (e.g. vector and keyword search
            results); None entries are skipped.

    Returns:
        List of FilterChunks, one per distinct ``doc_name``, each carrying the
        document's SemanticChunk list; chunks with an already-seen
        ``start_index_paragraph`` are treated as duplicates and dropped.
    """
    combined = []
    seen_doc_names = []
    seen_indexes = []
    for answer in answers:
        if answer is None:
            continue
        for key in answer:
            hit = answer[key]
            doc_name = hit["doc_name"]
            start_index = hit['start_index_paragraph']
            if doc_name in seen_doc_names:
                if start_index not in seen_indexes:
                    doc_pos = seen_doc_names.index(doc_name)
                    combined[doc_pos].chunks.append(SemanticChunk(**hit))
                    # Bug fix: record the index here too, so the same chunk
                    # arriving again from another source is not duplicated.
                    seen_indexes.append(start_index)
            else:
                combined.append(FilterChunks(
                    id=str(hit['id']),
                    filename=doc_name,
                    title=hit["title"],
                    chunks=[SemanticChunk(**hit)]))
                seen_doc_names.append(doc_name)
                seen_indexes.append(start_index)
    return combined
def preprocessed_chunks(answer_chunks: SummaryChunks, llm_host_tokens: str, logger: Logger) -> str:
    """Render aggregated search results into a numbered plain-text context.

    Each present section (document chunks, people, group, ROCS NN,
    segmentation) becomes a "Документ: [n]" block; blocks are separated by
    the literal '\\n\\\\\\n\\n' sequence. Document chunks are cut off once the
    estimated token count exceeds 20k.

    Args:
        answer_chunks: aggregated results; every section attribute may be None.
        llm_host_tokens: tokenizer endpoint URL (currently unused — see TODO).
        logger: logger used for the token-budget warning.

    Returns:
        The assembled prompt text.
    """
    output_text = ''
    count = 0
    count_tokens = 0
    if answer_chunks.doc_chunks is not None:
        for doc in answer_chunks.doc_chunks:
            output_text += f'Документ: [{count + 1}]\n'
            # Fall back to the file name when no human-readable title exists.
            if doc.title != 'unknown':
                output_text += f'Название документа: {doc.title}\n'
            else:
                output_text += f'Название документа: {doc.filename}\n'
            for chunk in doc.chunks:
                if len(chunk.other_info):
                    output_text += '...\n'
                    for info in chunk.other_info:
                        # NOTE(review): the original applied .replace('', '-')
                        # here, which intersperses '-' between every character;
                        # the first argument was almost certainly a non-ASCII
                        # bullet glyph lost in transit. Dropped until the real
                        # character is confirmed (the final '\uf02d' pass below
                        # still normalizes that dash variant).
                        output_text += f'{info}'
                    output_text += '...\n'
                else:
                    output_text += '...\n'
                    output_text += f'{chunk.text_answer}'
                    output_text += '...\n'
                # Rough estimate: ~2 tokens per character.
                count_tokens = len(output_text) * 2
                # TODO: deepinfra has no tokenizer endpoint; wire a real tokenizer in.
                # len(requests.post(url=f'{llm_host_tokens}', json={"content": output_text}).json()['tokens'])
                if count_tokens > 20000:
                    logger.info('Количество токенов превысило значение 20k! Оставшиеся чанки отброшены!')
                    break
            # Both the truncated and the normal path close the section the same
            # way, so the separator/count code is shared and only the break differs.
            output_text += '\n\\\n\n'
            count += 1
            if count_tokens > 20000:
                break
    if answer_chunks.people_search is not None:
        for doc in answer_chunks.people_search:
            output_text += f'Документ: [{count + 1}]\n'
            output_text += f'Название документа: Информация о сотруднике {doc.person_name}\n'
            output_text += f'Информация о сотруднике {doc.person_name}\n'
            if doc.organizatinal_structure is not None:
                for org_entry in doc.organizatinal_structure:
                    output_text += '[\n'
                    if org_entry.position != 'undefined':
                        output_text += f'Должность: {org_entry.position}'
                    if org_entry.leads is not None:
                        output_text += '\nРуководит следующими сотрудниками:\n'
                        for lead in org_entry.leads:
                            if lead.person != "undefined":
                                output_text += f'{lead.person}\n'
                    # NOTE(review): `subordinates` is rendered as the person's
                    # manager here — the attribute name looks inverted; verify
                    # against the data model.
                    if org_entry.subordinates is not None:
                        if org_entry.subordinates.person_name != "undefined":
                            output_text += f'Руководителем {doc.person_name} является {org_entry.subordinates.person_name}'
                    output_text += '\n]\n'
            if doc.business_processes is not None:
                # Singular vs plural heading depending on the process count.
                if len(doc.business_processes) >= 2:
                    output_text += 'Отвечает за Бизнес процессы:\n'
                else:
                    output_text += 'Отвечает за Бизнес процесс: '
                for process in doc.business_processes:
                    output_text += f'{process.processes_name}\n'
            if doc.business_curator is not None:
                output_text += 'Является Бизнес-куратором (РОКС НН):\n'
                for curator in doc.business_curator:
                    output_text += f'{curator.company_name}\n'
            if doc.groups is not None:
                output_text += '\nВходит в состав групп, комитетов, координационных советов (КО):\n'
                for group in doc.groups:
                    # Normalize the plural role label to singular for one person.
                    if 'Члены' in group.position_in_group:
                        output_text += f'{group.group_name}. Должность внутри группы: {group.position_in_group.replace("Члены", "Член")}\n'
                    else:
                        output_text += f'{group.group_name}. Должность внутри группы: {group.position_in_group}\n'
            output_text += '\n\\\n\n'
            count += 1
    if answer_chunks.groups_search is not None:
        output_text += f'Документ: [{count + 1}]\n'
        output_text += 'Название документа: Информация о группе\n'
        output_text += f'Название группы: {answer_chunks.groups_search.group_name}\n'
        # NOTE(review): the member table is only emitted for compositions of
        # more than one person — confirm a single-member group should be silent.
        if len(answer_chunks.groups_search.group_composition) > 1:
            output_text += '\t ФИО \t\t\t| Должность внутри группы\n'
            for person_data in answer_chunks.groups_search.group_composition:
                if 'Члены' in person_data.position_in_group:
                    output_text += f'{person_data.person_name:<{20}}| {person_data.position_in_group.replace("Члены", "Член")}\n'
                else:
                    output_text += f'{person_data.person_name:<{20}}| {person_data.position_in_group}\n'
        output_text += '\n\\\n\n'
        count += 1
    if answer_chunks.rocks_nn_search is not None:
        output_text += f'Документ: [{count + 1}]\n'
        output_text += f'Название документа: Информация о {answer_chunks.rocks_nn_search.division}\n'
        output_text += f'Название документа: В РОКС НН {answer_chunks.rocks_nn_search.division} входят:\n'
        for company_name in answer_chunks.rocks_nn_search.company_name:
            output_text += f'{company_name}\n'
        output_text += '\n\\\n\n'
        count += 1
    if answer_chunks.segmentation_search is not None:
        output_text += f'Документ: [{count + 1}]\n'
        output_text += f'Название документа: {answer_chunks.segmentation_search.segmentation_model}\n'
        output_text += f'Название документа: В {answer_chunks.segmentation_search.segmentation_model} входят:\n'
        for company_name in answer_chunks.segmentation_search.company_name:
            output_text += f'{company_name}\n'
        output_text += '\n\\\n\n'
        count += 1
    # Normalize the \uf02d private-use dash to '-'.
    # NOTE(review): the original chained a second .replace('', '-') here, which
    # would intersperse '-' between every character of the output; its pattern
    # was surely a lost non-ASCII glyph. Dropped until confirmed.
    output_text = output_text.replace('\uf02d', '-')
    return output_text