Spaces:
Sleeping
Sleeping
import os | |
import re | |
from logging import Logger | |
from typing import List, Union | |
from openai import OpenAI | |
from common.configuration import FilterChunks, LLMConfiguration, SummaryChunks | |
from components.nmd.aggregate_answers import preprocessed_chunks | |
class LLMChunkSearch:
    """Answer a query from retrieved chunks with an OpenAI-compatible chat model.

    When no ``base_url`` is configured, or when every request attempt fails,
    the answer falls back to a plain-text rendering of the first retrieved
    document or person record.
    """

    # Number of chat-completion attempts made before falling back.
    MAX_ATTEMPTS = 5
    # Text returned by the fallback when there is nothing to render.
    _EMPTY_RESULT = 'БАЗА ДАННЫХ ПУСТА'

    def __init__(self, config: LLMConfiguration, prompt: str, logger: Logger):
        """Store the settings and create the API client when a base URL is set.

        Args:
            config: LLM settings. ``base_url`` and ``api_key_env`` are read
                here; the model name and sampling parameters are read on
                every request.
            prompt: Prompt stored on the instance as ``self.prompt``.
            logger: Logger used for progress and error messages.
        """
        self.config = config
        self.logger = logger
        self.prompt = prompt
        # These patterns are not used inside this class; they are kept as
        # instance attributes because other code may read them.
        self.pattern = r'\d+'
        self.pattern_list = [
            r'\[\d+\]',
            r'Ответ: [1-9]',
            r'Ответ [1-9]',
            r'Ответ[1-9]',
            r'Ответ:[1-9]',
            r'Ответ: \[\d+\]',
        ]
        # Without a base URL no client is created and llm_chunk_search()
        # returns the locally rendered fallback answer instead.
        if self.config.base_url is not None:
            self.client = OpenAI(
                base_url=self.config.base_url,
                api_key=os.getenv(self.config.api_key_env),
            )
        else:
            self.client = None

    def llm_chunk_search(self, query: str, answer_chunks: SummaryChunks, prompt: str):
        """Ask the LLM to answer ``query`` using the retrieved chunks.

        Args:
            query: User query.
            answer_chunks: Retrieved chunks to process.
            prompt: System prompt template; it is formatted with the keyword
                arguments ``query`` and ``answer`` (the preprocessed chunks).
                Note that this argument, not ``self.prompt``, is used.

        Returns:
            Tuple ``(text_chunks, answer, prompt_used, token_count)``. When no
            client is configured the unformatted ``prompt`` is returned and
            the token count is 0. When all attempts fail the formatted prompt
            is returned and the token count is 0. In both cases ``answer`` is
            the locally rendered fallback text.
        """
        text_chunks = preprocessed_chunks(
            answer_chunks, self.config.base_url, self.logger
        )
        self.logger.info('Searching LLM Chunks')

        if self.client is None:
            return (
                text_chunks,
                self.__postprocessing_answer_llm(answer_chunks),
                prompt,
                0,
            )

        llm_prompt = prompt.format(query=query, answer=text_chunks)
        for attempt in range(1, self.MAX_ATTEMPTS + 1):
            # Deliberately broad: any failure (network, API, malformed
            # response) is logged and retried, and the caller always gets an
            # answer back.
            try:
                response = self.client.chat.completions.create(
                    model=self.config.model,
                    messages=[
                        # Send the formatted prompt: the raw template would
                        # leave the placeholders unfilled and the retrieved
                        # chunks would never reach the model.
                        {"role": "system", "content": llm_prompt},
                        {"role": "user", "content": query},
                    ],
                    temperature=self.config.temperature,
                    top_p=self.config.top_p,
                    frequency_penalty=self.config.frequency_penalty,
                    presence_penalty=self.config.presence_penalty,
                    seed=self.config.seed,
                )
                answer_llm = response.choices[0].message.content
                if answer_llm is None:
                    raise ValueError('LLM returned no message content')
                # Some servers omit usage statistics; a missing counter must
                # not discard an otherwise valid answer.
                usage = response.usage
                count_tokens = usage.total_tokens if usage is not None else 0
                self.logger.info('Answer LLM %s', answer_llm)

                # Keep only the text that follows the first '%%' marker.
                _, start_marker, tail = answer_llm.partition('%%')
                if start_marker:
                    answer_llm = tail
                # Drop everything after the first end marker; the marker
                # itself stays in the answer.
                end_marker = 'Конец ответа'
                end_pos = answer_llm.find(end_marker)
                if end_pos != -1:
                    answer_llm = answer_llm[:end_pos + len(end_marker)]
                return text_chunks, answer_llm, llm_prompt, count_tokens
            except Exception as e:
                self.logger.error("Attempt %d failed: %s", attempt, e)

        self.logger.error("All attempts failed")
        return (
            text_chunks,
            self.__postprocessing_answer_llm(answer_chunks),
            llm_prompt,
            0,
        )

    @staticmethod
    def __postprocessing_answer_llm(answer_chunks: Union[SummaryChunks, List]) -> str:
        """Render the first retrieved record as plain text.

        Declared static because it reads no instance state; the callers
        invoke it as ``self.__postprocessing_answer_llm(answer_chunks)``.

        Args:
            answer_chunks: Either a ``SummaryChunks`` (its first document is
                used when ``doc_chunks`` is not None, otherwise its first
                ``people_search`` entry) or a list whose first element is a
                ``FilterChunks`` document or a person record.

        Returns:
            Formatted string response, or the empty-database message when
            there is no record to render.
        """
        if isinstance(answer_chunks, SummaryChunks):
            documents = answer_chunks.doc_chunks
            # The None check must come before len(): a missing document list
            # means "use the people search results", not an error.
            if documents is not None:
                if len(documents) == 0:
                    # TODO: test how this behaves and replace it with a
                    # proper notification about the database.
                    return LLMChunkSearch._EMPTY_RESULT
                return LLMChunkSearch._format_document(
                    documents[0], filename_fallback=True
                )
            people = answer_chunks.people_search
            if not people:
                return LLMChunkSearch._EMPTY_RESULT
            return LLMChunkSearch._format_person(people[0], summary_style=True)

        if not answer_chunks:
            return LLMChunkSearch._EMPTY_RESULT
        first = answer_chunks[0]
        if isinstance(first, FilterChunks):
            return LLMChunkSearch._format_document(first, filename_fallback=False)
        return LLMChunkSearch._format_person(first, summary_style=False)

    @staticmethod
    def _format_document(doc, *, filename_fallback: bool) -> str:
        """Render one document: header, optional title line, then chunk texts.

        Args:
            doc: Document exposing ``title`` and ``chunks`` (and ``filename``
                when ``filename_fallback`` is True).
            filename_fallback: When the title is ``'unknown'``, print the
                file name instead of omitting the title line.
        """
        parts = ['Документ: [1]\n']
        if doc.title != 'unknown':
            parts.append(f'Название документа: {doc.title}\n')
        elif filename_fallback:
            parts.append(f'Название документа: {doc.filename}\n')
        for chunk in doc.chunks:
            # A chunk's extra info, when present, replaces its answer text.
            if chunk.other_info:
                parts.extend(f'{item}' for item in chunk.other_info)
            else:
                parts.append(f'{chunk.text_answer}')
        parts.append('\n\n')
        return ''.join(parts)

    @staticmethod
    def _format_person(person, *, summary_style: bool) -> str:
        """Render one person record: positions, processes, curated companies, groups.

        Args:
            person: Person record exposing ``person_name``,
                ``organizatinal_structure``, ``business_processes``,
                ``business_curator`` and ``groups``.
            summary_style: True for records taken from a ``SummaryChunks``:
                the header gets a document-title prefix, each organisation
                unit is wrapped in parentheses and ``'undefined'`` positions
                are skipped. False reproduces the plain list layout.
        """
        title_prefix = 'Название документа: ' if summary_style else ''
        parts = [f'{title_prefix}Информация о сотруднике {person.person_name}\n']

        if person.organizatinal_structure is not None:
            for unit in person.organizatinal_structure:
                if summary_style:
                    parts.append('(')
                if not summary_style or unit.position != 'undefined':
                    parts.append(f'Должность: {unit.position}\n')
                if unit.leads is not None:
                    parts.append('Руководит следующими сотрудниками:\n')
                    parts.extend(
                        f'{lead.person}\n'
                        for lead in unit.leads
                        if lead.person != 'undefined'
                    )
                if unit.subordinates.person_name != 'undefined':
                    parts.append(
                        f'Руководителем {person.person_name} является '
                        f'{unit.subordinates.person_name}\n'
                    )
                if summary_style:
                    parts.append(')')

        if person.business_processes is not None:
            # Plural header ends with a newline; the singular one is
            # followed by the process name on the same line.
            if len(person.business_processes) >= 2:
                parts.append('Отвечает за Бизнес процессы:\n')
            else:
                parts.append('Отвечает за Бизнес процесс: ')
            parts.extend(
                f'{process.processes_name}\n'
                for process in person.business_processes
            )

        if person.business_curator is not None:
            parts.append('Является Бизнес-куратором (РОКС НН):\n')
            parts.extend(
                f'{curator.company_name}' for curator in person.business_curator
            )

        if person.groups is not None:
            if len(person.groups) >= 2:
                parts.append('Входит в состав групп:\n')
            else:
                parts.append('Входит в состав группы:\n')
            for group in person.groups:
                # Turn the plural role name into the singular form; replace()
                # leaves the text unchanged when the word is absent.
                role = group.position_in_group.replace('Члены', 'Член')
                parts.append(
                    f'{group.group_name}. Должность внутри группы: {role}\n'
                )

        # Record terminator: a backslash followed by two newlines.
        parts.append('\\\n\n')
        return ''.join(parts)