import os
import re
from logging import Logger
from typing import List, Union

from openai import OpenAI

from common.configuration import FilterChunks, LLMConfiguration, SummaryChunks
from components.nmd.aggregate_answers import preprocessed_chunks

# User-facing fallback text returned when there is nothing to format.
_EMPTY_DATABASE_MESSAGE = 'БАЗА ДАННЫХ ПУСТА'


def _format_document(doc, *, filename_fallback: bool) -> str:
    """Render one document and its chunks as plain text.

    Args:
        doc: Object exposing ``title``, ``chunks`` and (when
            ``filename_fallback`` is true) ``filename``.
        filename_fallback: When the title equals ``'unknown'``, emit the file
            name in the title line instead of omitting the line.

    Returns:
        The formatted text, terminated by a blank line.

    NOTE(review): the source this was reconstructed from had lost its
    indentation; the trailing blank line is assumed to be emitted once per
    document rather than once per chunk - confirm.
    """
    parts = ['Документ: [1]\n']
    if doc.title != 'unknown':
        parts.append(f'Название документа: {doc.title}\n')
    elif filename_fallback:
        parts.append(f'Название документа: {doc.filename}\n')

    for chunk in doc.chunks:
        # Prefer the structured extra info when a chunk carries any.
        if len(chunk.other_info):
            parts.extend(f'{item}' for item in chunk.other_info)
        else:
            parts.append(f'{chunk.text_answer}')
    parts.append('\n\n')
    return ''.join(parts)


def _format_person(person, *, summary_style: bool) -> str:
    """Render one employee record as plain text.

    Args:
        person: Object exposing ``person_name``, ``organizatinal_structure``,
            ``business_processes``, ``business_curator`` and ``groups``.
        summary_style: Selects the layout used for ``SummaryChunks`` input:
            the header line gets a document-title prefix, every
            organisational unit is wrapped in parentheses and a position
            equal to ``'undefined'`` is skipped. When false, the plain layout
            used for list input is produced (no prefix, no parentheses,
            position always printed).

    Returns:
        The formatted text, terminated by a backslash line and a blank line.

    NOTE(review): the source this was reconstructed from had lost its
    indentation; the manager line is assumed to be independent of the
    ``leads`` check and the terminator is assumed to be emitted once per
    person - confirm.
    """
    name = person.person_name
    if summary_style:
        parts = [f'Название документа: Информация о сотруднике {name}\n']
    else:
        parts = [f'Информация о сотруднике {name}\n']

    if person.organizatinal_structure is not None:
        for unit in person.organizatinal_structure:
            if summary_style:
                parts.append('(')
            if not summary_style or unit.position != 'undefined':
                parts.append(f'Должность: {unit.position}\n')
            if unit.leads is not None:
                parts.append('Руководит следующими сотрудниками:\n')
                parts.extend(
                    f'{lead.person}\n'
                    for lead in unit.leads
                    if lead.person != 'undefined'
                )
            if unit.subordinates.person_name != 'undefined':
                parts.append(
                    f'Руководителем {name} является '
                    f'{unit.subordinates.person_name}\n'
                )
            if summary_style:
                parts.append(')')

    if person.business_processes is not None:
        # Plural header ends with a newline, singular one continues the line.
        if len(person.business_processes) >= 2:
            parts.append('Отвечает за Бизнес процессы:\n')
        else:
            parts.append('Отвечает за Бизнес процесс: ')
        parts.extend(
            f'{process.processes_name}\n'
            for process in person.business_processes
        )

    if person.business_curator is not None:
        parts.append('Является Бизнес-куратором (РОКС НН):\n')
        parts.extend(f'{curator.company_name}' for curator in person.business_curator)

    if person.groups is not None:
        if len(person.groups) >= 2:
            parts.append('Входит в состав групп:\n')
        else:
            parts.append('Входит в состав группы:\n')
        for group in person.groups:
            position = group.position_in_group
            # The group role is stored in plural form; print it in singular.
            if 'Члены' in position:
                position = position.replace('Члены', 'Член')
            parts.append(
                f'{group.group_name}. Должность внутри группы: {position}\n'
            )

    parts.append('\\\n\n')
    return ''.join(parts)


def _trim_llm_answer(answer: str) -> str:
    """Cut the raw model answer down to the span between its markers.

    Everything up to and including the first ``%%`` is dropped; everything
    after the first ``Конец ответа`` is dropped (the marker itself is kept).
    Either marker may be absent.
    """
    start = re.search('%%', answer)
    if start:
        answer = answer[start.end():]
    end = re.search('Конец ответа', answer)
    if end:
        answer = answer[:end.end()]
    return answer


class LLMChunkSearch:
    """Ask an OpenAI-compatible chat model to answer a query from retrieved chunks.

    When no ``base_url`` is configured, or when every request attempt fails,
    a locally formatted rendering of the first retrieved item is returned
    instead of a model answer.
    """

    # Number of times the chat completion request is tried before giving up.
    _MAX_ATTEMPTS = 5

    def __init__(self, config: LLMConfiguration, prompt: str, logger: Logger):
        """Store the configuration and create the API client if possible.

        Args:
            config: LLM settings (endpoint, model, sampling parameters and
                the name of the environment variable holding the API key).
            prompt: Prompt template, stored on the instance.
            logger: Logger used for progress and error messages.
        """
        self.config = config
        self.logger = logger
        self.prompt = prompt
        # Regex patterns kept as instance attributes; they are not referenced
        # by the methods visible in this file.
        self.pattern = r'\d+'
        self.pattern_list = [
            r'\[\d+\]',
            r'Ответ: [1-9]',
            r'Ответ [1-9]',
            r'Ответ[1-9]',
            r'Ответ:[1-9]',
            r'Ответ: \[\d+\]',
        ]

        # Initialize the OpenAI client only when an endpoint is configured.
        if self.config.base_url is not None:
            self.client = OpenAI(
                base_url=self.config.base_url,
                api_key=os.getenv(self.config.api_key_env),
            )
        else:
            self.client = None

    def llm_chunk_search(self, query: str, answer_chunks: SummaryChunks, prompt: str):
        """Answer ``query`` from ``answer_chunks`` using the configured model.

        Args:
            query: User query.
            answer_chunks: Retrieved chunks to process.
            prompt: System prompt template; formatted with ``query`` and
                ``answer`` (the preprocessed chunk text).

        Returns:
            A 4-tuple ``(text_chunks, answer, prompt_used, token_count)``.
            Without a client the answer is the local fallback rendering, the
            prompt is the unformatted template and the token count is 0.
            After all attempts fail the answer is the local fallback
            rendering, the prompt is the formatted one and the token count
            is 0.
        """
        text_chunks = preprocessed_chunks(
            answer_chunks, self.config.base_url, self.logger
        )
        self.logger.info('Searching LLM Chunks')

        if self.client is None:
            return (
                text_chunks,
                self.__postprocessing_answer_llm(answer_chunks),
                prompt,
                0,
            )

        llm_prompt = prompt.format(query=query, answer=text_chunks)

        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                response = self.client.chat.completions.create(
                    model=self.config.model,
                    messages=[
                        # Send the formatted prompt: the raw template would
                        # reach the model without the retrieved chunks.
                        {"role": "system", "content": llm_prompt},
                        {"role": "user", "content": query},
                    ],
                    temperature=self.config.temperature,
                    top_p=self.config.top_p,
                    frequency_penalty=self.config.frequency_penalty,
                    presence_penalty=self.config.presence_penalty,
                    seed=self.config.seed,
                )
                answer_llm = response.choices[0].message.content
                count_tokens = response.usage.total_tokens
                self.logger.info(f'Answer LLM {answer_llm}')

                # Trimming stays inside the try block so that a malformed
                # response (e.g. no text content) triggers another attempt.
                answer_llm = _trim_llm_answer(answer_llm)
                return text_chunks, answer_llm, llm_prompt, count_tokens
            except Exception as e:
                # Broad catch is deliberate: any failure leads to a retry
                # and finally to the local fallback below.
                self.logger.error(f"Attempt {attempt} failed: {str(e)}")

        self.logger.error("All attempts failed")
        return (
            text_chunks,
            self.__postprocessing_answer_llm(answer_chunks),
            llm_prompt,
            0,
        )

    @staticmethod
    def __postprocessing_answer_llm(answer_chunks: Union[SummaryChunks, List]) -> str:
        """Format the first retrieved item as a plain-text answer.

        Args:
            answer_chunks: Either a ``SummaryChunks`` (its first document is
                used, or its first person when ``doc_chunks`` is ``None``) or
                a list whose first element is a ``FilterChunks`` document or
                an employee record.

        Returns:
            The formatted text, or the empty-database message when there is
            nothing to format.
        """
        if isinstance(answer_chunks, SummaryChunks):
            doc_chunks = answer_chunks.doc_chunks
            # Check for None before taking the length so that people-only
            # results reach the person branch instead of raising TypeError.
            if doc_chunks is not None:
                if len(doc_chunks) == 0:
                    # TODO: test how this behaves and replace it with a
                    # proper notification about the database.
                    return _EMPTY_DATABASE_MESSAGE
                return _format_document(doc_chunks[0], filename_fallback=True)

            people = answer_chunks.people_search
            if not people:
                return _EMPTY_DATABASE_MESSAGE
            return _format_person(people[0], summary_style=True)

        if not answer_chunks:
            return _EMPTY_DATABASE_MESSAGE
        first = answer_chunks[0]
        if isinstance(first, FilterChunks):
            return _format_document(first, filename_fallback=False)
        return _format_person(first, summary_style=False)