muryshev committed
Commit e474712 · 1 Parent(s): 6f462bc
components/llm/common.py CHANGED
@@ -72,7 +72,7 @@ class LlmApi:
 class Message(BaseModel):
     role: str
     content: str
-    searchResults: List[str]
+    searchResults: str

 class ChatRequest(BaseModel):
     history: List[Message]
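The searchResults field changes from a list of strings to a single pre-joined string, so every producer of Message must now pass one string. A minimal sketch of the updated model and a conforming payload; the default value shown for searchResults is an assumption for illustration only, the diff keeps the field required:

from typing import List
from pydantic import BaseModel

class Message(BaseModel):
    role: str
    content: str
    searchResults: str = ""  # was List[str]; the "" default is hypothetical, not in the diff

class ChatRequest(BaseModel):
    history: List[Message]

# A client that previously sent ["chunk 1", "chunk 2"] now sends one pre-joined string:
msg = Message(role="user", content="hello", searchResults="[Source 1] - chunk 1\n[Source 2] - chunk 2")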
components/llm/deepinfra_api.py CHANGED
@@ -310,8 +310,8 @@ class DeepInfraApi(LlmApi):
         Yields:
             str: LLM response tokens.
         """
-        params
-        async with httpx.AsyncClient() as client:
+        timeout = httpx.Timeout(connect=30.0, read=None, pool=None, write=None, timeout=None)
+        async with httpx.AsyncClient(timeout=timeout) as client:
             request_data = self.create_chat_request(request, system_prompt, params)
             request_data["stream"] = True

@@ -319,7 +319,7 @@ class DeepInfraApi(LlmApi):
                 "POST",
                 f"{self.params.url}/v1/openai/chat/completions",
                 json=request_data,
-                headers=super().create_headers()
+                headers=super().create_headers(),
             ) as response:
                 if response.status_code != 200:
                     error_content = await response.aread()
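The stray `params` statement is removed, and the client now gets an explicit timeout: only the connect phase is bounded (30 s) while read, write and pool are left unlimited, so a long token stream is not cut off by httpx's 5-second default. A self-contained sketch of the same pattern, with an illustrative URL, payload and headers rather than the project's actual ones:

import asyncio
import httpx

async def stream_completion(url: str, payload: dict, headers: dict) -> None:
    # Bound only the connect phase; leave read/write/pool unlimited for long streams.
    timeout = httpx.Timeout(connect=30.0, read=None, write=None, pool=None, timeout=None)
    async with httpx.AsyncClient(timeout=timeout) as client:
        async with client.stream("POST", url, json=payload, headers=headers) as response:
            if response.status_code != 200:
                # In stream mode the body has to be read explicitly before use.
                error_content = await response.aread()
                raise RuntimeError(f"LLM request failed ({response.status_code}): {error_content!r}")
            async for line in response.aiter_lines():
                print(line)  # each non-empty line is an OpenAI-style "data: {...}" chunk

# Example call (hypothetical payload):
# asyncio.run(stream_completion(
#     "https://api.deepinfra.com/v1/openai/chat/completions",
#     {"model": "meta-llama/Llama-3.3-70B-Instruct", "messages": [], "stream": True},
#     {"Authorization": "Bearer <DEEPINFRA_API_KEY>"},
# ))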
components/llm/utils.py CHANGED
@@ -19,7 +19,7 @@ def convert_to_openai_format(request: ChatRequest, system_prompt: str) -> List[D
     for message in request.history:
         content = message.content
         if message.searchResults:
-            search_results = "\n".join(message.searchResults)
+            search_results = "\n" + message.searchResults
             content += f"\n<search-results>\n{search_results}\n</search-results>"

         openai_history.append({
@@ -45,7 +45,7 @@ def append_llm_response_to_history(history: ChatRequest, llm_response: str) -> C
     assistant_message = Message(
         role="assistant",
         content=llm_response,
-        searchResults=[]  # Empty list if searchResults are not provided
+        searchResults=""  # Empty string if searchResults is not provided
     )

     # Append the message to the history
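With searchResults now a single string, keeping `"\n".join(...)` would interleave newlines between its characters, so it is replaced by plain concatenation. A small sketch of the resulting OpenAI-style message content (values are made up); note that prepending "\n" before the f-string's own "\n" leaves a blank line at the top of the tag:

# Illustrative values; in the real code these come from the project's Message model.
search_results_text = "[Source 1] - the contract term is 12 months"
content = "What is the contract term?"

if search_results_text:
    search_results = "\n" + search_results_text   # string concatenation, not "\n".join(list)
    content += f"\n<search-results>\n{search_results}\n</search-results>"

openai_message = {"role": "user", "content": content}
print(content)
# What is the contract term?
# <search-results>
#
# [Source 1] - the contract term is 12 months
# </search-results>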
components/services/dataset.py CHANGED
@@ -586,7 +586,6 @@ class DatasetService:

     def get_current_dataset(self) -> Dataset | None:
         with self.db() as session:
-            print(session)
             result = session.query(Dataset).filter(Dataset.is_active == True).first()
             return result

config_dev.yaml CHANGED
@@ -67,7 +67,7 @@ bd:
 llm:
   base_url: !ENV ${LLM_BASE_URL:https://api.deepinfra.com}
   api_key_env: !ENV ${API_KEY_ENV:DEEPINFRA_API_KEY}
-  model: !ENV ${MODEL_NAME:meta-llama/Llama-3.3-70B-Instruct-Turbo}
+  model: !ENV ${MODEL_NAME:meta-llama/Llama-3.3-70B-Instruct}
   tokenizer_name: !ENV ${TOKENIZER_NAME:unsloth/Llama-3.3-70B-Instruct}
   temperature: 0.14
   top_p: 0.95
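Only the fallback model name changes (the "-Turbo" suffix is dropped); deployments that set MODEL_NAME explicitly are unaffected. The commit does not show the loader behind the !ENV tag, so the following is only an assumed sketch of how a ${VAR:default} placeholder is typically resolved:

import os
import re

_ENV_PATTERN = re.compile(r"\$\{(?P<name>[^}:]+)(?::(?P<default>[^}]*))?\}")

def resolve_env(value: str) -> str:
    """Replace ${VAR:default} placeholders with the environment value or the default."""
    def _sub(match: re.Match) -> str:
        return os.environ.get(match.group("name"), match.group("default") or "")
    return _ENV_PATTERN.sub(_sub, value)

print(resolve_env("${MODEL_NAME:meta-llama/Llama-3.3-70B-Instruct}"))
# -> "meta-llama/Llama-3.3-70B-Instruct" unless MODEL_NAME is set in the environment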
routes/llm.py CHANGED
@@ -67,6 +67,46 @@ def insert_search_results_to_message(
         msg.content = new_content
         return True
     return False
+
+def try_insert_search_results(
+    chat_request: ChatRequest, search_results: str
+) -> bool:
+    for msg in reversed(chat_request.history):
+        if msg.role == "user" and not msg.searchResults:
+            msg.searchResults = search_results
+            return True
+    return False
+
+def collapse_history_to_first_message(chat_request: ChatRequest) -> ChatRequest:
+    """
+    Collapses the history into a single first message and returns a new ChatRequest object.
+    Format:
+    <search-results>[Source] - text</search-results>
+    role: message text
+    """
+    if not chat_request.history:
+        return ChatRequest(history=[])
+
+    # Collect the history into a single string
+    collapsed_content = []
+    for msg in chat_request.history:
+        # Add the search results, if any
+        if msg.searchResults:
+            collapsed_content.append(f"<search-results>{msg.searchResults}</search-results>")
+        # Add the message text with its role
+        if msg.content.strip():
+            collapsed_content.append(f"{msg.role}: {msg.content.strip()}")
+
+    # Build the final text with line breaks
+    new_content = "\n".join(collapsed_content)
+
+    # Create a new message and a new ChatRequest object
+    new_message = Message(
+        role='user',
+        content=new_content,
+        searchResults=''
+    )
+    return ChatRequest(history=[new_message])

 async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prompt: str,
                         predict_params: LlmPredictParams,
@@ -75,9 +115,13 @@ async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prom
     """
     Generator for streaming the LLM response via SSE.
     """
+
     # Search handling
     last_query = get_last_user_message(request)
+
+
     if last_query:
+
         dataset = dataset_service.get_current_dataset()
         if dataset is None:
             raise HTTPException(status_code=400, detail="Dataset not found")
@@ -86,17 +130,22 @@ async def sse_generator(request: ChatRequest, llm_api: DeepInfraApi, system_prom
         text_chunks = entity_service.build_text(chunks, scores)
         search_results_event = {
             "event": "search_results",
-            "data": f"\n<search-results>\n{text_chunks}\n</search-results>"
+            "data": f"{text_chunks}"
         }
         yield f"data: {json.dumps(search_results_event, ensure_ascii=False)}\n\n"

-        new_message = f'{last_query.content}\n<search-results>\n{text_chunks}\n</search-results>'
-        insert_search_results_to_message(request, new_message)
-
+        # new_message = f'<search-results>\n{text_chunks}\n</search-results>\n{last_query.content}'
+
+        try_insert_search_results(request, text_chunks)
+
+
+        # Collapse the history into the first message
+        collapsed_request = collapse_history_to_first_message(request)
+
         # Stream the response tokens
-        async for token in llm_api.get_predict_chat_generator(request, system_prompt, predict_params):
+        async for token in llm_api.get_predict_chat_generator(collapsed_request, system_prompt, predict_params):
             token_event = {"event": "token", "data": token}
-            logger.info(f"Streaming token: {token}")
+            # logger.info(f"Streaming token: {token}")
             yield f"data: {json.dumps(token_event, ensure_ascii=False)}\n\n"

         # Final event
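Net effect of this change: the SSE search_results event now carries the bare text_chunks without the <search-results> wrapper, the retrieved text is stored on the last user message via try_insert_search_results, and the whole history is flattened into one user message before the LLM call. A hedged walk-through with a made-up history and search text, assuming the Message/ChatRequest models from components/llm/common.py and the two helpers added above:

# Illustrative history; contents and search text are invented for the example.
request = ChatRequest(history=[
    Message(role="user", content="What is the contract term?", searchResults=""),
    Message(role="assistant", content="The term is 12 months.", searchResults=""),
    Message(role="user", content="And the renewal terms?", searchResults=""),
])

# 1. The retrieved text is attached to the most recent user message that has no results yet.
try_insert_search_results(request, "[Contract 1] - renewal by mutual agreement for 12 months")

# 2. The whole history is flattened into a single user message before the LLM call.
collapsed = collapse_history_to_first_message(request)
print(collapsed.history[0].content)
# user: What is the contract term?
# assistant: The term is 12 months.
# <search-results>[Contract 1] - renewal by mutual agreement for 12 months</search-results>
# user: And the renewal terms?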