Shreyas094 committed
Commit b951f8f · verified · 1 Parent(s): 3c6b68b

Update app.py

Files changed (1)
  1. app.py +91 -23
app.py CHANGED
@@ -21,6 +21,15 @@ from langchain_community.llms import HuggingFaceHub
 from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
 from pydantic import BaseModel, Field
 from llama_cpp_agent.llm_output_settings import LlmStructuredOutputType
+from llama_cpp import Llama
+from llama_cpp_agent import LlamaCppPythonProvider, LlamaCppAgent
+from llama_cpp_agent.chat_history import BasicChatHistory
+from llama_cpp_agent.chat_history.messages import Roles
+from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
+from llama_cpp_agent.tools import WebSearchTool
+from llama_cpp_agent.prompt_templates import web_search_system_prompt, research_system_prompt
+from pydantic import BaseModel, Field
+from typing import List

 print("Available LlmStructuredOutputType options:")
 for option in LlmStructuredOutputType:
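The second hunk below references several names that are defined in app.py outside the lines shown in this commit: the module-level cache variables `llm` and `llm_model` (used via `global llm` and the `if llm is None or llm_model != model:` reload check) and the `CitingSources` Pydantic model passed to `LlmStructuredOutputSettings.from_pydantic_models` and read back as `citing_sources.sources`. A minimal sketch of what those out-of-hunk definitions presumably look like; the field description and exact wording are assumptions:

    from typing import List
    from pydantic import BaseModel, Field

    # Module-level cache: the Llama instance is re-created only when the selected
    # model file changes (assumed initialization; it is not part of this diff).
    llm = None
    llm_model = None

    class CitingSources(BaseModel):
        """Structured schema the answer agent fills in when asked to cite its sources."""
        sources: List[str] = Field(
            ...,
            description="List of the source URLs used to answer the user's question.",
        )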
@@ -216,55 +225,114 @@ def get_messages_formatter_type(model_name):
 def respond(
     message,
     history: list[tuple[str, str]],
+    model,
     system_message,
     max_tokens,
     temperature,
     top_p,
+    top_k,
     repeat_penalty,
-    top_k=50,
-    max_tokens_per_summary=2048
 ):
-    model = get_model(temperature, top_p, repeat_penalty, top_k, max_tokens, max_tokens_per_summary)
-    chat_template = MessagesFormatterType.MISTRAL
-
+    global llm
+    global llm_model
+
+    chat_template = get_messages_formatter_type(model)
+
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=get_context_by_model(model),
+        )
+        llm_model = model
+
+    provider = LlamaCppPythonProvider(llm)
+    logging.info(f"Loaded chat examples: {chat_template}")
+
     search_tool = WebSearchTool(
-        llm_provider=model,
+        llm_provider=provider,
         message_formatter_type=chat_template,
         max_tokens_search_results=12000,
         max_tokens_per_summary=2048,
     )

-    messages = BasicChatHistory()
+    web_search_agent = LlamaCppAgent(
+        provider,
+        system_prompt=web_search_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )

+    answer_agent = LlamaCppAgent(
+        provider,
+        system_prompt=research_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+
+    settings = provider.get_provider_default_settings()
+    settings.stream = False
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+
+    output_settings = LlmStructuredOutputSettings.from_functions(
+        [search_tool.get_tool()]
+    )
+
+    messages = BasicChatHistory()
     for msn in history:
         user = {"role": Roles.user, "content": msn[0]}
         assistant = {"role": Roles.assistant, "content": msn[1]}
         messages.add_message(user)
         messages.add_message(assistant)

-    # Perform web search
-    search_result = search_tool.run(message)
+    result = web_search_agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        structured_output_settings=output_settings,
+        add_message_to_chat_history=False,
+        add_response_to_chat_history=False,
+        print_output=False,
+    )

     outputs = ""
+    settings.stream = True
+
+    response_text = answer_agent.get_chat_response(
+        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" + result[0]["return_value"],
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )

-    # Generate response
-    response_prompt = f"""Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.
-
-    {search_result}
+    for text in response_text:
+        outputs += text
+        yield outputs

-    Respond in a clear and concise manner, citing sources where appropriate."""
+    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+        [CitingSources], LlmStructuredOutputType.object_instance
+    )

-    response = model(response_prompt)
-    outputs += response
+    citing_sources = answer_agent.get_chat_response(
+        "Cite the sources you used in your response.",
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=False,
+        structured_output_settings=output_settings,
+        print_output=False,
+    )

-    # Generate citations
-    citation_prompt = "Cite the sources you used in your response."
-    citing_sources = model(citation_prompt)
-
     outputs += "\n\nSources:\n"
-    outputs += citing_sources
-
-    return outputs
+    outputs += "\n".join(citing_sources.sources)
+    yield outputs


     # Gradio interface
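The trailing context line shows where the `# Gradio interface` section begins. Because `respond` is now a generator (it yields the growing `outputs` string) and takes `model` and `top_k` as explicit parameters, the interface has to pass those as additional inputs in the same order as the function signature. A minimal sketch of such wiring, assuming a `gr.ChatInterface`; the component choices, labels, and default values are illustrative, not the actual app.py code:

    import gradio as gr

    # Hypothetical wiring: additional_inputs must follow respond()'s parameter order
    # after (message, history): model, system_message, max_tokens, temperature,
    # top_p, top_k, repeat_penalty.
    demo = gr.ChatInterface(
        respond,
        additional_inputs=[
            gr.Dropdown(choices=["model.gguf"], value="model.gguf", label="Model"),  # placeholder model list
            gr.Textbox(value="You are a helpful research assistant.", label="System message"),
            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
            gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
            gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
            gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
        ],
    )

    if __name__ == "__main__":
        demo.launch()

Since the function yields partial results, Gradio streams each yielded value into the chat window as it is produced.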