Sentinel-AI-Beta-Test

Sleeping

App Files Files Community

Shreyas094 committed on Jul 23, 2024

Commit

b951f8f

verified ·

1 Parent(s): 3c6b68b

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -23

app.py CHANGED Viewed

@@ -21,6 +21,15 @@ from langchain_community.llms import HuggingFaceHub
 from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
 from pydantic import BaseModel, Field
 from llama_cpp_agent.llm_output_settings import LlmStructuredOutputType
 print("Available LlmStructuredOutputType options:")
 for option in LlmStructuredOutputType:
@@ -216,55 +225,114 @@ def get_messages_formatter_type(model_name):
 def respond(
     message,
     history: list[tuple[str, str]],
     system_message,
     max_tokens,
     temperature,
     top_p,
     repeat_penalty,
-    top_k=50,
-    max_tokens_per_summary=2048
 ):
-    model = get_model(temperature, top_p, repeat_penalty, top_k, max_tokens, max_tokens_per_summary)
-    chat_template = MessagesFormatterType.MISTRAL
     search_tool = WebSearchTool(
-        llm_provider=model,
         message_formatter_type=chat_template,
         max_tokens_search_results=12000,
         max_tokens_per_summary=2048,
     )
-    messages = BasicChatHistory()
     for msn in history:
         user = {"role": Roles.user, "content": msn[0]}
         assistant = {"role": Roles.assistant, "content": msn[1]}
         messages.add_message(user)
         messages.add_message(assistant)
-    # Perform web search
-    search_result = search_tool.run(message)
     outputs = ""
-    # Generate response
-    response_prompt = f"""Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.
-    {search_result}
-    Respond in a clear and concise manner, citing sources where appropriate."""
-    response = model(response_prompt)
-    outputs += response
-    # Generate citations
-    citation_prompt = "Cite the sources you used in your response."
-    citing_sources = model(citation_prompt)
     outputs += "\n\nSources:\n"
-    outputs += citing_sources
-    return outputs
 # Gradio interface

 from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
 from pydantic import BaseModel, Field
 from llama_cpp_agent.llm_output_settings import LlmStructuredOutputType
+from llama_cpp import Llama
+from llama_cpp_agent import LlamaCppPythonProvider, LlamaCppAgent
+from llama_cpp_agent.chat_history import BasicChatHistory
+from llama_cpp_agent.chat_history.messages import Roles
+from llama_cpp_agent.llm_output_settings import LlmStructuredOutputSettings, LlmStructuredOutputType
+from llama_cpp_agent.tools import WebSearchTool
+from llama_cpp_agent.prompt_templates import web_search_system_prompt, research_system_prompt
+from pydantic import BaseModel, Field
+from typing import List
 print("Available LlmStructuredOutputType options:")
 for option in LlmStructuredOutputType:
 def respond(
     message,
     history: list[tuple[str, str]],
+    model,
     system_message,
     max_tokens,
     temperature,
     top_p,
+    top_k,
     repeat_penalty,
 ):
+    global llm
+    global llm_model
+    chat_template = get_messages_formatter_type(model)
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=get_context_by_model(model),
+        )
+        llm_model = model
+    provider = LlamaCppPythonProvider(llm)
+    logging.info(f"Loaded chat examples: {chat_template}")
     search_tool = WebSearchTool(
+        llm_provider=provider,
         message_formatter_type=chat_template,
         max_tokens_search_results=12000,
         max_tokens_per_summary=2048,
     )
+    web_search_agent = LlamaCppAgent(
+        provider,
+        system_prompt=web_search_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+    answer_agent = LlamaCppAgent(
+        provider,
+        system_prompt=research_system_prompt,
+        predefined_messages_formatter_type=chat_template,
+        debug_output=True,
+    )
+    settings = provider.get_provider_default_settings()
+    settings.stream = False
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
+    settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
+    output_settings = LlmStructuredOutputSettings.from_functions(
+        [search_tool.get_tool()]
+    )
+    messages = BasicChatHistory()
     for msn in history:
         user = {"role": Roles.user, "content": msn[0]}
         assistant = {"role": Roles.assistant, "content": msn[1]}
         messages.add_message(user)
         messages.add_message(assistant)
+    result = web_search_agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        structured_output_settings=output_settings,
+        add_message_to_chat_history=False,
+        add_response_to_chat_history=False,
+        print_output=False,
+    )
     outputs = ""
+    settings.stream = True
+    response_text = answer_agent.get_chat_response(
+        f"Write a detailed and complete research document that fulfills the following user request: '{message}', based on the information from the web below.\n\n" + result[0]["return_value"],
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False,
+    )
+    for text in response_text:
+        outputs += text
+        yield outputs
+    output_settings = LlmStructuredOutputSettings.from_pydantic_models(
+        [CitingSources], LlmStructuredOutputType.object_instance
+    )
+    citing_sources = answer_agent.get_chat_response(
+        "Cite the sources you used in your response.",
+        role=Roles.tool,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=False,
+        structured_output_settings=output_settings,
+        print_output=False,
+    )
     outputs += "\n\nSources:\n"
+    outputs += "\n".join(citing_sources.sources)
+    yield outputs
 # Gradio interface