Qwen2.5-0.5B-Rag-Thinking-Flan-T5

Running

App Files Files Community

Akjava committed on Mar 17

Commit

fbeaa20

verified ·

1 Parent(s): e761993

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -4

app.py CHANGED Viewed

@@ -14,11 +14,75 @@ from llama_cpp_agent.chat_history import BasicChatHistory
 from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
-from typing import List, Tuple
 from logger import logging
 from exception import CustomExceptionHandling
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
@@ -88,10 +152,21 @@ def respond(
             llm_model = model
         provider = LlamaCppPythonProvider(llm)
         # Create the agent
         agent = LlamaCppAgent(
             provider,
-            system_prompt=f"{system_message}",
             predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
             debug_output=True,
         )
@@ -116,7 +191,7 @@ def respond(
         # Get the response stream
         stream = agent.get_chat_response(
-            message,
             llm_sampling_settings=settings,
             chat_history=messages,
             returns_streaming_generator=True,
@@ -141,7 +216,7 @@ def respond(
 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[["What is the capital of France?"], ["Tell me something about artificial intelligence."], ["What is gravity?"]],
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),

 from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
+from typing import List, Tuple,Dict,Optional
 from logger import logging
 from exception import CustomExceptionHandling
+from smolagents.gradio_ui import GradioUI
+from smolagents import (
+    CodeAgent,
+    GoogleSearchTool,
+    Model,
+    Tool,
+    LiteLLMModel,
+    ToolCallingAgent,
+    ChatMessage,tool,MessageRole
+)
+cache_file = "docs_processed.joblib"
+if os.path.exists(cache_file):
+    docs_processed = joblib.load(cache_file)
+    print("Loaded docs_processed from cache.")
+else:
+    knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
+    source_docs = [
+        Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]}) for doc in knowledge_base
+    ]
+    text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=400,
+        chunk_overlap=20,
+        add_start_index=True,
+        strip_whitespace=True,
+        separators=["\n\n", "\n", ".", " ", ""],
+    )
+    docs_processed = text_splitter.split_documents(source_docs)
+    joblib.dump(docs_processed, cache_file)
+    print("Created and saved docs_processed to cache.")
+class RetrieverTool(Tool):
+    name = "retriever"
+    description = "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query."
+    inputs = {
+        "query": {
+            "type": "string",
+            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
+        }
+    }
+    output_type = "string"
+    def __init__(self, docs, **kwargs):
+        super().__init__(**kwargs)
+        self.retriever = BM25Retriever.from_documents(
+            docs,
+            k=7,
+        )
+    def forward(self, query: str) -> str:
+        assert isinstance(query, str), "Your search query must be a string"
+        docs = self.retriever.invoke(
+            query,
+        )
+        return "\nRetrieved documents:\n" + "".join(
+            [
+                f"\n\n===== Document {str(i)} =====\n" + str(doc.page_content)
+                for i, doc in enumerate(docs)
+            ]
+        )
+# Module-level tool instance built from the processed documentation chunks.
+retriever_tool = RetrieverTool(docs_processed)
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
             llm_model = model
         provider = LlamaCppPythonProvider(llm)
+        text = retriever_tool(query=f"{message}")
+        retriever_system="""
+        You are an AI assistant that answers questions based on documents provided by the user.  Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
+Question: $s
+[Wait for user's message containing the document]
+        """ % message
         # Create the agent
         agent = LlamaCppAgent(
             provider,
+            system_prompt=f"{retriever_system}",
             predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,
             debug_output=True,
         )
         # Get the response stream
         stream = agent.get_chat_response(
+            text,
             llm_sampling_settings=settings,
             chat_history=messages,
             returns_streaming_generator=True,
 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
+    examples=[["What is the Transform?"], ["Tell me About Huggng."], ["How to upload dataset?"]],
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),