Qwen2.5-0.5B-Rag-Thinking-Flan-T5

Running

App Files Files Community

Akjava committed on Mar 17

Commit

a05ab4b

verified ·

1 Parent(s): ba0ce3e

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -3

app.py CHANGED Viewed

@@ -110,6 +110,55 @@ description = """Gemma 3 is a family of lightweight, multimodal open models that
 llm = None
 llm_model = None
 def respond(
     message: str,
     history: List[Tuple[str, str]],
@@ -150,14 +199,16 @@ def respond(
                 flash_attn=False,
                 n_gpu_layers=0,
                 n_batch=8,
-                n_ctx=2048,
                 n_threads=2,
                 n_threads_batch=2,
             )
             llm_model = model
         provider = LlamaCppPythonProvider(llm)
-        text = retriever_tool(query=f"{message}")
         retriever_system="""
         You are an AI assistant that answers questions based on documents provided by the user.  Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
@@ -239,7 +290,7 @@ demo = gr.ChatInterface(
             value="You are a helpful assistant.",
             label="System Prompt",
             info="Define the AI assistant's personality and behavior",
-            lines=2,
         ),
         gr.Slider(
             minimum=512,

 llm = None
 llm_model = None
+query_system = """
+You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
+The goal is to identify the most important keywords for a search engine.
+Here are some examples:
+User Question: What is transformer?
+Search Query: transformer
+User Question: How does a transformer model work in natural language processing?
+Search Query: transformer model natural language processing
+User Question: What are the advantages of using transformers over recurrent neural networks?
+Search Query: transformer vs recurrent neural network advantages
+User Question: Explain the attention mechanism in transformers.
+Search Query: transformer attention mechanism
+User Question: What are the different types of transformer architectures?
+Search Query: transformer architectures
+User Question: What is the history of the transformer model?
+Search Query: transformer model history
+"""  # Few-shot system prompt passed to the agent in to_query() to turn a user question into search keywords.
+def to_query(provider,message):  # Rewrite `message` into a search query using the LLM behind `provider`; returns the agent's response.
+    try:
+        agent = LlamaCppAgent(  # separate agent used only for the query-rewriting step
+                provider,
+                system_prompt=f"{query_system}",  # module-level few-shot rewriter prompt
+                predefined_messages_formatter_type=MessagesFormatterType.GEMMA_2,  # NOTE(review): GEMMA_2 chat template is hard-coded — confirm it matches the loaded model
+                debug_output=True,
+            )
+        settings = provider.get_provider_default_settings()  # provider defaults; no overrides are applied in this function
+        messages = BasicChatHistory()  # new, empty history on every call, so earlier turns are not passed in
+        result = agent.get_chat_response(
+                message,
+                llm_sampling_settings=settings,
+                chat_history=messages,
+                returns_streaming_generator=False,  # presumably returns the complete response instead of a stream — TODO confirm
+                print_output=False,
+            )
+        return result
+    except Exception as e:
+        # Re-raise any failure wrapped in CustomExceptionHandling (defined outside this view), chained to the original error.
+        raise CustomExceptionHandling(e, sys) from e
 def respond(
     message: str,
     history: List[Tuple[str, str]],
                 flash_attn=False,
                 n_gpu_layers=0,
                 n_batch=8,
+                n_ctx=4096,
                 n_threads=2,
                 n_threads_batch=2,
             )
             llm_model = model
         provider = LlamaCppPythonProvider(llm)
+        query = to_query(provider,message)
+        text = retriever_tool(query=f"{query}")
         retriever_system="""
         You are an AI assistant that answers questions based on documents provided by the user.  Wait for the user to send a document. Once you receive the document, carefully read its contents and then answer the following question:
             value="You are a helpful assistant.",
             label="System Prompt",
             info="Define the AI assistant's personality and behavior",
+            lines=2,visible=False
         ),
         gr.Slider(
             minimum=512,