working
app.py CHANGED
@@ -40,7 +40,7 @@ else:
     joblib.dump(docs_processed, cache_file)
     print("Created and saved docs_processed to cache.")

-class RetrieverTool(Tool):
+class RetrieverTool():
     name = "retriever"
     description = "Uses semantic search to retrieve the parts of documentation that could be most relevant to answer your query."
     inputs = {
@@ -52,14 +52,14 @@ class RetrieverTool(Tool):
     output_type = "string"

     def __init__(self, docs, **kwargs):
-        super().__init__(**kwargs)
+        #super().__init__(**kwargs)

         self.retriever = BM25Retriever.from_documents(
             docs,
             k=7,
         )

-    def forward(self, query: str) -> str:
+    def __call__(self, query: str) -> str:
         assert isinstance(query, str), "Your search query must be a string"

         docs = self.retriever.invoke(
@@ -72,6 +72,8 @@ class RetrieverTool(Tool):
         ]
     )

+
+
 retriever_tool = RetrieverTool(docs_processed)
 # Download gguf model files
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
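The net effect of these three hunks: RetrieverTool no longer subclasses smolagents' Tool. The super().__init__ call is commented out and the truncated method (presumably smolagents' forward) becomes __call__, so the instance stays callable as retriever_tool(query=...). A minimal sketch of the resulting pattern, assuming the method joins the retrieved page_content (the body past invoke() is outside the hunks shown); PlainRetrieverTool, the separator string, and the sample document are illustrative, not from app.py.

from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document

class PlainRetrieverTool:
    """Plain-class version: no Tool base, so __call__ replaces forward()."""

    def __init__(self, docs):
        # BM25Retriever builds a lexical index over the docs; k=7 mirrors the diff.
        self.retriever = BM25Retriever.from_documents(docs, k=7)

    def __call__(self, query: str) -> str:
        assert isinstance(query, str), "Your search query must be a string"
        docs = self.retriever.invoke(query)
        # Separator is an assumption; the real return format is not in the diff.
        return "\n\n===Document===\n".join(d.page_content for d in docs)

tool = PlainRetrieverTool([Document(page_content="Gradio builds ML demos in Python.")])
print(tool(query="build a demo"))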
@@ -85,7 +87,7 @@ hf_hub_download(
 t5_size="base"
 hf_hub_download(
     repo_id=f"Felladrin/gguf-flan-t5-{t5_size}",
-    filename=f"flan-t5-{
+    filename=f"flan-t5-{t5_size}.Q8_0.gguf",
     local_dir="./models",
 )

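This hunk completes the filename argument so the Flan-T5 gguf resolves. A hedged sketch of how such a file is typically consumed with llama-cpp-python, which the generate_t5(llama, message) signature in the next hunk suggests; the prompt and sampling parameters below are illustrative, not taken from app.py.

from llama_cpp import Llama

t5_size = "base"
# Path mirrors local_dir/filename from the hf_hub_download call above.
llama = Llama(
    model_path=f"./models/flan-t5-{t5_size}.Q8_0.gguf",
    n_ctx=512,  # matches the "text size must be smaller than ctx(default=512)" note below
)
out = llama("Rewrite as a search query: how do I cache results?", max_tokens=64)
print(out["choices"][0]["text"])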
@@ -159,6 +161,7 @@ def generate_t5(llama,message):#text size must be smaller than ctx(default=512)
     return None


+llama = None
 def to_query(question):
     system = """
 You are a query rewriter. Your task is to convert a user's question into a concise search query suitable for information retrieval.
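The new module-level llama = None reads like a lazy-init sentinel (or simply a guard against a NameError before the first load). A sketch of the lazy-load pattern under that assumption; ensure_llama is a hypothetical helper, not a function in app.py.

from llama_cpp import Llama

llama = None

def ensure_llama(model_path: str = "./models/flan-t5-base.Q8_0.gguf") -> Llama:
    global llama
    if llama is None:  # first call pays the load cost; later calls reuse the instance
        llama = Llama(model_path=model_path, n_ctx=512)
    return llama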
@@ -200,6 +203,17 @@ Search Query:
     return None


+qwen_prompt = """<|im_start|>system
+You answer questions from the user, always using the context provided as a basis.
+Write down your reasoning for answering the question, between the <think> and </think> tags.<|im_end|>
+<|im_start|>user
+Context:
+%s
+Question:
+%s<|im_end|>
+<|im_start|>assistant
+<think>"""
+
 def answer(document:str,question:str,model:str="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_K.gguf")->str:
     global llm
     global llm_model
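The added template carries two printf-style %s slots and deliberately stops at an open <think> tag. Presumably it is filled with the % operator, leaving the assistant turn unfinished so the model continues with its reasoning before the final answer. The sketch below reuses qwen_prompt as defined in the hunk above; the two strings are placeholders, not values from app.py.

document = "Gradio is a Python library for building ML demos."  # placeholder
question = "What is Gradio?"                                     # placeholder
prompt = qwen_prompt % (document, question)
# prompt now ends with "<think>", priming the model to emit reasoning first.
print(prompt)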
@@ -251,7 +265,9 @@ def respond(
     if model is None:#
         return

-
+    query = to_query(message)
+    document = retriever_tool(query=query)
+    return answer(document,message)

 # Create a chat interface
 demo = gr.ChatInterface(
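The three added lines wire up the whole RAG pipeline: rewrite the user message into a search query, retrieve matching documentation, then answer over it. A standalone sketch of that flow with each stage stubbed out so the sequence runs on its own; the stub bodies stand in for the real flan-t5, BM25, and Qwen implementations above.

def to_query(question: str) -> str:
    return question  # stub; app.py rewrites the question with flan-t5

def retriever_tool(query: str) -> str:
    return f"(docs matching: {query})"  # stub; app.py uses BM25 retrieval

def answer(document: str, question: str) -> str:
    return f"Answer to {question!r} grounded in {document}"  # stub; app.py uses the Qwen gguf

def respond(message: str) -> str:
    query = to_query(message)
    document = retriever_tool(query=query)
    return answer(document, message)

print(respond("How do I cache docs_processed?"))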