Akjava committed on
Commit
9cbac54
·
1 Parent(s): 0851363
Files changed (1) hide show
  1. app.py +20 -7
app.py CHANGED
@@ -78,12 +78,12 @@ retriever_tool = RetrieverTool(docs_processed)
78
  # Download gguf model files
79
  huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
80
 
81
-
82
  hf_hub_download(
83
  repo_id="mradermacher/Qwen2.5-0.5B-Rag-Thinking-i1-GGUF",
84
  filename="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_K.gguf",
85
  local_dir="./models",
86
  )
 
87
  t5_size="base"
88
  hf_hub_download(
89
  repo_id=f"Felladrin/gguf-flan-t5-{t5_size}",
@@ -92,8 +92,13 @@ hf_hub_download(
92
  )
93
 
94
  # Set the title and description
95
- title = "Qwen2.5-0.5B-Rag-Thinking-Flan-T5"
96
- description = """My Best CPU Rag Solution"""
 
 
 
 
 
97
 
98
 
99
 
@@ -231,9 +236,9 @@ def answer(document:str,question:str,model:str="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_
231
  llm_model = model
232
  #provider = LlamaCppPythonProvider(llm)
233
 
234
- result = llm(qwen_prompt%(document,question),max_tokens=2048*4)
235
  #answer = to_answer(provider,document,question)
236
- return result['choices'][0]['text']
237
 
238
 
239
  def respond(
@@ -267,7 +272,15 @@ def respond(
267
 
268
  query = to_query(message)
269
  document = retriever_tool(query=query)
270
- return answer(document,message)
 
 
 
 
 
 
 
 
271
 
272
  # Create a chat interface
273
  demo = gr.ChatInterface(
@@ -287,7 +300,7 @@ demo = gr.ChatInterface(
287
  info="Select the AI model to use for chat",visible=False
288
  ),
289
  gr.Textbox(
290
- value="You are a helpful assistant.",
291
  label="System Prompt",
292
  info="Define the AI assistant's personality and behavior",
293
  lines=2,visible=False
 
78
  # Download gguf model files
79
  huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
80
 
 
81
  hf_hub_download(
82
  repo_id="mradermacher/Qwen2.5-0.5B-Rag-Thinking-i1-GGUF",
83
  filename="Qwen2.5-0.5B-Rag-Thinking.i1-Q6_K.gguf",
84
  local_dir="./models",
85
  )
86
+
87
  t5_size="base"
88
  hf_hub_download(
89
  repo_id=f"Felladrin/gguf-flan-t5-{t5_size}",
 
92
  )
93
 
94
  # Set the title and description
95
+ title = "Llama.cpp Qwen2.5-0.5B-Rag-Thinking-Flan-T5"
96
+ description = """## My Best CPU Rag Solution
97
+ - I use the forked [llama-cpp-python](https://github.com/fairydreaming/llama-cpp-python/tree/t5) server, and it doesn't support newer models
98
+ - Search query generation (query reformulation) task — I use flan-t5-base (large gives better results, but is too large for just this task)
99
+ - Qwen2.5-0.5B performs well for its small size.
100
+ - Anyway, Google's T5 series on CPU is amazing
101
+ """
102
 
103
 
104
 
 
236
  llm_model = model
237
  #provider = LlamaCppPythonProvider(llm)
238
 
239
+
240
  #answer = to_answer(provider,document,question)
241
+ #return result['choices'][0]['text']
242
 
243
 
244
  def respond(
 
272
 
273
  query = to_query(message)
274
  document = retriever_tool(query=query)
275
+ print(document)
276
+ answer(document,message)
277
+ response = ""
278
+ #do direct in here
279
+ for chunk in llm(system_message%(document,message),max_tokens=2048*4,stream=True,top_k=top_k, top_p=top_p, temperature=temperature, repeat_penalty=repeat_penalty):
280
+ text = chunk['choices'][0]['text']
281
+ #print(text, end='', flush=True) # print incrementally as chunks arrive
282
+ response += text
283
+ yield response
284
 
285
  # Create a chat interface
286
  demo = gr.ChatInterface(
 
300
  info="Select the AI model to use for chat",visible=False
301
  ),
302
  gr.Textbox(
303
+ value=qwen_prompt,
304
  label="System Prompt",
305
  info="Define the AI assistant's personality and behavior",
306
  lines=2,visible=False