Spaces:

Orion-zhen
/

Qwen2.5-Math-7B-Instruct-GGUF

Runtime error

Orion-zhen committed on Oct 10, 2024

Commit

b18c263

verified ·

1 Parent(s): 30ba72c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,7 +26,8 @@ llm = Llama(
     model_path="models/Qwen2.5-Math-7B-Instruct-Q8_0.gguf",
     flash_attn=True,
     n_ctx=8192,
-    n_batch=1024
 )
 provider = LlamaCppPythonProvider(llm)
@@ -50,18 +51,19 @@ def respond(
     messages.append({"role": "user", "content": message})
     response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface

     model_path="models/Qwen2.5-Math-7B-Instruct-Q8_0.gguf",
     flash_attn=True,
     n_ctx=8192,
+    n_batch=1024,
+    chat_format="chatml"
 )
 provider = LlamaCppPythonProvider(llm)
     messages.append({"role": "user", "content": message})
     response = ""
+    response = llm.create_chat_completion(
+            messages=messages,
+            stream=True,
+            max_tokens=max_tokens,
+            temperature=temperature,
+            top_p=top_p
+        )
+    message_repl = ""
+    for chunk in response:
+        if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
+            message_repl = message_repl + \
+                chunk['choices'][0]["delta"]["content"]
+        yield message_repl
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface