Orion-zhen committed
Commit b18c263 · verified · 1 Parent(s): 30ba72c

Update app.py

Files changed (1)
  1. app.py +15 -13
app.py CHANGED
@@ -26,7 +26,8 @@ llm = Llama(
     model_path="models/Qwen2.5-Math-7B-Instruct-Q8_0.gguf",
     flash_attn=True,
     n_ctx=8192,
-    n_batch=1024
+    n_batch=1024,
+    chat_format="chatml"
 )
 
 provider = LlamaCppPythonProvider(llm)
@@ -50,18 +51,19 @@ def respond(
     messages.append({"role": "user", "content": message})
 
     response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
+    response = llm.create_chat_completion(
+        messages=messages,
+        stream=True,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p
+    )
+    message_repl = ""
+    for chunk in response:
+        if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
+            message_repl = message_repl + \
+                chunk['choices'][0]["delta"]["content"]
+            yield message_repl
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
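
For reference, a minimal standalone sketch of the streaming pattern this commit switches to: the old loop over a remote client.chat_completion is replaced by llama-cpp-python's local Llama.create_chat_completion with stream=True, whose chunks carry newly generated text in choices[0]["delta"]["content"]. The constructor arguments mirror the diff above; the stream_reply helper name and the sampling defaults are illustrative assumptions, not part of the commit.

# Sketch of the streaming pattern adopted in this commit, assuming
# llama-cpp-python is installed and the GGUF file exists at this path.
from llama_cpp import Llama

llm = Llama(
    model_path="models/Qwen2.5-Math-7B-Instruct-Q8_0.gguf",
    flash_attn=True,
    n_ctx=8192,
    n_batch=1024,
    chat_format="chatml",
)

def stream_reply(messages, max_tokens=512, temperature=0.7, top_p=0.95):
    # stream=True makes create_chat_completion return an iterator of chunks;
    # each chunk's choices[0]["delta"] dict holds any newly generated text.
    response = llm.create_chat_completion(
        messages=messages,
        stream=True,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )
    partial = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            partial += delta["content"]
            yield partial  # Gradio's ChatInterface re-renders the growing string

# Hypothetical usage: print the accumulating reply as it streams.
for text in stream_reply([{"role": "user", "content": "What is 2 + 2?"}]):
    print(text)

The added chat_format="chatml" makes llama-cpp-python format the prompt with the ChatML template that Qwen models expect, rather than relying on whatever template the GGUF metadata supplies.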