Update app.py
app.py CHANGED
@@ -28,7 +28,7 @@ def respond(
     temperature,
     top_p,
 ):
-
+    stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
     chat_template = '<s>[INST] ' + system_message
     # for human, assistant in history:
     #     chat_template += human + ' [/INST] ' + assistant + '</s>[INST]'
@@ -54,11 +54,17 @@ def respond(
         completion_to_prompt=completion_to_prompt,
         verbose=True,
     )
-    response = ""
-    for chunk in llm.stream_complete(message):
-        print(chunk.delta, end="", flush=True)
-        response += str(chunk.delta)
-        yield response
+    # response = ""
+    # for chunk in llm.stream_complete(message):
+    #     print(chunk.delta, end="", flush=True)
+    #     response += str(chunk.delta)
+    #     yield response
+    outputs = []
+    for chunk in llm.stream_complete(message):
+        outputs.append(chunk.delta)
+        if chunk.delta in stop_tokens:
+            break
+        yield "".join(outputs)
 
 demo = gr.ChatInterface(
     respond,
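In short, the commit defines a stop_tokens list, comments out the old streaming loop (which printed each delta to stdout and yielded a growing response string), and replaces it with a loop that accumulates deltas and terminates early, presumably because the model can stream raw chat-template tokens like [INST] or </s> back into the output. Below is a minimal, self-contained sketch of the new streaming behavior. It stubs out llm.stream_complete() with a fake token stream so it runs without a model; the Chunk dataclass and fake_stream() are hypothetical stand-ins, not part of the commit or of any library API, and only the loop body mirrors the committed code.

    from dataclasses import dataclass

    # Same stop list as the commit.
    stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]

    @dataclass
    class Chunk:
        delta: str  # incremental text, mirroring the .delta attribute the loop reads

    def fake_stream(message):
        # Stand-in for llm.stream_complete(message): streams a short reply,
        # then leaks a raw stop token the way a llama.cpp model might.
        for delta in ["Hello", ", ", "world", "!", "</s>", "never reached"]:
            yield Chunk(delta)

    def respond(message):
        outputs = []
        for chunk in fake_stream(message):
            outputs.append(chunk.delta)
            if chunk.delta in stop_tokens:
                break  # cut the stream as soon as a stop token leaks out
            yield "".join(outputs)

    for partial in respond("hi"):
        print(partial)
    # Hello
    # Hello, 
    # Hello, world
    # Hello, world!

One consequence of the loop order: a leaked stop token is appended to outputs before the check, but because the break fires before the yield, no yielded string ever contains it; the last partial the ChatInterface displays is the one from the previous chunk.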