Update app.py
Browse files
app.py
CHANGED
@@ -28,10 +28,10 @@ def respond(
     temperature,
     top_p,
 ):
-    stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
+    # stop_tokens = ["</s>", "[INST]", "[INST] ", "<s>", "[/INST]", "[/INST] "]
     chat_template = '<s>[INST] ' + system_message
-    for human, assistant in history:
-        chat_template += human + ' [/INST] ' + assistant + '</s>[INST]'
+    # for human, assistant in history:
+    #     chat_template += human + ' [/INST] ' + assistant + '</s>[INST]'
     chat_template += ' ' + message + ' [/INST]'

     print(chat_template)
@@ -55,7 +55,7 @@ def respond(
         verbose=True,
     )
     response = ""
-    for chunk in llm.stream_chat(
+    for chunk in llm.stream_chat(chat_template):
         print(chunk.delta, end="", flush=True)
         response += str(chunk.delta)
         yield response