Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -98,13 +98,14 @@ def predict_chat(message: str, history: list):
|
|
98 |
prompt_input += f"Assistant: {msg['content']}\n"
|
99 |
prompt_input += "Assistant:"
|
100 |
|
|
|
101 |
for token in model.generate(
|
102 |
prompt_input,
|
103 |
-
|
104 |
temperature=TEMPERATURE,
|
105 |
top_k=TOP_K,
|
106 |
top_p=TOP_P,
|
107 |
-
|
108 |
repetition_penalty=1.1,
|
109 |
stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
|
110 |
):
|
@@ -168,4 +169,4 @@ if __name__ == "__main__":
|
|
168 |
|
169 |
demo.chatbot.value = initial_messages_for_value
|
170 |
|
171 |
-
demo.launch()
|
|
|
98 |
prompt_input += f"Assistant: {msg['content']}\n"
|
99 |
prompt_input += "Assistant:"
|
100 |
|
101 |
+
# FIXED: Use max_tokens instead of max_new_tokens for ctransformers
|
102 |
for token in model.generate(
|
103 |
prompt_input,
|
104 |
+
max_tokens=MAX_NEW_TOKENS, # Changed from max_new_tokens
|
105 |
temperature=TEMPERATURE,
|
106 |
top_k=TOP_K,
|
107 |
top_p=TOP_P,
|
108 |
+
sample=DO_SAMPLE, # Changed from do_sample
|
109 |
repetition_penalty=1.1,
|
110 |
stop=["User:", "\nUser", "\n#", "\n##", "<|endoftext|>"]
|
111 |
):
|
|
|
169 |
|
170 |
demo.chatbot.value = initial_messages_for_value
|
171 |
|
172 |
+
demo.launch()
|