Update app.py
Browse files
app.py
CHANGED
@@ -42,18 +42,22 @@ def respond(message, history, system_message, max_tokens, temperature, top_p):
|
|
42 |
messages.append({"role": "user", "content": message})
|
43 |
|
44 |
try:
|
45 |
-
response
|
|
|
46 |
messages=messages,
|
47 |
max_tokens=max_tokens,
|
48 |
temperature=temperature,
|
49 |
-
top_p=top_p
|
|
|
50 |
)
|
51 |
-
|
|
|
|
|
|
|
52 |
except Exception as e:
|
53 |
-
|
54 |
|
55 |
def clear_context():
|
56 |
-
# Initialize chat history with a greeting from the assistant
|
57 |
greeting_message = get_random_greeting()
|
58 |
return [("", greeting_message)], ""
|
59 |
|
@@ -74,10 +78,13 @@ with gr.Blocks() as demo:
|
|
74 |
temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
|
75 |
top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
|
76 |
|
77 |
-
# Automatically handle submission on Enter key press
|
78 |
def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
81 |
return history, ""
|
82 |
|
83 |
msg.submit(
|
|
|
42 |
messages.append({"role": "user", "content": message})
|
43 |
|
44 |
try:
|
45 |
+
# Stream response from LLM
|
46 |
+
stream = llm.create_chat_completion(
|
47 |
messages=messages,
|
48 |
max_tokens=max_tokens,
|
49 |
temperature=temperature,
|
50 |
+
top_p=top_p,
|
51 |
+
stream=True # Enable streaming
|
52 |
)
|
53 |
+
response_content = ""
|
54 |
+
for chunk in stream:
|
55 |
+
response_content += chunk["choices"][0]["delta"]["content"]
|
56 |
+
yield response_content # Stream each chunk back to the frontend
|
57 |
except Exception as e:
|
58 |
+
yield f"Error: {e}"
|
59 |
|
60 |
def clear_context():
    """Wipe the conversation and restart it with a random assistant greeting.

    Returns:
        A ``(history, textbox_value)`` pair: the history holds a single
        ``("", greeting)`` turn so only the assistant bubble is rendered,
        and the empty string clears the message input box.
    """
    # An empty user slot paired with the greeting shows the assistant
    # speaking first; callers bind the second value to the textbox.
    return [("", get_random_greeting())], ""
|
63 |
|
|
|
78 |
temperature = gr.Slider(0.1, 4.0, value=0.7, step=0.1, label="Temperature")
|
79 |
top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p")
|
80 |
|
81 |
+
# Automatically handle submission on Enter key press with streaming
|
82 |
def handle_submit(message, history, system_message, max_tokens, temperature, top_p):
    """Handle an Enter-key submission and stream the assistant reply to the UI.

    Yields:
        ``(history, textbox_value)`` pairs. Gradio re-renders the bound
        Chatbot/Textbox outputs on every ``yield`` from a generator handler;
        calling ``chatbot.update(history)`` from inside the handler does NOT
        push state to the frontend (and is deprecated in modern Gradio), which
        was why the original showed nothing until the final return.
    """
    # Show the user's message immediately, with an empty assistant slot.
    history.append((message, None))
    yield history, ""
    # `respond` yields progressively longer partial replies; overwrite the
    # last turn each time so the assistant bubble grows in place.
    for partial in respond(message, history, system_message, max_tokens, temperature, top_p):
        history[-1] = (message, partial)
        yield history, ""
|
89 |
|
90 |
msg.submit(
|