intellijmind / app.py
Threatthriver's picture
Update app.py
c72759d verified
raw
history blame
2.24 kB
import gradio as gr
import os
import time
from cerebras.cloud.sdk import Cerebras
# Shared Cerebras API client for the whole module; the key is read from the
# CEREBRAS_API_KEY environment variable (None is passed through if unset).
client = Cerebras(api_key=os.environ.get("CEREBRAS_API_KEY"))
def chat_with_cerebras(user_input):
    """Send one user message to the Cerebras chat model and collect the reply.

    The completion is requested in streaming mode; the text fragments of the
    streamed chunks are concatenated into a single response string.

    Args:
        user_input: Text of the user's message. It is sent as the ``user``
            turn following a fixed system prompt.

    Returns:
        A ``(response, info)`` tuple of two strings. On success ``info`` is
        ``"Compute Time: <seconds> seconds"``, measured from just before the
        request is issued until the stream is fully consumed. On any failure
        ``response`` is a fixed error message and ``info`` is the text of the
        exception that was raised.
    """
    # perf_counter() is monotonic, so the elapsed value cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start_time = time.perf_counter()
    try:
        # Create a chat stream with Cerebras
        stream = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": user_input},
            ],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
        )

        # Collect the text fragments and join once at the end.
        fragments = []
        for chunk in stream:
            # Defensive: skip a chunk whose choices list is empty instead of
            # raising IndexError and losing the text gathered so far.
            # NOTE(review): whether this API ever emits such chunks is not
            # visible from this file - confirm against the SDK docs.
            if not chunk.choices:
                continue
            # delta.content can be None; treat that as "no text".
            fragments.append(chunk.choices[0].delta.content or "")
        response = "".join(fragments)

        # End compute time measurement
        compute_time = time.perf_counter() - start_time
        return response, f"Compute Time: {compute_time:.2f} seconds"
    except Exception as e:
        # UI boundary: report the failure as text instead of letting the
        # exception propagate into the Gradio event handler.
        return "Error: Unable to process your request.", str(e)
# Gradio interface
def gradio_ui():
    """Build the chatbot's Gradio Blocks layout and wire up the Send button.

    The page has a Markdown title, a row with the chat history (scale 8)
    beside a read-only compute-time box (scale 2), a message textbox and a
    Send button.

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """
    # NOTE(review): the nesting below (message box and button placed under
    # the row, not inside it) is the conventional reading of the layout -
    # confirm against the intended design.
    with gr.Blocks() as demo:
        gr.Markdown("""# 🤖 Cerebras AI Chatbot\nChat with a state-of-the-art AI model!""")

        with gr.Row():
            with gr.Column(scale=8):
                chat_history = gr.Chatbot(label="Chat History")
            with gr.Column(scale=2):
                compute_time = gr.Textbox(label="Compute Time", interactive=False)

        user_input = gr.Textbox(label="Type your message", placeholder="Ask me anything...", lines=2)
        send_button = gr.Button("Send", variant="primary")

        def handle_chat(chat_history, user_input):
            """Run one chat turn and return the updated history and timing text."""
            # Guard against a missing history value and work on a copy so the
            # incoming list is never mutated in place.
            history = list(chat_history or [])

            # Blank submissions are ignored: no API call, no empty turn in the
            # history, and the compute-time box is left as it was.
            if not user_input or not user_input.strip():
                return history, gr.update()

            ai_response, compute_info = chat_with_cerebras(user_input)
            history.append((user_input, ai_response))
            return history, compute_info

        send_button.click(handle_chat, inputs=[chat_history, user_input], outputs=[chat_history, compute_time])

    return demo
# Run the Gradio app
# The Blocks object is still built at module level so `demo` remains
# available to anything that imports this module.
demo = gradio_ui()

if __name__ == "__main__":
    # Start the web server only when executed as a script; merely importing
    # the module no longer launches it as a side effect.
    demo.launch()