# intellijmind / app.py
import gradio as gr
import os
import time
from cerebras.cloud.sdk import Cerebras
# Set up the Cerebras client
# Reads the key from the CEREBRAS_API_KEY environment variable;
# os.getenv returns None if unset, so Cerebras() would get api_key=None
# and requests would fail at call time rather than here.
client = Cerebras(api_key=os.getenv("CEREBRAS_API_KEY"))
def chat_with_cerebras(user_input):
    """
    Send *user_input* to the Cerebras chat model and return its reply.

    Streams the completion and accumulates the chunks into one string,
    measuring wall-clock compute time for display in the UI.

    Parameters
    ----------
    user_input : str
        The message typed by the user.

    Returns
    -------
    tuple[str, str]
        ``(model_response, "Compute Time: X.XX seconds")`` on success, or
        ``("Error: Unable to process your request.", exception_text)`` on
        failure — two values so both Gradio output boxes are filled.
    """
    start_time = time.time()
    try:
        # Stream so tokens can be collected as they arrive.
        stream = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": user_input},
            ],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
        )
        # str.join instead of repeated += (avoids quadratic concatenation).
        # delta.content can be None on some chunks (e.g. role-only deltas).
        response = "".join(
            chunk.choices[0].delta.content or "" for chunk in stream
        )
    except Exception as e:
        # Deliberate broad catch: this is the UI boundary, so any SDK or
        # network failure is surfaced in the output boxes instead of crashing.
        return "Error: Unable to process your request.", str(e)
    compute_time = time.time() - start_time
    return response, f"Compute Time: {compute_time:.2f} seconds"
# Gradio interface
def gradio_ui():
    """Build and return the Gradio Blocks interface for the chatbot.

    Lays out a message box, two read-only output boxes (AI response and
    compute-time label), and a submit button wired to chat_with_cerebras.
    """
    with gr.Blocks() as interface:
        gr.Markdown("""# Cerebras AI Chatbot\nChat with a state-of-the-art AI model.""")
        with gr.Row():
            message_box = gr.Textbox(label="Your Message", placeholder="Type your message here...")
        answer_box = gr.Textbox(label="AI Response", interactive=False)
        timing_box = gr.Textbox(label="Compute Time", interactive=False)
        submit_btn = gr.Button("Submit")
        # Clicking submit sends the message and fills both output boxes.
        submit_btn.click(chat_with_cerebras, inputs=message_box, outputs=[answer_box, timing_box])
    return interface
# Run the Gradio app.
# `demo` stays at module level so hosts (e.g. Hugging Face Spaces) can
# discover it, but launch() only runs when the script is executed directly —
# importing this module no longer starts a web server as a side effect.
demo = gradio_ui()

if __name__ == "__main__":
    demo.launch()