import os
import time

import gradio as gr
from cerebras.cloud.sdk import Cerebras

# Set up the Cerebras client.
# NOTE(review): if CEREBRAS_API_KEY is unset this passes api_key=None and the
# first request fails with an opaque auth error; failing fast here is clearer.
_api_key = os.getenv("CEREBRAS_API_KEY")
if not _api_key:
    raise RuntimeError("CEREBRAS_API_KEY environment variable is not set")
client = Cerebras(api_key=_api_key)


def chat_with_cerebras(user_input):
    """Send *user_input* to the Cerebras model and return its reply.

    Args:
        user_input: The user's chat message (plain text).

    Returns:
        A ``(response, compute_time)`` tuple of strings. On success,
        ``response`` is the model's full streamed reply and
        ``compute_time`` reports elapsed wall-clock seconds. On failure,
        ``response`` is a generic error message and ``compute_time``
        carries the error detail.
    """
    # Avoid a pointless API round-trip on empty/whitespace-only input.
    if not user_input or not user_input.strip():
        return "Error: Unable to process your request.", "Error: empty input"

    # Start compute time measurement.
    start_time = time.time()
    try:
        # Create a chat stream with Cerebras.
        stream = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": user_input},
            ],
            model="llama-3.3-70b",
            stream=True,
            max_completion_tokens=1024,
            temperature=0.2,
            top_p=1,
        )

        # Collect the streamed chunks; join once instead of quadratic +=.
        # Deltas may arrive with content=None, hence the `or ""` guard.
        parts = [chunk.choices[0].delta.content or "" for chunk in stream]
        response = "".join(parts)

        # End compute time measurement.
        compute_time = time.time() - start_time
        return response, f"Compute Time: {compute_time:.2f} seconds"

    except Exception as e:
        # Top-level UI boundary: surface the failure in the UI rather than
        # crashing the Gradio worker. The detail goes in the second field,
        # clearly labeled so it is not mistaken for a timing value.
        return "Error: Unable to process your request.", f"Error: {e}"


def gradio_ui():
    """Build and return the Gradio Blocks interface for the chatbot."""
    with gr.Blocks() as demo:
        gr.Markdown("""# Cerebras AI Chatbot\nChat with a state-of-the-art AI model.""")
        with gr.Row():
            user_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
        response = gr.Textbox(label="AI Response", interactive=False)
        compute_time = gr.Textbox(label="Compute Time", interactive=False)
        submit_button = gr.Button("Submit")

        # Define interaction logic.
        submit_button.click(chat_with_cerebras, inputs=user_input, outputs=[response, compute_time])

    return demo


# Run the Gradio app only when executed as a script, so importing this
# module (e.g. for testing) does not start a web server.
if __name__ == "__main__":
    demo = gradio_ui()
    demo.launch()