import json

import gradio as gr
from huggingface_hub import InferenceClient

# Hosted inference client for the chat model used by start_conversation().
client = InferenceClient("google/gemma-3-27b-it")


def add_message(role, content, messages):
    """Append a chat turn and return the updated state for the UI.

    Args:
        role: Chat role string (e.g. "user", "assistant", "system").
        content: Message text.
        messages: Current conversation history (list of role/content dicts).

    Returns:
        Tuple of (new messages list, message count, JSON string of messages).
    """
    # Build a new list instead of mutating in place: safer with gr.State,
    # which may share the object across event handlers.
    updated = messages + [{"role": role, "content": content}]
    return updated, len(updated), json.dumps(updated)


def clear_messages(messages):
    """Reset the conversation: empty history, zero count, empty JSON array."""
    return [], 0, "[]"


def start_conversation(messages, max_tokens, temperature, top_p):
    """Send the accumulated messages to the model and return its reply text.

    Args:
        messages: Conversation history in chat-completion format.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The assistant's reply as a plain string.
    """
    response = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=False,
        temperature=temperature,
        top_p=top_p,
    )
    # chat_completion returns choices[0].message.content, not choices[0].text
    # (the latter is the text-generation/completions response shape).
    return response.choices[0].message.content


demo = gr.Blocks()
with demo:
    gr.Markdown("# Chat Interface")
    role_input = gr.Textbox(label="Role")
    content_input = gr.Textbox(label="Content")
    # Per-session conversation state. A plain Python list is not a valid
    # Gradio input/output component and would be shared across sessions.
    messages = gr.State([])
    messages_output = gr.Textbox(label="Messages", value="[]")
    count_output = gr.Number(label="Count", value=0)
    response_output = gr.Textbox(label="Response")
    add_button = gr.Button("Add")
    clear_button = gr.Button("Clear")
    start_button = gr.Button("Start")
    max_tokens_slider = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens")
    temperature_slider = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
    top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)")
    add_button.click(
        add_message,
        inputs=[role_input, content_input, messages],
        outputs=[messages, count_output, messages_output],
    )
    clear_button.click(
        clear_messages,
        inputs=[messages],
        outputs=[messages, count_output, messages_output],
    )
    start_button.click(
        start_conversation,
        inputs=[messages, max_tokens_slider, temperature_slider, top_p_slider],
        outputs=[response_output],
    )

if __name__ == "__main__":
    demo.launch()