"""Minimal Gradio demo plus a one-off streamed chat-completion request.

On execution this script:

1. sends a single chat message to a hosted model through
   ``huggingface_hub.InferenceClient`` and prints the streamed reply to
   stdout as it arrives;
2. builds a text-in / text-out Gradio interface around :func:`greet` and
   launches it.

The API token is read from the ``HF_TOKEN`` environment variable instead of
being embedded in the source, so no credential ends up in version control.
"""

import os

import gradio as gr
from huggingface_hub import InferenceClient


def greet(name: str) -> str:
    """Return the greeting ``"Hello <name>!!"`` for *name*."""
    return "Hello " + name + "!!"


# Never hard-code credentials: take the token from the environment.
# If HF_TOKEN is unset, None is passed and the client is left to its own
# default credential handling (see the huggingface_hub documentation).
client = InferenceClient(api_key=os.environ.get("HF_TOKEN"))

messages = [
    {
        "role": "user",
        "content": "What is the capital of France?",
    },
]

stream = client.chat.completions.create(
    model="Qwen/Qwen2.5-Math-7B-Instruct",
    messages=messages,
    max_tokens=500,
    stream=True,
)

for chunk in stream:
    # Skip chunks that carry no choices, and treat a missing delta content
    # as empty text so the literal string "None" is never printed.
    if not chunk.choices:
        continue
    print(chunk.choices[0].delta.content or "", end="")

demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()