import gradio as gr
from huggingface_hub import InferenceClient

# client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
client = InferenceClient("google/gemma-2-27b-it")


def greet(name):
    messages = [{"role": "user", "content": name}]
    generated = ""
    # Stream the chat completion and yield the accumulated text so the
    # Gradio output box updates as new tokens arrive.
    for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
        content = chunk.choices[0].delta.content or ""  # delta.content can be None
        generated += content
        # print(generated)  # uncomment to log the partial output to the console
        yield generated


demo = gr.Interface(fn=greet, inputs="text", outputs="text")
demo.launch()