import gradio as gr | |
from huggingface_hub import InferenceClient | |
# Alternative model, kept for easy switching:
# client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
client = InferenceClient("google/gemma-2-27b-it") | |
def greet(name):
    """Stream a chat-completion reply to the user's message.

    Sends *name* (the textbox contents) as a single user message and yields
    the reply accumulated so far once per streamed chunk, so the Gradio UI
    updates incrementally.

    Parameters
    ----------
    name : str
        The user's message as entered in the input textbox.

    Yields
    ------
    str
        The full reply text generated so far.
    """
    messages = [{"role": "user", "content": name}]
    parts = []
    for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
        content = chunk.choices[0].delta.content
        # The final streamed chunk can carry ``delta.content is None``;
        # guard so we don't crash on ``str + None``.
        if content is not None:
            parts.append(content)
        # join is O(total) overall, unlike repeated ``+=`` which is quadratic.
        yield "".join(parts)
# Wire the streaming generator into a minimal text-in / text-out UI.
# The name ``demo`` is kept: Gradio tooling (e.g. HF Spaces) looks it up.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="text",
)
demo.launch()