import gradio as gr | |
from huggingface_hub import InferenceClient | |
# Hosted Hugging Face inference endpoint used by greet(); change the model
# id here to point the demo at a different chat-capable model.
client = InferenceClient("google/gemma-2-27b-it")
def greet(name):
    """Stream a chat-completion reply to *name*, yielding the growing text.

    Parameters
    ----------
    name : str
        The user's message, sent as a single-turn chat prompt.

    Yields
    ------
    str
        The accumulated generated text after each streamed token, so the
        Gradio output box updates incrementally.
    """
    messages = [{"role": "user", "content": name}]
    generated = ""
    for chunk in client.chat_completion(messages, max_tokens=100, stream=True):
        # Streamed deltas may carry content=None (e.g. role-only or final
        # chunks); concatenating None to a str raises TypeError, so skip them.
        content = chunk.choices[0].delta.content
        if content:
            generated += content
            yield generated
# Build the UI: Interface wires greet's generator to a text input/output pair
# and streams partial results as they are yielded.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()