# Hugging Face Space — status: Sleeping (page-header residue from export; kept as a comment)
""" | |
# Inference | |
import gradio as gr | |
app = gr.load( | |
"google/gemma-2-2b-it", | |
src = "models", | |
inputs = [gr.Textbox(label = "Input")], | |
outputs = [gr.Textbox(label = "Output")], | |
title = "Google Gemma", | |
description = "Inference", | |
examples = [ | |
["Hello, World."] | |
] | |
).launch() | |
""" | |
""" | |
# Pipeline | |
import gradio as gr | |
from transformers import pipeline | |
pipe = pipeline(model = "google/gemma-2-2b-it") | |
def fn(input): | |
output = pipe( | |
input, | |
max_new_tokens = 2048 | |
) | |
return output[0]["generated_text"]#[len(input):] | |
app = gr.Interface( | |
fn = fn, | |
inputs = [gr.Textbox(label = "Input")], | |
outputs = [gr.Textbox(label = "Output")], | |
title = "Google Gemma", | |
description = "Pipeline", | |
examples = [ | |
["Hello, World."] | |
] | |
).launch() | |
""" | |
# Active variant: remote chat completion through the HF Inference API.
# Requires the HF_TOKEN secret to be set on the Space; getenv returns None
# when it is missing, in which case InferenceClient falls back to
# unauthenticated (rate-limited) access.
import os

import gradio as gr
from huggingface_hub import InferenceClient

hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=hf_token)
def fn(prompt, history=None):
    """Send *prompt* (plus optional chat *history*) to Gemma and return the reply.

    Fixes over the previous version:
    - ``history=[]`` was a mutable default argument shared across calls,
      silently accumulating turns; it is now ``None`` -> fresh list.
    - The assistant turns used the invalid role ``"bot"``; the
      chat-completions API expects ``"assistant"``.
    - Stream chunks may carry ``delta.content is None``; join with ``or ""``
      so the concatenation cannot raise TypeError.
    - The function returned ``(bot_response, history)`` while the
      ``gr.Interface`` below declares a single output component; it now
      returns just the response string.

    Args:
        prompt: The user's new message.
        history: Optional list of ``(user, assistant)`` turn pairs; mutated
            in place (the new turn is appended) when provided.

    Returns:
        The model's reply as a string.
    """
    history = [] if history is None else history
    messages = []
    for user_prompt, bot_response in history:
        messages.append({"role": "user", "content": user_prompt})
        messages.append({"role": "assistant", "content": bot_response})
    messages.append({"role": "user", "content": prompt})
    stream = client.chat.completions.create(
        model="google/gemma-2-2b-it",
        messages=messages,
        # temperature=0.5,
        # max_tokens=2048,
        # top_p=0.7,
        stream=True,
    )
    bot_response = "".join(
        chunk.choices[0].delta.content or "" for chunk in stream
    )
    history.append((prompt, bot_response))
    return bot_response
# Wire the handler into a simple one-in/one-out Gradio UI and start serving.
demo = gr.Interface(
    fn=fn,
    inputs=[gr.Textbox(label="Input")],
    outputs=[gr.Textbox(label="Output")],
    title="Google Gemma",
    description="Chatbot",
    examples=[["Hello, World."]],
)
app = demo.launch()