# Spaces: Sleeping — Hugging Face Spaces status banner (scraped page residue, kept as a comment)
"""
# Inference
import gradio as gr
app = gr.load(
    "google/gemma-2-2b-it",
    src = "models",
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Inference",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""
"""
# Pipeline
import gradio as gr
from transformers import pipeline
pipe = pipeline(model = "google/gemma-2-2b-it")
def fn(input):
    output = pipe(
        input,
        max_new_tokens = 2048
    )
    return output[0]["generated_text"]#[len(input):]
app = gr.Interface(
    fn = fn,
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Pipeline",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""
import gradio as gr  # noqa: F401 — unused here, kept for Hugging Face Spaces compatibility
from huggingface_hub import InferenceClient
import os

# Read the Hugging Face access token from the environment; never hard-code it.
# On Spaces this is set as a repository secret named HF_TOKEN.
token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=token)

# Single-turn chat prompt sent to the serverless Inference API.
messages = [
    {"role": "user", "content": "Tell me a story"},
]

# Request a streamed chat completion so tokens arrive incrementally.
stream = client.chat.completions.create(
    model="google/gemma-2-2b-it",
    messages=messages,
    temperature=0.5,
    max_tokens=2048,
    top_p=0.7,
    stream=True,
)

for chunk in stream:
    # Guard against chunks with no choices, and against terminal chunks
    # whose delta.content is None (which would otherwise print "None").
    # Print with end="" so the streamed tokens join into continuous text
    # instead of one token per line; flush so output appears live.
    if chunk.choices:
        print(chunk.choices[0].delta.content or "", end="", flush=True)
print()  # finish the streamed output with a single trailing newline