Spaces:
Running
Running
File size: 2,054 Bytes
58d9279 ec03c70 58d9279 f56245e 0451106 58d9279 e82a10b 58d9279 52e622e 58d9279 18160e9 58d9279 0451106 f56245e e82a10b 45de1a4 e82a10b 45de1a4 4505847 45de1a4 4505847 e82a10b 45de1a4 e82a10b 45de1a4 4505847 e82a10b 45de1a4 e82a10b 45de1a4 e82a10b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
"""
# Inference
import gradio as gr
app = gr.load(
"google/gemma-2-2b-it",
src = "models",
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Inference",
examples = [
["Hello, World."]
]
).launch()
"""
"""
# Pipeline
import gradio as gr
from transformers import pipeline
pipe = pipeline(model = "google/gemma-2-2b-it")
def fn(input):
output = pipe(
input,
max_new_tokens = 2048
)
return output[0]["generated_text"]#[len(input):]
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Pipeline",
examples = [
["Hello, World."]
]
).launch()
"""
import gradio as gr
from huggingface_hub import InferenceClient
import os
# Initialize the Hugging Face Inference Client.
# HF_TOKEN is expected in the environment (e.g. a Spaces secret);
# os.getenv returns None if unset, in which case the client runs unauthenticated.
hf_token = os.getenv("HF_TOKEN")
client = InferenceClient(api_key=hf_token)
# Function to handle user inputs and fetch model responses
def chatbot(input_text, history=None):
    """Send the running conversation to the Gemma model and return its reply.

    Args:
        input_text: The user's latest message.
        history: List of (user_message, bot_response) pairs from earlier turns.
            Defaults to None, meaning a fresh conversation.

    Returns:
        Tuple of (bot_response, updated_history) matching the Interface's
        "text" and "state" outputs.
    """
    # Fix: the original used `history=[]`, a mutable default shared across
    # calls; Gradio also passes None for an uninitialized "state" input,
    # which would crash the loop below.
    if history is None:
        history = []
    # Rebuild the transcript in the OpenAI-style chat message format.
    messages = []
    for user_input, bot_response in history:
        messages.append({"role": "user", "content": user_input})
        messages.append({"role": "assistant", "content": bot_response})
    messages.append({"role": "user", "content": input_text})
    stream = client.chat.completions.create(
        model="google/gemma-2-2b-it",
        messages=messages,
        #temperature=0.5,
        #max_tokens=2048,
        #top_p=0.7,
        stream=True
    )
    # Concatenate the streamed response. Guard against chunks with empty
    # `choices` or a None `delta.content` (typical of the final chunk),
    # which would make str.join raise TypeError.
    bot_response = "".join(
        chunk.choices[0].delta.content or ""
        for chunk in stream
        if chunk.choices
    )
    # Record this turn so the next call sees the full conversation.
    history.append((input_text, bot_response))
    return bot_response, history
# Gradio Interface: the "state" component carries the (user, bot) history
# between calls so chatbot() can rebuild the full transcript each turn.
demo = gr.Interface(
    fn=chatbot,
    inputs=["text", "state"],
    outputs=["text", "state"],
    title="Gemma Chatbot"
)
# Launch Gradio App.
# Fix: removed the stray trailing "|" (a copy/paste gutter artifact that
# made this line a syntax error).
demo.launch()