Spaces:
Running
Running
File size: 3,361 Bytes
58d9279 ec03c70 58d9279 f56245e 0451106 58d9279 e82a10b 58d9279 52e622e 58d9279 18160e9 58d9279 0451106 f56245e e82a10b c6e8f4b e82a10b 5f71f8b 4505847 5f71f8b e82a10b 5f71f8b e82a10b 45de1a4 4505847 5f71f8b e82a10b 5f71f8b c6e8f4b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
"""
# Inference
import gradio as gr
app = gr.load(
"google/gemma-2-2b-it",
src = "models",
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Inference",
examples = [
["Hello, World."]
]
).launch()
"""
"""
# Pipeline
import gradio as gr
from transformers import pipeline
pipe = pipeline(model = "google/gemma-2-2b-it")
def fn(input):
output = pipe(
input,
max_new_tokens = 2048
)
return output[0]["generated_text"]#[len(input):]
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Pipeline",
examples = [
["Hello, World."]
]
).launch()
"""
import gradio as gr
from huggingface_hub import InferenceClient
import os
# Hub access token taken from the environment; None when HF_TOKEN is unset.
hf_token = os.getenv("HF_TOKEN")

# Shared inference client for the chat model. The token is passed explicitly
# so the value read above is actually used instead of relying on whatever
# implicit credential lookup the installed library version performs. With
# token=None the client behaves exactly as it did without the argument.
client = InferenceClient("google/gemma-2-2b-it", token=hf_token)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the model's reply to ``message`` as a growing string.

    Builds a chat transcript from the system prompt, the prior turns and the
    new user message, sends it to the module-level ``client`` with streaming
    enabled, and yields the reply accumulated so far after each chunk.

    Args:
        message: The new user message.
        history: Prior turns; each item is indexed as ``[0]`` (user text) and
            ``[1]`` (assistant text). Empty/falsy entries are skipped.
        system_message: Text sent as the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.

    Yields:
        The reply text received so far (cumulative, not the delta).
    """
    # NOTE(review): some chat templates reject a "system" role -- confirm the
    # serving endpoint for this model accepts it.
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called `message`, so the
    # parameter of that name is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices; there is nothing to append.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # `content` can be None (e.g. on a role-only or final chunk);
        # concatenating it would raise TypeError, so only append real text.
        if token:
            response += token
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to `respond`. The additional inputs are listed in the same
# order as the extra parameters of `respond`:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are a friendly Chatbot.",
        ),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()
"""
client = InferenceClient(api_key=hf_token)
def fn(prompt, history=[]):
messages = []
for user_prompt, bot_response in history:
messages.append({"role": "user", "content": user_prompt})
messages.append({"role": "assistant", "content": bot_response})
messages.append({"role": "user", "content": prompt})
stream = client.chat.completions.create(
model = "google/gemma-2-2b-it",
messages = messages,
#temperature = 0.5,
#max_tokens = 2048,
#top_p = 0.7,
stream = True
)
bot_response = "".join(chunk.choices[0].delta.content for chunk in stream)
history.append((prompt, bot_response))
return bot_response, history
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Chatbot",
examples = [
["Hello, World."]
]
).launch()
""" |