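# app.py for a Hugging Face Space serving google/gemma-2-2b-it with Gradio.
# The triple-quoted blocks are alternative implementations kept for reference;
# only the InferenceClient + gr.ChatInterface version below is active.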
"""
# Inference
import gradio as gr
app = gr.load(
"google/gemma-2-2b-it",
src = "models",
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Inference",
examples = [
["Hello, World."]
]
).launch()
"""
"""
# Pipeline
import gradio as gr
from transformers import pipeline
pipe = pipeline(model = "google/gemma-2-2b-it")
def fn(input):
output = pipe(
input,
max_new_tokens = 2048
)
return output[0]["generated_text"]#[len(input):]
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Pipeline",
examples = [
["Hello, World."]
]
).launch()
"""
import os

import gradio as gr
from huggingface_hub import InferenceClient

# hf_token = os.getenv("HF_TOKEN")
client = InferenceClient("google/gemma-2-2b-it")
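# Gemma is a gated model; if anonymous calls are rejected, a token can be
# passed explicitly (assumes HF_TOKEN is set in the Space secrets):
# client = InferenceClient("google/gemma-2-2b-it", token=hf_token)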
def respond(
    message,
    history: list[tuple[str, str]],
    # system_message,
    max_tokens,
    temperature,
    top_p,
):
    # messages = [{"role": "system", "content": system_message}]
    messages = []
    # Replay the chat history as alternating user/assistant turns.
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})

    # Stream tokens and yield the growing response so the UI updates live.
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final chunk's delta content can be None
            response += token
        yield response
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()
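# Alternative chatbot sketch using the OpenAI-compatible chat.completions
# API of InferenceClient; kept commented out for reference.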
"""
client = InferenceClient(api_key=hf_token)
def fn(prompt, history=[]):
messages = []
for user_prompt, bot_response in history:
messages.append({"role": "user", "content": user_prompt})
messages.append({"role": "bot", "content": bot_response})
messages.append({"role": "user", "content": prompt})
stream = client.chat.completions.create(
model = "google/gemma-2-2b-it",
messages = messages,
#temperature = 0.5,
#max_tokens = 2048,
#top_p = 0.7,
stream = True
)
bot_response = "".join(chunk.choices[0].delta.content for chunk in stream)
history.append((prompt, bot_response))
return bot_response, history
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Chatbot",
examples = [
["Hello, World."]
]
).launch()
""" |