# Hugging Face Space app (scrape residue removed: Spaces status header,
# git blob hashes, and viewer gutter line numbers; file size was 3,559 bytes).
# Inference via the Hugging Face serverless Inference API.
import gradio as gr
from huggingface_hub import InferenceClient
# Model repo id served by the Inference API; also reused as the UI description.
model = "google/gemma-2-27b-it"
# Client bound to that model; fn_text uses it for streaming chat completions.
client = InferenceClient(model)
def fn_text(
    prompt,
    history,
    system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat reply for a gr.ChatInterface(type="messages") callback.

    Args:
        prompt: The new user message text.
        history: Prior conversation as a list of {"role": ..., "content": ...}
            dicts (supplied by Gradio because the interface uses
            type="messages").
        system_prompt: System instruction; sent first in the request.
        max_tokens: Upper bound on generated tokens, forwarded to the API.
        temperature: Sampling temperature, forwarded to the API.
        top_p: Nucleus-sampling cutoff, forwarded to the API.

    Yields:
        The partial assistant reply, growing as streamed chunks arrive.
    """
    # BUG FIX: the original appended the system prompt and the new user turn
    # into `history` itself and sent `history` as the request, so the system
    # message landed AFTER the prior conversation instead of first, and the
    # caller-owned history list was mutated on every call. Build a fresh
    # request list instead: system first, then history, then the user turn.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history)
    messages.append({"role": "user", "content": prompt})
    stream = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    )
    # Accumulate streamed deltas and yield the running text so the UI
    # renders the reply incrementally.
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)
# Chat UI: streams fn_text and exposes the sampling knobs as extra inputs.
app_text = gr.ChatInterface(
    fn=fn_text,
    type="messages",  # history arrives as a list of role/content dicts
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title="Google Gemma",
    description=model,  # surface which checkpoint is serving the chat
)
# Wrap the chat interface in a tabbed container; launch() starts the server
# immediately at import time (the usual pattern for a Hugging Face Space,
# which executes this file directly — hence the disabled guard below).
app = gr.TabbedInterface(
    [app_text],
    ["Text"],
).launch()
#if __name__ == "__main__":
#    app.launch()
"""
# Inference
import gradio as gr
from huggingface_hub import InferenceClient
model = "google/gemma-2-27b-it"
#model = "google/gemma-2-9b-it"
#model = "google/gemma-2-2b-it"
client = InferenceClient(model)
def fn(
message,
history: list[tuple[str, str]],
#system_message,
max_tokens,
temperature,
top_p,
):
#messages = [{"role": "system", "content": system_message}]
messages = []
for val in history:
if val[0]:
messages.append({"role": "user", "content": val[0]})
if val[1]:
#messages.append({"role": "assistant", "content": val[1]})
messages.append({"role": "bot", "content": val[1]})
messages.append({"role": "user", "content": message})
response = ""
for message in client.chat_completion(
messages,
max_tokens = max_tokens,
temperature = temperature,
top_p = top_p,
stream = True,
):
token = message.choices[0].delta.content
response += token
yield response
app = gr.ChatInterface(
fn = fn,
#type = "messages",
additional_inputs = [
#gr.Textbox(value="You are a helpful assistant.", label="System Message"),
gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
],
title = "Google Gemma",
description = model,
)
if __name__ == "__main__":
app.launch()
# Pipeline
import gradio as gr
from transformers import pipeline
pipe = pipeline(model = "google/gemma-2-2b-it")
def fn(input):
output = pipe(
input,
max_new_tokens = 2048
)
return output[0]["generated_text"]#[len(input):]
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Pipeline",
examples = [
["Hello, World."]
]
).launch()
"""