# Inference
import gradio as gr
from huggingface_hub import InferenceClient
model = "google/gemma-2-27b-it"
client = InferenceClient(model)
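# Note: gated models such as Gemma can require authentication on the
# Inference API. If needed, a token could be passed explicitly, e.g.:
#   import os
#   client = InferenceClient(model, token=os.environ["HF_TOKEN"])
# (a minimal sketch; assumes an HF_TOKEN secret, not part of the original app)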
def fn_text(
    prompt,
    history,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    #messages = [{"role": "system", "content": system_prompt}]
    #history.append(messages[0])
    #messages.append({"role": "user", "content": prompt})
    #history.append(messages[1])
    # Append the new user turn to the running message history.
    messages = [{"role": "user", "content": prompt}]
    history.append(messages[0])
    # Stream the completion from the serverless Inference API.
    stream = client.chat.completions.create(
        model = model,
        messages = history,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    # Yield the growing reply as each chunk arrives so the UI updates live;
    # delta.content can be None on the final chunk, hence the `or ""` guard.
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)
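
# For reference, a non-streaming variant returns the full reply in one call
# (a minimal sketch reusing the same client; this hypothetical helper is not
# wired into the UI below and is not part of the original app):
def fn_text_once(prompt, history, max_tokens, temperature, top_p):
    history.append({"role": "user", "content": prompt})
    result = client.chat.completions.create(
        model = model,
        messages = history,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
    )
    return result.choices[0].message.content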
app_text = gr.ChatInterface(
    fn = fn_text,
    type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)
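# The additional_inputs above are passed to fn_text positionally after
# (prompt, history), which is why the slider order matches the signature.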
app = gr.TabbedInterface(
    [app_text],
    ["Text"],
)

if __name__ == "__main__":
    app.launch()
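
# Archived earlier revision of this app, kept for reference in the
# docstring below: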
"""
# Inference
import gradio as gr
from huggingface_hub import InferenceClient
model = "google/gemma-2-27b-it"
#model = "google/gemma-2-9b-it"
#model = "google/gemma-2-2b-it"
client = InferenceClient(model)
def fn(
    message,
    history: list[tuple[str, str]],
    #system_message,
    max_tokens,
    temperature,
    top_p,
):
    #messages = [{"role": "system", "content": system_message}]
    messages = []
    # Rebuild the message list from the (user, bot) tuple-style history.
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            # "assistant" is the role the chat-completion API expects;
            # "bot" is not a valid role and would be rejected.
            messages.append({"role": "assistant", "content": val[1]})
            #messages.append({"role": "bot", "content": val[1]})
    messages.append({"role": "user", "content": message})
    response = ""
    for message in client.chat_completion(
        messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    ):
        # delta.content can be None on the final chunk, hence the `or ""` guard.
        token = message.choices[0].delta.content or ""
        response += token
        yield response
app = gr.ChatInterface(
    fn = fn,
    #type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)
if __name__ == "__main__":
    app.launch()