# Inference
import gradio as gr
from huggingface_hub import InferenceClient
model = "google/gemma-2-27b-it"
#model = "google/gemma-2-9b-it"
#model = "google/gemma-2-2b-it"
client = InferenceClient(model)
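# InferenceClient sends chat requests to Hugging Face's hosted Inference API for the model id above.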

def fn(
    message,
    history: list[tuple[str, str]],
    #system_message,
    max_tokens,
    temperature,
    top_p,
):
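    # Gemma's chat template does not accept a "system" role, which is why the
    # system message input is commented out here and in the UI below.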
    #messages = [{"role": "system", "content": system_message}]
    messages = []
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            # the chat API expects the "assistant" role, not "bot"
            messages.append({"role": "assistant", "content": val[1]})
    messages.append({"role": "user", "content": message})
    response = ""
    # Stream the reply so partial text shows up in the UI as it is generated.
    for chunk in client.chat_completion(  # "chunk" avoids shadowing the message parameter
        messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final streamed chunk can carry an empty delta
            response += token
        yield response
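
# gr.ChatInterface wires the generator above into a chat UI and renders the
# additional_inputs in an accordion beneath the chat box.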
app = gr.ChatInterface(
    fn = fn,
    #type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)

if __name__ == "__main__":
    app.launch()
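
# Alternative: run google/gemma-2-2b-it locally with the transformers pipeline
# instead of calling the Inference API. Kept disabled inside a string literal.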
"""
# Pipeline
import gradio as gr
from transformers import pipeline
pipe = pipeline(model = "google/gemma-2-2b-it")
def fn(input):
output = pipe(
input,
max_new_tokens = 2048
)
return output[0]["generated_text"]#[len(input):]
app = gr.Interface(
fn = fn,
inputs = [gr.Textbox(label = "Input")],
outputs = [gr.Textbox(label = "Output")],
title = "Google Gemma",
description = "Pipeline",
examples = [
["Hello, World."]
]
).launch()
""" |