# Inference

import gradio as gr
from huggingface_hub import InferenceClient

model = "google/gemma-2-27b-it"
client = InferenceClient(model)
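# Note: the Gemma checkpoints are gated on the Hugging Face Hub. Outside a
# Space that already has access, you may need to authenticate explicitly,
# e.g. (hypothetical): InferenceClient(model, token=os.environ["HF_TOKEN"]).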

def fn_text(
    prompt,
    history,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    # With type="messages", Gradio passes history as a list of
    # {"role": ..., "content": ...} dicts; append the new user turn and
    # send the whole conversation to the model.
    messages = [{"role": "user", "content": prompt}]
    history.append(messages[0])
    
    stream = client.chat.completions.create(
        model = model,
        messages = history,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True
    )
    
    # Accumulate streamed deltas and yield the running text so the UI updates live.
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_text = gr.ChatInterface(
    fn = fn_text,
    type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)

app = gr.TabbedInterface(
    [app_text],
    ["Text"]
)

if __name__ == "__main__":
    app.launch()
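
# A minimal smoke test of the same endpoint without the UI; a hypothetical
# sketch that assumes the client above is authorized, kept commented out:
#
# for chunk in client.chat.completions.create(
#     model = model,
#     messages = [{"role": "user", "content": "Hello, World."}],
#     max_tokens = 32,
#     stream = True,
# ):
#     print(chunk.choices[0].delta.content or "", end = "")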

"""
# Inference

import gradio as gr
from huggingface_hub import InferenceClient

model = "google/gemma-2-27b-it"
#model = "google/gemma-2-9b-it"
#model = "google/gemma-2-2b-it"
client = InferenceClient(model)

def fn(
    message,
    history: list[tuple[str, str]],
    #system_message,
    max_tokens,
    temperature,
    top_p,
):
    #messages = [{"role": "system", "content": system_message}]
    messages = []

    # Legacy tuple-format history: each item is (user_message, assistant_message).
    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            # The chat completion API expects the role "assistant", not "bot".
            messages.append({"role": "assistant", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    for message in client.chat_completion(
        messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    ):
        # delta.content can be None on some chunks (e.g. role-only deltas),
        # so fall back to an empty string before concatenating.
        token = message.choices[0].delta.content or ""

        response += token
        yield response

app = gr.ChatInterface(
    fn = fn,
    #type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)

if __name__ == "__main__":
    app.launch()

# Pipeline

import gradio as gr
from transformers import pipeline

pipe = pipeline(model = "google/gemma-2-2b-it")
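# Note: unlike the InferenceClient version above, pipeline() downloads the
# weights and runs generation locally; the task is inferred from the model card.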

def fn(input):
    output = pipe(
        input,
        max_new_tokens = 2048
    )
    # The pipeline output echoes the prompt; uncomment the slice to return only
    # the newly generated text.
    return output[0]["generated_text"]  #[len(input):]

app = gr.Interface(
    fn = fn,
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Pipeline",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""