# Inference
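# Gradio chat UI that streams chat completions from a hosted Gemma 2 model
# via the Hugging Face Inference API.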

import gradio as gr
from huggingface_hub import InferenceClient

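# Smaller Gemma 2 checkpoints also work here, e.g. "google/gemma-2-9b-it"
# or "google/gemma-2-2b-it" (see the earlier version kept below).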
model = "google/gemma-2-27b-it"
client = InferenceClient(model)
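# Note: gated models such as Gemma may require a Hugging Face access token,
# e.g. InferenceClient(model, token=...); without one, requests can be rejected.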

def fn_text(
    prompt,
    history,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    #messages = [{"role": "system", "content": system_prompt}]
    #history.append(messages[0])
    
    #messages.append({"role": "user", "content": prompt})
    #history.append(messages[1])

    messages = [{"role": "user", "content": prompt}]
    history.append(messages[0])
    
    stream = client.chat.completions.create(
        model = model,
        messages = messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True
    )
    
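    # Accumulate streamed deltas and yield the growing reply so the UI
    # updates token by token.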
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_text = gr.ChatInterface(
    fn = fn_text,
    type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)

app = gr.TabbedInterface(
    [app_text],
    ["Text"]
)

if __name__ == "__main__":
    app.launch()

"""
# Inference

import gradio as gr
from huggingface_hub import InferenceClient

model = "google/gemma-2-27b-it"
#model = "google/gemma-2-9b-it"
#model = "google/gemma-2-2b-it"
client = InferenceClient(model)

def fn(
    message,
    history: list[tuple[str, str]],
    #system_message,
    max_tokens,
    temperature,
    top_p,
):
    #messages = [{"role": "system", "content": system_message}]
    messages = []

    for val in history:
        if val[0]:
            messages.append({"role": "user", "content": val[0]})
        if val[1]:
            #messages.append({"role": "assistant", "content": val[1]})
            messages.append({"role": "bot", "content": val[1]})

    messages.append({"role": "user", "content": message})

    response = ""

    # chat_completion() streams delta chunks; concatenate them into the reply.
    for chunk in client.chat_completion(
        messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    ):
        token = chunk.choices[0].delta.content or ""

        response += token
        yield response

app = gr.ChatInterface(
    fn = fn,
    #type = "messages",
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Google Gemma",
    description = model,
)

if __name__ == "__main__":
    app.launch()
"""