File size: 3,361 Bytes
58d9279
 
 
ec03c70
 
58d9279
f56245e
 
 
 
0451106
 
58d9279
 
 
 
 
e82a10b
58d9279
 
 
 
 
52e622e
58d9279
 
 
 
18160e9
58d9279
 
 
 
 
 
 
0451106
 
f56245e
 
 
e82a10b
 
 
 
 
 
 
 
c6e8f4b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e82a10b
 
5f71f8b
4505847
5f71f8b
 
 
 
 
 
e82a10b
 
5f71f8b
 
 
 
 
 
e82a10b
 
45de1a4
4505847
5f71f8b
e82a10b
 
5f71f8b
 
 
 
 
 
 
 
 
c6e8f4b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
"""
# Inference

import gradio as gr

app = gr.load(
    "google/gemma-2-2b-it",
    src = "models",
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Inference",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""
"""
# Pipeline

import gradio as gr
from transformers import pipeline

pipe = pipeline(model = "google/gemma-2-2b-it")

def fn(input):
    output = pipe(
        input,
        max_new_tokens = 2048
    )
    return output[0]["generated_text"]#[len(input):]

app = gr.Interface(
    fn = fn,
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Pipeline",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""

import gradio as gr
from huggingface_hub import InferenceClient
import os

# Hugging Face access token read from the environment (set as a Space
# secret). NOTE(review): it is fetched here but not passed to the client
# below, so requests go out unauthenticated — confirm whether anonymous
# access to the model is intended.
hf_token = os.getenv("HF_TOKEN")

# Serverless Inference API client pinned to the Gemma 2 2B instruct model.
client = InferenceClient("google/gemma-2-2b-it")

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, given the prior *history*.

    Generator used by ``gr.ChatInterface``: after each streamed token it
    yields the full response accumulated so far, so the UI renders
    incremental output.

    Args:
        message: The new user message.
        history: Prior turns as ``(user, assistant)`` pairs.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        str: The response text accumulated up to the latest token.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay prior turns as alternating user/assistant messages; skip
    # falsy halves so an in-progress turn doesn't add an empty message.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Fixes two defects in the original: the loop variable was named
    # `message`, shadowing the function parameter, and `delta.content`
    # was appended unconditionally even though streamed chunks (e.g. the
    # final one) can carry `content=None`, which raised TypeError on
    # `response += token`.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to `respond`. Values of `additional_inputs` are passed to
# `respond` positionally after (message, history), in the order listed:
# system_message, max_tokens, temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        # Slider bounds mirror the defaults used in `respond`'s sampling
        # parameters; 512 new tokens by default, up to 2048.
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)

# Launch the Gradio server only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()

"""
client = InferenceClient(api_key=hf_token)

def fn(prompt, history=[]):
    messages = []
    
    for user_prompt, bot_response in history:
        messages.append({"role": "user", "content": user_prompt})
        messages.append({"role": "bot", "content": bot_response})
    
    messages.append({"role": "user", "content": prompt})

    stream = client.chat.completions.create(
        model = "google/gemma-2-2b-it", 
        messages = messages, 
        #temperature = 0.5,
        #max_tokens = 2048,
        #top_p = 0.7,
        stream = True
    )

    bot_response = "".join(chunk.choices[0].delta.content for chunk in stream)

    history.append((prompt, bot_response))
    return bot_response, history

app = gr.Interface(
    fn = fn, 
    inputs = [gr.Textbox(label = "Input")],
    outputs = [gr.Textbox(label = "Output")],
    title = "Google Gemma",
    description = "Chatbot",
    examples = [
        ["Hello, World."]
    ]
).launch()
"""