# Inference: Gradio demo for chatting with Meta Llama text and vision models
# through the Hugging Face Inference API.
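#
# Setup sketch (assumptions: the HF_TOKEN environment variable is set and the
# account has been granted access to the gated meta-llama checkpoints):
#   pip install gradio huggingface_hub
#   export HF_TOKEN=hf_xxx
#   python app.py  # or however this file is named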

import gradio as gr
from huggingface_hub import InferenceClient

model_text = "meta-llama/Llama-3.2-3B-Instruct"
model_vision = "meta-llama/Llama-3.2-11B-Vision-Instruct"

# With no arguments, InferenceClient reads the API token from the HF_TOKEN
# environment variable.
client = InferenceClient()

def fn_text(
    prompt,
    history,
    system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
    history.append(messages[0])
    
    messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
    history.append(messages[1])
    
    stream = client.chat.completions.create(
        model = model_text,
        messages = messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    
    # Accumulate streamed deltas and yield the growing reply so the UI updates live.
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_text = gr.ChatInterface(
    fn = fn_text,
    type = "messages",
    additional_inputs = [
        gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Meta Llama",
    description = model_text,
)
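
# Quick sanity check of the streaming handler outside the UI (a sketch; this
# performs a real API call, so the token and model-access assumptions above
# apply):
#
#   for partial in fn_text("Hello!", [], "You are a helpful assistant.", 64, 0.7, 0.95):
#       print(partial)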

def fn_vision(
    prompt,
    image_url,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
    
    if image_url:
        messages[0]["content"].append({"type": "image_url", "image_url": {"url": image_url}})
    
    stream = client.chat.completions.create(
        model = model_vision,
        messages = messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_vision = gr.Interface(
    fn = fn_vision,
    inputs = [
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Image URL")
    ],
    outputs = [
        gr.Textbox(label="Output")
    ],
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Meta Llama",
    description = model_vision,
)
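
# Note: gr.Interface renders `additional_inputs` in an accordion below the main
# inputs and passes them to the function after the regular inputs, so fn_vision
# receives (prompt, image_url, max_tokens, temperature, top_p) in order.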

# Chaining .launch() onto the constructor would bind `app` to the launch result
# rather than the interface, so construct first and launch under the standard
# main guard.
app = gr.TabbedInterface(
    [app_text, app_vision],
    ["Text", "Vision"]
)

if __name__ == "__main__":
    app.launch()