import gradio as gr
from huggingface_hub import InferenceClient

# Models served via the Hugging Face Inference API

#model_text = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
model_text = "meta-llama/Llama-3.2-3B-Instruct"
model_vision = "meta-llama/Llama-3.2-11B-Vision-Instruct"

client = InferenceClient()
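
# A minimal sketch for explicit auth (assumption: an HF_TOKEN env var and an
# `import os` at the top; by default the client reads any cached login token):
#client = InferenceClient(token=os.environ["HF_TOKEN"])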

def fn_text(
    prompt,
    history,
    system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    
    messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
    history.append(messages[0])
    
    messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
    history.append(messages[1])
    
    stream = client.chat.completions.create(
        model = model_text,
        messages = history,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    
    # Accumulate streamed deltas and yield the running text so the UI updates live.
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_text = gr.ChatInterface(
    fn = fn_text,
    type = "messages",
    additional_inputs = [
        gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Meta Llama",
    description = model_text,
)
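
# type="messages" makes ChatInterface hand fn_text its `history` as a list of
# {"role": ..., "content": ...} dicts, so it can be forwarded to the API as-is.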

def fn_vision(
    prompt,
    image_url,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
    
    if image_url:
        messages[0]["content"].append({"type": "image_url", "image_url": {"url": image_url}})
    
    stream = client.chat.completions.create(
        model = model_vision,
        messages = messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    
    # Same streaming pattern as fn_text: yield the accumulated text per chunk.
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_vision = gr.Interface(
    fn = fn_vision,
    inputs = [
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Image URL")
    ],
    outputs = [
        gr.Textbox(label="Output")
    ],
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Meta Llama",
    description = model_vision,
)

app = gr.TabbedInterface(
    [app_text, app_vision],
    ["Text", "Vision"]
)

if __name__ == "__main__":
    app.launch()