import gradio as gr
from huggingface_hub import InferenceClient

# A bare InferenceClient targets the Hugging Face serverless Inference API and
# resolves its token from the HF_TOKEN environment variable or a cached login.
client = InferenceClient()

# Llama 3 - Text
model_llama_3_text = "meta-llama/Llama-3.2-3B-Instruct"

def fn_llama_3_text(
    prompt,
    history,
    system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    
    # With System Prompt: the system message goes first, followed by the prior
    # chat history (already OpenAI-style role/content dicts), then the new user turn.
    messages = [{"role": "system", "content": [{"type": "text", "text": system_prompt}]}]
    messages.extend(history)
    messages.append({"role": "user", "content": [{"type": "text", "text": prompt}]})
    
    stream = client.chat.completions.create(
        model = model_llama_3_text,
        messages = messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    
    # Accumulate the streamed deltas and yield the growing response for live output
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_llama_3_text = gr.ChatInterface(
    fn = fn_llama_3_text,
    type = "messages",
    additional_inputs = [
        gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Meta Llama 3",
    description = model_llama_3_text,
)

# Llama 3 - Vision
model_llama_3_vision = "meta-llama/Llama-3.2-11B-Vision-Instruct"

def fn_llama_3_vision(
    prompt,
    image_url,
    #system_prompt,
    max_tokens,
    temperature,
    top_p,
):
    
    # Without System Prompt: a single-turn request; the image (if provided) is
    # attached as an image_url content part alongside the text prompt.
    messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
    if image_url:
        messages[0]["content"].append({"type": "image_url", "image_url": {"url": image_url}})
    
    stream = client.chat.completions.create(
        model = model_llama_3_vision,
        messages = messages,
        max_tokens = max_tokens,
        temperature = temperature,
        top_p = top_p,
        stream = True,
    )
    
    # Accumulate the streamed deltas and yield the growing response for live output
    chunks = []
    for chunk in stream:
        chunks.append(chunk.choices[0].delta.content or "")
        yield "".join(chunks)

app_llama_3_vision = gr.Interface(
    fn = fn_llama_3_vision,
    inputs = [
        gr.Textbox(label="Prompt"),
        gr.Textbox(label="Image URL")
    ],
    outputs = [
        gr.Textbox(label="Output")
    ],
    additional_inputs = [
        #gr.Textbox(value="You are a helpful assistant.", label="System Prompt"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-P"),
    ],
    title = "Meta Llama 3",
    description = model_llama_3_vision,
)

app = gr.TabbedInterface(
    [app_llama_3_text, app_llama_3_vision],
    ["Llama 3 - Text", "Llama 3 - Vision"]
)

app.launch()
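
# To run locally: `python app.py`, then open the printed local URL. The meta-llama
# checkpoints are gated on the Hugging Face Hub, so the token in use must have been
# granted access to them (an assumption about the deployment environment).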