File size: 3,352 Bytes
21b8ce0
13e498a
b5c263a
d8a3c53
606c0ce
40afde6
7418606
2cd3649
ec06a49
0fd9e08
dd0fa82
d8a3c53
69a7f00
 
 
 
 
 
 
 
 
3b38821
d8a3c53
 
 
 
 
 
 
df6c9eb
d8a3c53
40afde6
cf6a52f
 
 
ac70b49
777a931
12a9e25
8e6bf26
ac7a09d
888ea87
8e6bf26
cf6a52f
 
 
 
 
622f877
cf6a52f
8e6bf26
cf6a52f
 
e9aaf81
cf6a52f
ac70b49
 
 
 
777a931
 
 
 
 
 
ac70b49
 
12a9e25
777a931
 
cf6a52f
2cd3649
cf6a52f
559c9c0
8e6bf26
b9838b1
559c9c0
606c0ce
d8a3c53
 
 
606c0ce
2da6f34
d8a3c53
 
 
 
 
 
 
 
dd0fa82
d8a3c53
12a9e25
117cdb1
7a65743
69a7f00
 
 
 
d8a3c53
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import spaces
import json
import subprocess
import gradio as gr
from huggingface_hub import hf_hub_download

# Install the inference stack at startup. llama-cpp-python is pulled from the
# cu124 wheel index given below; llama-cpp-agent comes from the default index.
# Arguments are passed as a list (no shell=True) so the command line is never
# re-parsed by a shell. Return codes are intentionally not checked, matching
# the previous best-effort behaviour; a failed install will surface later as an
# ImportError when respond() imports these packages.
subprocess.run(
    [
        "pip",
        "install",
        "llama-cpp-python==0.2.75",
        "--extra-index-url",
        "https://abetlen.github.io/llama-cpp-python/whl/cu124",
    ]
)
subprocess.run(["pip", "install", "llama-cpp-agent==0.2.10"])

# Download the GGUF weights into ./models; respond() loads them from
# "models/<filename>", so the filenames here must match the Model dropdown.
hf_hub_download(
    repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
    filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
    local_dir="./models",
)
hf_hub_download(
    repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF",
    filename="Llama-3-8B-Synthia-v3.5-f16.gguf",
    local_dir="./models",
)

# Extra stylesheet passed to gr.ChatInterface through its css= argument.
# It spaces chat message rows evenly and sets a 6px corner radius on the
# element matched by ".message .user .message-bubble-border".
css = """
.message-row {
    justify-content: space-evenly;
}
.message .user .message-bubble-border {
    border-radius: 6px;
}
"""

@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model,
):
    """Stream a chat reply for ``message`` from the selected GGUF model.

    Args:
        message: The new user message to answer.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        system_message: System prompt given to the agent.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature applied to generation.
        top_p: Nucleus-sampling probability mass applied to generation.
        model: File name of the GGUF model inside the ``models`` directory.

    Yields:
        The reply accumulated so far, growing with every streamed chunk, so
        the caller can re-render the whole partial answer each time.
    """
    # Imported inside the function rather than at module top; presumably
    # because these packages are pip-installed when the module is first run.
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    # NOTE(review): n_ctx is left at the library default here; confirm it is
    # large enough for the range offered by the "Max new tokens" slider.
    llm = Llama(
        model_path=f"models/{model}",
        n_gpu_layers=81,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        # Use the caller-supplied system prompt instead of a hard-coded one.
        system_prompt=system_message,
        predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
        debug_output=True
    )

    # Start from the provider defaults and apply every user-facing sampling
    # control; temperature and top_p were previously accepted but never used.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.stream = True

    # Replay the prior conversation so the model sees the full context.
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({'role': Roles.user, 'content': user_text})
        messages.add_message({'role': Roles.assistant, 'content': assistant_text})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the cumulative text (not the delta) after each streamed chunk.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs

# Chat UI wired to respond(). The additional inputs are passed to respond()
# positionally after (message, history), so their order must match its
# signature: system_message, max_tokens, temperature, top_p, model.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        # Choices are the GGUF file names downloaded into ./models at startup.
        gr.Dropdown(
            ['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Llama-3-8B-Synthia-v3.5-f16.gguf'],
            value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
            label="Model",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="green",
        secondary_hue="indigo",
        neutral_hue="zinc",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        block_background_fill_dark="*neutral_950",
        input_background_fill_dark="*neutral_950",
        message_border_radius="*radius_md",
        # Keyword names must be valid identifiers: the hyphenated CSS-style
        # name used before was a SyntaxError that broke the whole module.
        border_color_accent_subdued="*neutral_900",
    ),
    css=css,
)

if __name__ == "__main__":
    demo.launch()