File size: 3,431 Bytes
21b8ce0
13e498a
b5c263a
d8a3c53
606c0ce
40afde6
7418606
2cd3649
ec06a49
0fd9e08
dd0fa82
d8a3c53
69a7f00
 
70b9f60
69a7f00
70b9f60
 
69a7f00
79f3ba6
b8ad668
79f3ba6
69a7f00
 
3b38821
d8a3c53
 
 
 
 
 
 
df6c9eb
d8a3c53
40afde6
cf6a52f
 
 
ac70b49
777a931
12a9e25
8e6bf26
ac7a09d
888ea87
8e6bf26
cf6a52f
 
 
 
 
622f877
cf6a52f
8e6bf26
cf6a52f
 
e9aaf81
cf6a52f
ac70b49
 
 
 
777a931
 
 
 
 
 
ac70b49
 
12a9e25
777a931
 
cf6a52f
2cd3649
cf6a52f
559c9c0
8e6bf26
b9838b1
559c9c0
606c0ce
d8a3c53
1197317
d8a3c53
 
606c0ce
2da6f34
d8a3c53
 
 
 
 
 
 
 
dd0fa82
d8a3c53
12a9e25
117cdb1
b3d8b56
905a689
f97343d
 
 
 
d8a3c53
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import spaces
import json
import subprocess
import gradio as gr
from huggingface_hub import hf_hub_download

# Runtime dependency installation. The commands are passed as argument lists
# (no shell involved) and check=True makes a failed install abort start-up
# immediately instead of surfacing later as an ImportError inside respond().
subprocess.run(
    [
        "pip",
        "install",
        "llama-cpp-python==0.2.75",
        "--extra-index-url",
        "https://abetlen.github.io/llama-cpp-python/whl/cu124",
    ],
    check=True,
)
subprocess.run(["pip", "install", "llama-cpp-agent==0.2.10"], check=True)

# Download both GGUF model files into ./models; respond() loads the selected
# one from "models/<filename>".
hf_hub_download(
    repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
    filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
    local_dir="./models",
)
hf_hub_download(
    repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF",
    filename="Llama-3-8B-Synthia-v3.5-f16.gguf",
    local_dir="./models",
)

# Custom stylesheet handed to gr.ChatInterface via its css= argument.
# It spaces message rows evenly, gives message bubbles a 6px corner radius,
# and makes elements with the `.user` class transparent.
css = """
.message-row {
    justify-content: space-evenly !important;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.user {
    background: transparent !important;
}
"""

@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model,
):
    """Stream a chat reply from the selected GGUF model.

    Args:
        message: The new user message to answer.
        history: Previous turns as (user_text, assistant_text) pairs.
        system_message: System prompt given to the agent.
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature applied to generation.
        top_p: Nucleus-sampling probability mass applied to generation.
        model: File name of the model to load from the ``models`` directory.

    Yields:
        The response text accumulated so far, growing with every streamed
        chunk.
    """
    # Imported inside the function rather than at the top of the file; these
    # packages are installed by the pip commands run at module load.
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles

    llm = Llama(
        model_path=f"models/{model}",
        n_gpu_layers=81,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        # Use the system prompt from the UI instead of a hard-coded one.
        system_prompt=system_message,
        predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
        debug_output=True,
    )

    # Start from the provider defaults and apply every sampling control the
    # UI exposes, so the temperature and top-p sliders actually take effect.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.stream = True

    # Replay earlier turns into the agent's chat history, user then assistant.
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({'role': Roles.user, 'content': user_text})
        messages.add_message({'role': Roles.assistant, 'content': assistant_text})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the running text (not the individual chunk) on every step.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs

# Chat UI definition. The callback is passed first: a positional argument may
# not follow a keyword argument (the previous ordering was a SyntaxError).
# The additional_inputs are listed in the same order as respond()'s parameters
# after (message, history): system_message, max_tokens, temperature, top_p,
# model.
demo = gr.ChatInterface(
    respond,
    description="Llama-cpp-agent: Chat Multi llm",
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Dropdown(
            ['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Llama-3-8B-Synthia-v3.5-f16.gguf'],
            value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
            label="Model",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="green",
        secondary_hue="indigo",
        neutral_hue="zinc",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        block_background_fill_dark="*neutral_950",
        input_background_fill_dark="*neutral_950",
    ),
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
)

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()