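# Gradio chat demo for Llama 3 GGUF models, served with llama-cpp-python
# and llama-cpp-agent on a Hugging Face Spaces ZeroGPU.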
import spaces
import subprocess
import gradio as gr
from huggingface_hub import hf_hub_download
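# Install a CUDA build of llama-cpp-python from prebuilt wheels at runtime,
# plus a pinned llama-cpp-agent release that is compatible with it.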
subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
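# Download the quantized GGUF weights into ./models at startup.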
hf_hub_download(repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF", filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", local_dir = "./models")
hf_hub_download(repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF", filename="Llama-3-8B-Synthia-v3.5-f16.gguf", local_dir = "./models")
css = """
.message-row {
justify-content: space-evenly;
}
.message .user .message-bubble-border {
border-radius: 6px;
}
"""
@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model,
):
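    # Import llama-cpp inside the GPU-decorated function: on ZeroGPU,
    # CUDA-touching libraries should only be initialized once a GPU is attached.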
    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles
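    # Load the selected GGUF and offload every layer to the GPU
    # (81 covers all 80 transformer blocks of the 70B model plus the output layer).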
    llm = Llama(
        model_path=f"models/{model}",
        n_gpu_layers=81,
    )
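    # Wrap the model in a provider and an agent that applies the Llama 3 chat template.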
    provider = LlamaCppPythonProvider(llm)
    agent = LlamaCppAgent(
        provider,
        system_prompt=system_message,  # system prompt comes from the UI textbox
        predefined_messages_formatter_type=MessagesFormatterType.LLAMA_3,
        debug_output=True,
    )
    settings = provider.get_provider_default_settings()
    settings.max_tokens = max_tokens
    settings.temperature = temperature
    settings.top_p = top_p
    settings.stream = True
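    # Replay the Gradio (user, assistant) pairs into llama-cpp-agent's history.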
    messages = BasicChatHistory()
    for user_text, assistant_text in history:
        messages.add_message({
            'role': Roles.user,
            'content': user_text,
        })
        messages.add_message({
            'role': Roles.assistant,
            'content': assistant_text,
        })
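    # Stream partial output back to the UI as tokens arrive.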
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs
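# Chat UI: the additional inputs below map one-to-one onto respond()'s parameters.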
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Dropdown(
            ['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Llama-3-8B-Synthia-v3.5-f16.gguf'],
            value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
            label="Model",
        ),
    ],
    theme=gr.themes.Soft(
        primary_hue="green",
        secondary_hue="indigo",
        neutral_hue="zinc",
        font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
    ).set(
        block_background_fill_dark="*neutral_950",
        input_background_fill_dark="*neutral_950",
        message_border_radius="*radius_md",
        border_color_accent_subdued="*neutral_900",
    ),
    css=css,
)
if __name__ == "__main__":
    demo.launch()