Update app.py
app.py CHANGED

@@ -8,6 +8,7 @@ subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://a
 subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
 
 hf_hub_download(repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF", filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", local_dir="./models")
+hf_hub_download(repo_id="bartowski/Smaug-Llama-3-70B-Instruct-GGUF", filename="Smaug-Llama-3-70B-Instruct-Q3_K_M.gguf", local_dir="./models")
 
 @spaces.GPU(duration=120)
 def respond(
@@ -17,6 +18,7 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    model,
 ):
     from llama_cpp import Llama
     from llama_cpp_agent import LlamaCppAgent
@@ -24,7 +26,7 @@ def respond(
     from llama_cpp_agent.providers import LlamaCppPythonProvider
     from llama_cpp_agent.chat_history import BasicChatHistory
     from llama_cpp_agent.chat_history.messages import Roles
-
+    print(model)
     llm = Llama(
         model_path="models/Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
         n_gpu_layers=81,
@@ -70,6 +72,7 @@ def respond(
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
+        gr.Dropdown(['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Smaug-Llama-3-70B-Instruct-Q3_K_M.gguf'], label="Model"),
         gr.Textbox(value="You are a helpful assistant.", label="System message"),
         gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
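Note that as committed, respond only prints the selected model and still hardcodes the Meta-Llama path in Llama(). A minimal sketch of how the dropdown value could be wired through, assuming model keeps its place as the last parameter of respond. One caveat: gr.ChatInterface passes additional_inputs to the function positionally after message and history, so the Dropdown's slot in additional_inputs (here first) would need to match model's position in the signature (here last) for the selection to arrive in the right argument.

    # Sketch only: use the dropdown selection instead of the hardcoded path.
    # `model` is the filename chosen in the gr.Dropdown; both files were
    # downloaded into ./models by hf_hub_download at startup.
    llm = Llama(
        model_path=f"models/{model}",
        n_gpu_layers=81,
    )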