llama-cpp-agent

Paused

pabloce committed on Jul 9, 2024

Commit

a15f664

verified ·

1 Parent(s): abe9b48

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,6 +9,9 @@ from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
 from huggingface_hub import hf_hub_download
 hf_hub_download(
     repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
     filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
@@ -81,15 +84,21 @@ def respond(
     top_k,
     repeat_penalty,
 ):
     chat_template = get_messages_formatter_type(model)
-    llm = Llama(
-        model_path=f"models/{model}",
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=8192,
-    )
     provider = LlamaCppPythonProvider(llm)
     agent = LlamaCppAgent(

 import gradio as gr
 from huggingface_hub import hf_hub_download
+llm = None
+llm_model = None
 hf_hub_download(
     repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
     filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
     top_k,
     repeat_penalty,
 ):
+    global llm
+    global llm_model
     chat_template = get_messages_formatter_type(model)
+    if llm is None or llm_model != model:
+        llm = Llama(
+            model_path=f"models/{model}",
+            flash_attn=True,
+            n_gpu_layers=81,
+            n_batch=1024,
+            n_ctx=8192,
+        )
+        llm_model = model
     provider = LlamaCppPythonProvider(llm)
     agent = LlamaCppAgent(