pabloce committed on
Commit
a15f664
·
verified ·
1 Parent(s): abe9b48

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -8
app.py CHANGED
@@ -9,6 +9,9 @@ from llama_cpp_agent.chat_history.messages import Roles
9
  import gradio as gr
10
  from huggingface_hub import hf_hub_download
11
 
 
 
 
12
  hf_hub_download(
13
  repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
14
  filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
@@ -81,15 +84,21 @@ def respond(
81
  top_k,
82
  repeat_penalty,
83
  ):
 
 
 
84
  chat_template = get_messages_formatter_type(model)
85
-
86
- llm = Llama(
87
- model_path=f"models/{model}",
88
- flash_attn=True,
89
- n_gpu_layers=81,
90
- n_batch=1024,
91
- n_ctx=8192,
92
- )
 
 
 
93
  provider = LlamaCppPythonProvider(llm)
94
 
95
  agent = LlamaCppAgent(
 
9
  import gradio as gr
10
  from huggingface_hub import hf_hub_download
11
 
12
+ llm = None
13
+ llm_model = None
14
+
15
  hf_hub_download(
16
  repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
17
  filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
 
84
  top_k,
85
  repeat_penalty,
86
  ):
87
+ global llm
88
+ global llm_model
89
+
90
  chat_template = get_messages_formatter_type(model)
91
+
92
+ if llm is None or llm_model != model:
93
+ llm = Llama(
94
+ model_path=f"models/{model}",
95
+ flash_attn=True,
96
+ n_gpu_layers=81,
97
+ n_batch=1024,
98
+ n_ctx=8192,
99
+ )
100
+ llm_model = model
101
+
102
  provider = LlamaCppPythonProvider(llm)
103
 
104
  agent = LlamaCppAgent(