Tijmen2 committed on
Commit
7e9cbb4
·
verified ·
1 Parent(s): 839a5ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -5
app.py CHANGED
@@ -14,11 +14,7 @@ llm = Llama(
14
  n_ctx=2048,
15
  chat_format="llama-3",
16
  n_gpu_layers=-1, # ensure all layers are on GPU
17
- n_threads=1, # no CPU multi-threading
18
- offload_kqv=True, # store kqv on GPU
19
- vocab_only=False,
20
- use_mmap=True,
21
- use_mlock=False,
22
  )
23
 
24
  # Placeholder responses for when context is empty
 
14
  n_ctx=2048,
15
  chat_format="llama-3",
16
  n_gpu_layers=-1, # ensure all layers are on GPU
17
+ split_mode="LLAMA_SPLIT_MODE_NONE",
 
 
 
 
18
  )
19
 
20
  # Placeholder responses for when context is empty