Tijmen2 committed on
Commit
839a5ef
·
verified ·
1 Parent(s): 086415c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -0
app.py CHANGED
@@ -15,6 +15,10 @@ llm = Llama(
15
  chat_format="llama-3",
16
  n_gpu_layers=-1, # ensure all layers are on GPU
17
  n_threads=1, # no CPU multi-threading
 
 
 
 
18
  )
19
 
20
  # Placeholder responses for when context is empty
 
15
  chat_format="llama-3",
16
  n_gpu_layers=-1, # ensure all layers are on GPU
17
  n_threads=1, # no CPU multi-threading
18
+ offload_kqv=True, # store kqv on GPU
19
+ vocab_only=False,
20
+ use_mmap=True,
21
+ use_mlock=False,
22
  )
23
 
24
  # Placeholder responses for when context is empty