Akjava committed on
Commit
e496267
·
verified ·
1 Parent(s): e557599

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -1
app.py CHANGED
@@ -283,7 +283,12 @@ def respond(
283
  top_k: int,
284
  repeat_penalty: float,
285
  ):
286
- llama = Llama("models/madlad400-3b-mt-q8_0.gguf")
 
 
 
 
 
287
  #tokens = llama.tokenize(f"<2ja>{message}")#
288
  tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
289
  llama.encode(tokens)
 
283
  top_k: int,
284
  repeat_penalty: float,
285
  ):
286
+ llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
287
+ n_gpu_layers=0,
288
+ n_batch=16,
289
+ n_ctx=512,
290
+ n_threads=2,
291
+ n_threads_batch=8,)
292
  #tokens = llama.tokenize(f"<2ja>{message}")#
293
  tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
294
  llama.encode(tokens)