Update app.py
app.py CHANGED
@@ -283,7 +283,12 @@ def respond(
     top_k: int,
     repeat_penalty: float,
 ):
-    llama = Llama("models/madlad400-3b-mt-q8_0.gguf")
+    llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
+                  n_gpu_layers=0,
+                  n_batch=16,
+                  n_ctx=512,
+                  n_threads=2,
+                  n_threads_batch=8,)
     #tokens = llama.tokenize(f"<2ja>{message}")#
     tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
     llama.encode(tokens)
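
For reference, a minimal sketch of how the reconfigured loader and the <2ja> prompt preparation fit together with llama-cpp-python. The load_translator helper name and the example message are assumptions for illustration only; the constructor options, the tokenize call, and the encode call mirror the lines added in this commit.

from llama_cpp import Llama  # assumes llama-cpp-python is installed and the GGUF file exists locally

def load_translator(model_path: str = "models/madlad400-3b-mt-q8_0.gguf") -> Llama:
    # Hypothetical helper wrapping the constructor call introduced in this commit.
    return Llama(
        model_path,
        flash_attn=False,      # flash attention disabled
        n_gpu_layers=0,        # no layers offloaded to a GPU: CPU-only inference
        n_batch=16,            # small logical batch size to keep memory use low
        n_ctx=512,             # short context window, enough for single sentences
        n_threads=2,           # threads used for generation
        n_threads_batch=8,     # threads used for batch/prompt processing
    )

llama = load_translator()

# MADLAD-400 expects a target-language tag such as "<2ja>" (translate to Japanese)
# prefixed to the source text; tokenize() takes bytes, hence the explicit UTF-8 encode.
message = "Hello, world!"  # example input, assumed for illustration
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))

# MADLAD-400 is an encoder-decoder model, so the commit runs the encoder over the
# prompt tokens; decoding the translation happens later in respond().
llama.encode(tokens)

The small n_ctx and n_batch values, together with n_gpu_layers=0, trade throughput for a low memory footprint, which fits a CPU-only Space with limited RAM.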