Update app.py
app.py
CHANGED
@@ -34,9 +34,9 @@ hf_hub_download(
 # Set the title and description
 title = "madlad400-3b-mt Llama.cpp"
 description = """
-I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5) I'm not sure current llama-cpp-python support t5
+I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF) [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp) [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
+[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp), [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
 """
 
 
@@ -82,10 +82,10 @@ def respond(
     if llama == None:
         llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
                       n_gpu_layers=0,
-                      n_batch=
+                      n_batch=16,
                       n_ctx=512,
                       n_threads=2,
-                      n_threads_batch=
+                      n_threads_batch=8)
 
     tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
     llama.encode(tokens)
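For reference, a minimal, self-contained sketch of how the settings committed here fit around the encoder call this diff touches. The parameter comments follow llama-cpp-python's `Llama` constructor arguments; the example `message` and the standalone script structure are assumptions for illustration, not part of the commit, and running it requires the fairydreaming t5 branch linked in the description.

```python
from llama_cpp import Llama  # T5 encoder/decoder support comes from the fairydreaming t5 branch

# Model settings as committed in this diff: CPU-only, small batch, 512-token context.
llama = Llama(
    "models/madlad400-3b-mt-q8_0.gguf",
    flash_attn=False,
    n_gpu_layers=0,       # keep all layers on the CPU
    n_batch=16,           # prompt/batch size added by this commit
    n_ctx=512,            # context window
    n_threads=2,          # threads used for generation
    n_threads_batch=8,    # threads used for batch (prompt) processing, added by this commit
)

# madlad400 is a T5-style translator: the target language is selected with a
# prefix such as <2ja> (translate to Japanese) placed before the source text.
message = "Hello, world!"  # assumed example input, not part of the commit
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))

# Run the encoder pass; decoding continues in the rest of respond(), outside this hunk.
llama.encode(tokens)
```

Setting `n_threads_batch` higher than `n_threads` is presumably aimed at speeding up the batched encoder pass, which dominates short translation requests on CPU, while keeping per-token generation at two threads.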