Update app.py
app.py
CHANGED
@@ -34,9 +34,9 @@ hf_hub_download(
 # Set the title and description
 title = "madlad400-3b-mt Llama.cpp"
 description = """
-I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5) I'm not sure current llama-cpp-python support t5
+I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF) [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp) [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
+[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp), [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
 """
 
 
@@ -82,10 +82,10 @@ def respond(
     if llama == None:
         llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
                       n_gpu_layers=0,
-                      n_batch=
+                      n_batch=16,
                       n_ctx=512,
                       n_threads=2,
-                      n_threads_batch=
+                      n_threads_batch=8)
 
     tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
     llama.encode(tokens)
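For reference, a minimal, self-contained sketch of how the settings committed here fit around the encoder call this diff touches. The parameter comments follow llama-cpp-python's `Llama` constructor arguments; the example `message` and the standalone script structure are assumptions for illustration, not part of the commit, and running it requires the fairydreaming t5 branch linked in the description.

```python
from llama_cpp import Llama  # T5 encoder/decoder support comes from the fairydreaming t5 branch

# Model settings as committed in this diff: CPU-only, small batch, 512-token context.
llama = Llama(
    "models/madlad400-3b-mt-q8_0.gguf",
    flash_attn=False,
    n_gpu_layers=0,       # keep all layers on the CPU
    n_batch=16,           # prompt/batch size added by this commit
    n_ctx=512,            # context window
    n_threads=2,          # threads used for generation
    n_threads_batch=8,    # threads used for batch (prompt) processing, added by this commit
)

# madlad400 is a T5-style translator: the target language is selected with a
# prefix such as <2ja> (translate to Japanese) placed before the source text.
message = "Hello, world!"  # assumed example input, not part of the commit
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))

# Run the encoder pass; decoding continues in the rest of respond(), outside this hunk.
llama.encode(tokens)
```

Setting `n_threads_batch` higher than `n_threads` is presumably aimed at speeding up the batched encoder pass, which dominates short translation requests on CPU, while keeping per-token generation at two threads.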