Akjava committed · verified · Commit e594c8e · 1 Parent(s): 663a9c3

Update app.py

Files changed (1): app.py (+4 -4)
app.py CHANGED
@@ -34,9 +34,9 @@ hf_hub_download(
 # Set the title and description
 title = "madlad400-3b-mt Llama.cpp"
 description = """
-I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5) I'm not sure current llama-cpp-python support t5
+I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5

-[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF) [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp) [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
+[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp), [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
 """

@@ -82,10 +82,10 @@ def respond(
     if llama == None:
         llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
             n_gpu_layers=0,
-            n_batch=32,
+            n_batch=16,
             n_ctx=512,
             n_threads=2,
-            n_threads_batch=16)
+            n_threads_batch=8)

     tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
     llama.encode(tokens)
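For context, the first hunk's `@@` header shows this description sits just below an `hf_hub_download(` call that fetches the GGUF weights. A minimal sketch of that step, assuming the repo id from the Model-Q8_0-GGUF link above and the filename and `models/` path that `Llama(...)` loads in the second hunk; the actual arguments in app.py may differ:

```python
from huggingface_hub import hf_hub_download

# Fetch the quantized weights into the local models/ directory.
# repo_id and filename are assumptions inferred from the Model-Q8_0-GGUF
# link and the path passed to Llama() in app.py; adjust if they differ.
hf_hub_download(
    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
    filename="madlad400-3b-mt-q8_0.gguf",
    local_dir="models",
)
```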
 
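The second hunk halves `n_batch` (32 to 16) and `n_threads_batch` (16 to 8), a sensible trim when only `n_threads=2` CPU threads are available. madlad400 is a T5-style encoder-decoder, which is why the source text is prefixed with a target-language tag (`<2ja>` for Japanese) and pushed through `encode()` before decoding, and why the fairydreaming T5 branch is needed in the first place. A minimal sketch of the surrounding respond flow under that branch, assuming it exposes `decoder_start_token()` alongside the standard `generate()` loop; method names on the branch may differ:

```python
from llama_cpp import Llama

llama = Llama(
    "models/madlad400-3b-mt-q8_0.gguf",
    flash_attn=False,
    n_gpu_layers=0,   # CPU-only inference
    n_batch=16,       # values from the updated hunk
    n_ctx=512,
    n_threads=2,
    n_threads_batch=8,
)

def translate(message: str) -> str:
    # madlad400 selects the target language via a <2xx> prefix; <2ja> = Japanese.
    tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
    llama.encode(tokens)  # run the T5 encoder over the tagged source text

    # Autoregressive decode from the decoder start token (assumed API on the
    # T5 branch) until end-of-sequence.
    out = []
    for token in llama.generate([llama.decoder_start_token()], top_k=0, top_p=0.95, temp=0.0):
        if token == llama.token_eos():
            break
        out.append(token)
    return llama.detokenize(out).decode("utf-8", errors="ignore")
```

A greedy decode (`temp=0.0`) is a reasonable default for translation, where sampling variety is usually unwanted.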