Akjava committed
Commit 8c8c1ee · verified · 1 Parent(s): e864d5c

Update app.py

Files changed (1)
  1. app.py +9 -8
app.py CHANGED
@@ -26,17 +26,17 @@ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)
 
 hf_hub_download(
-    repo_id="mtsdurica/madlad400-3b-mt-Q8_0-GGUF",
-    filename="madlad400-3b-mt-q8_0.gguf",
+    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
+    filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )
 
 # Set the title and description
-title = "madlad400-3b-mt Llama.cpp"
+title = "t5-query-reformulation-RL Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/mtsdurica/madlad400-3b-mt-Q8_0-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp), [Reference2](https://qiita.com/mbotsu/items/7dd80bc637ff6c12ef6a)
+[Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
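The net effect of this hunk is that the Space now downloads the q8_0 GGUF of AnanyaPathak/t5-query-reformulation-RL instead of the madlad400 translation model. In isolation, the updated download step looks roughly like the sketch below; it uses the same standard huggingface_hub call shown in the hunk, and the final print is only illustrative.

import os
from huggingface_hub import hf_hub_download

# Download the quantized GGUF file from the Hub into ./models
os.makedirs("models", exist_ok=True)
gguf_path = hf_hub_download(
    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
    filename="t5-query-reformulation-RL-q8_0.gguf",
    local_dir="./models",
)
print(gguf_path)  # e.g. models/t5-query-reformulation-RL-q8_0.gguf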
 
@@ -80,12 +80,12 @@ def respond(
     try:
         global llama
         if llama == None:
-            llama = Llama("models/madlad400-3b-mt-q8_0.gguf",flash_attn=False,
+            llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",flash_attn=False,
                     n_gpu_layers=0,
-                    n_batch=16,
-                    n_ctx=512,
+                    n_batch=64,
+                    n_ctx=256,
                     n_threads=2,
-                    n_threads_batch=8)
+                    n_threads_batch=2)
 
         tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
         llama.encode(tokens)
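Putting the changed settings together, model loading now works roughly as in the following sketch. Llama.encode() comes from fairydreaming's T5 branch of llama-cpp-python linked in the description (mainline support is uncertain, as the description itself notes), and the sample input string here is made up; everything else mirrors the hunk.

from llama_cpp import Llama

# Lazy, CPU-only load with the new, smaller footprint
llama = Llama(
    "models/t5-query-reformulation-RL-q8_0.gguf",
    flash_attn=False,
    n_gpu_layers=0,     # no GPU offload
    n_batch=64,         # raised from 16
    n_ctx=256,          # lowered from 512
    n_threads=2,
    n_threads_batch=2,  # lowered from 8
)

# T5 is encoder-decoder: tokenize the prompt, then run the encoder.
# encode() exists in the T5 branch; it is not guaranteed in mainline llama-cpp-python.
# The app still prepends the <2ja> tag kept over from the madlad400 setup.
tokens = llama.tokenize("<2ja>example input".encode("utf-8"))
llama.encode(tokens)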
@@ -123,6 +123,7 @@ demo = gr.ChatInterface(
             value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
+            visible=False
         ),
         gr.Textbox(
             value="You are a helpful assistant.",
 