Update app.py
app.py CHANGED
@@ -26,17 +26,17 @@ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)
 
 hf_hub_download(
-    repo_id="
-    filename="
+    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
+    filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )
 
 # Set the title and description
-title = "
+title = "t5-query-reformulation-RL Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/
+[Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
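This hunk repoints the download at the new checkpoint and retitles the demo. A minimal sketch of the resulting download step, assuming only the huggingface_hub dependency the file already uses; repo_id and filename are copied from the hunk, and the returned path is what the Llama() call later in the file expects:

import os
from huggingface_hub import hf_hub_download

os.makedirs("models", exist_ok=True)

# Fetch the Q8_0 GGUF into ./models; repo_id and filename taken
# verbatim from the hunk above.
path = hf_hub_download(
    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
    filename="t5-query-reformulation-RL-q8_0.gguf",
    local_dir="./models",
)
print(path)  # models/t5-query-reformulation-RL-q8_0.gguf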
@@ -80,12 +80,12 @@ def respond(
     try:
         global llama
         if llama == None:
-            llama = Llama("models/
+            llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",flash_attn=False,
                 n_gpu_layers=0,
-                n_batch=
-                n_ctx=
+                n_batch=64,
+                n_ctx=256,
                 n_threads=2,
-                n_threads_batch=
+                n_threads_batch=2)
 
         tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
         llama.encode(tokens)
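The loader now targets the new GGUF with a tighter budget: n_batch=64 and n_ctx=256 keep memory low on a CPU-only Space (n_gpu_layers=0), with two threads each for generation and batch processing. The tokenize()/encode() pair comes from the fairydreaming/t5 branch named in the description; mainline llama-cpp-python may not expose T5 this way, and the "<2ja>" prefix left in the context line is a MADLAD-400 target-language tag that the query-reformulation model may not use. A minimal sketch of the encoder pass under those branch assumptions:

from llama_cpp import Llama  # fairydreaming/t5 branch assumed, per the description

llama = Llama(
    "models/t5-query-reformulation-RL-q8_0.gguf",
    flash_attn=False,
    n_gpu_layers=0,    # CPU-only Space
    n_batch=64,
    n_ctx=256,         # short inputs only; keeps RAM low on free hardware
    n_threads=2,
    n_threads_batch=2,
)

# T5 is encoder-decoder: run the encoder over the prompt before decoding.
tokens = llama.tokenize("rewrite: best pizza near me".encode("utf-8"))  # example input, assumed
llama.encode(tokens)  # branch-specific call, mirrored from the hunk above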
@@ -123,6 +123,7 @@ demo = gr.ChatInterface(
         value="madlad400-3b-mt-q8_0.gguf",
         label="Model",
         info="Select the AI model to use for chat",
+        visible=False
     ),
     gr.Textbox(
         value="You are a helpful assistant.",
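The only change in this hunk hides the model dropdown: respond() hardcodes the GGUF path, so the selector (still defaulting to the old madlad400 filename) no longer drives anything. A minimal sketch of the surrounding ChatInterface wiring, with the choices list and the Textbox label assumed rather than shown in the diff:

import gradio as gr

demo = gr.ChatInterface(
    respond,  # chat handler defined earlier in app.py
    additional_inputs=[
        gr.Dropdown(
            choices=["madlad400-3b-mt-q8_0.gguf"],  # assumed; not visible in the diff
            value="madlad400-3b-mt-q8_0.gguf",
            label="Model",
            info="Select the AI model to use for chat",
            visible=False,  # hidden: the model path is fixed in respond()
        ),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),  # label assumed
    ],
)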