Update app.py
app.py CHANGED
@@ -26,17 +26,17 @@ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)
 
 hf_hub_download(
-    repo_id="…
-    filename="…
+    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
+    filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )
 
 # Set the title and description
-title = "…
+title = "t5-query-reformulation-RL Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/…
+[Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
 
 
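For reference, the new download step is equivalent to this standalone snippet (a minimal sketch; it assumes the repo and filename introduced above are live on the Hub):

```python
import os
from huggingface_hub import hf_hub_download

os.makedirs("models", exist_ok=True)

# Fetch the quantized GGUF into ./models; hf_hub_download returns the local path.
path = hf_hub_download(
    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
    filename="t5-query-reformulation-RL-q8_0.gguf",
    local_dir="./models",
)
print(path)  # e.g. models/t5-query-reformulation-RL-q8_0.gguf
```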
@@ -80,12 +80,12 @@ def respond(
     try:
         global llama
         if llama == None:
-            llama = Llama("models/…
+            llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",flash_attn=False,
                 n_gpu_layers=0,
-                n_batch=…
-                n_ctx=…
+                n_batch=64,
+                n_ctx=256,
                 n_threads=2,
-                n_threads_batch=…
+                n_threads_batch=2)
 
         tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
         llama.encode(tokens)
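`llama.encode()` comes from the fairydreaming/t5 branch linked in the description; as the description notes, mainline llama-cpp-python may not support T5. As context for the two lines above, here is a hedged sketch of how a full encode/generate round trip typically looks on that branch; `decoder_start_token()` and the greedy sampling settings are assumptions, not part of this diff:

```python
# Assumes `llama` is a Llama instance built from the fairydreaming/t5 branch.
message = "Hello, world!"

# <2ja> is MADLAD-400's target-language tag ("translate to Japanese");
# the diff leaves this prefix unchanged.
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
llama.encode(tokens)                  # run the T5 encoder over the source tokens

out = b""
seed = [llama.decoder_start_token()]  # T5 decoding starts from a dedicated token
for token in llama.generate(seed, top_k=1, top_p=1.0, temp=0.0, repeat_penalty=1.0):
    if token == llama.token_eos():
        break
    out += llama.detokenize([token])
print(out.decode("utf-8"))
```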
@@ -123,6 +123,7 @@ demo = gr.ChatInterface(
             value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
+            visible=False
         ),
         gr.Textbox(
             value="You are a helpful assistant.",
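Adding `visible=False` hides the model dropdown from the UI while keeping it in the `respond` signature (note the `value` still names the old madlad400 GGUF, while `respond` now hard-codes the new path). A sketch of the surrounding wiring; everything outside the lines shown in the hunk is reconstructed and may differ from the actual file:

```python
import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["madlad400-3b-mt-q8_0.gguf"],  # assumed; not shown in the hunk
            value="madlad400-3b-mt-q8_0.gguf",
            label="Model",
            info="Select the AI model to use for chat",
            visible=False,  # hidden by this commit; respond() hard-codes the model path anyway
        ),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
    ],
)
```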