update
README.md CHANGED

@@ -1,6 +1,5 @@
 ---
-title: Llamacpp-
-emoji: ⚡
+title: Llamacpp-t5-query-reformulation-RL
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
@@ -8,7 +7,7 @@ sdk_version: 5.20.1
 app_file: app.py
 pinned: false
 license: mit
-short_description:
+short_description: t5-query-reformulation-RL on Llama.cpp CPU
 ---
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED

@@ -25,16 +25,20 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)
 
+
+
 hf_hub_download(
     repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
     filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )
 
+
+
 # Set the title and description
 title = "t5-query-reformulation-RL Llama.cpp"
 description = """
-I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
+I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python server support t5
 
 [Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
@@ -77,16 +81,18 @@ def respond(
     Returns:
         str: The response to the message.
     """
+
     try:
         global llama
         if llama == None:
-
+            model_id = "t5-query-reformulation-RL-q8_0.gguf"
+            llama = Llama(f"models/{model_id}",flash_attn=False,
                 n_gpu_layers=0,
-                n_batch=
+                #n_batch=16,#batch sometime make error
                 n_ctx=512,
                 n_threads=2,
                 n_threads_batch=2)
-
+
         tokens = llama.tokenize(f"{message}".encode("utf-8"))
         llama.encode(tokens)
         tokens = [llama.decoder_start_token()]
@@ -114,7 +120,7 @@
 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[["What is the capital of France?"], ["
+    examples=[["What is the capital of France?"], ["What real child was raised by wolves?"], ["What is gravity?"]],
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),
@@ -140,12 +146,12 @@ demo = gr.ChatInterface(
         value=1024,
         step=1,
         label="Max Tokens",
-        info="Maximum length of response (higher = longer replies)",
+        info="Maximum length of response (higher = longer replies)",visible=False
     ),
     gr.Slider(
         minimum=0.1,
         maximum=2.0,
-        value=0.
+        value=0.4,
         step=0.1,
         label="Temperature",
         info="Creativity level (higher = more creative, lower = more focused)",
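
The hunks above cut off before the decoder loop that actually produces the reformulated query, and the slider block is only a fragment. Below is a minimal, hedged sketch of how the touched pieces fit together. It assumes the fairydreaming t5 branch keeps llama-cpp-python's usual generate()/detokenize()/token_eos() API, that the GGUF file was already fetched by the hf_hub_download call shown above, and it guesses the respond() signature and the Max Tokens slider range, none of which appear in this commit.

# Hedged sketch only: names and values not present in the diff (slider ranges,
# the respond() signature, the generate() loop) are assumptions.
import gradio as gr
from llama_cpp import Llama  # fairydreaming t5 branch assumed


llama = None
MODEL_PATH = "models/t5-query-reformulation-RL-q8_0.gguf"  # fetched by hf_hub_download


def respond(message, history, max_tokens=1024, temperature=0.4):
    global llama
    if llama is None:
        # CPU-only settings from the commit; n_batch is left at its default
        # because the commit notes an explicit batch size sometimes errors.
        llama = Llama(
            MODEL_PATH,
            flash_attn=False,
            n_gpu_layers=0,
            n_ctx=512,
            n_threads=2,
            n_threads_batch=2,
        )

    # Encoder pass over the raw query (T5 is an encoder-decoder model).
    llama.encode(llama.tokenize(message.encode("utf-8")))

    # Decoder pass: start from the decoder-start token and sample until EOS
    # or the token budget is spent, streaming partial text to the chat UI.
    output = ""
    for i, token in enumerate(
        llama.generate([llama.decoder_start_token()], temp=temperature)
    ):
        if token == llama.token_eos() or i >= max_tokens:
            break
        output += llama.detokenize([token]).decode("utf-8", errors="ignore")
        yield output


demo = gr.ChatInterface(
    respond,
    examples=[
        ["What is the capital of France?"],
        ["What real child was raised by wolves?"],
        ["What is gravity?"],
    ],
    additional_inputs_accordion=gr.Accordion(
        label="⚙️ Parameters", open=False, render=False
    ),
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1,
                  label="Max Tokens",
                  info="Maximum length of response (higher = longer replies)",
                  visible=False),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1,
                  label="Temperature",
                  info="Creativity level (higher = more creative, lower = more focused)"),
    ],
)

if __name__ == "__main__":
    demo.launch()

Loading the model lazily inside respond() keeps Space startup light, and with n_gpu_layers=0 inference runs entirely on the two CPU threads configured in the commit.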