Update app.py
app.py CHANGED
@@ -36,11 +36,11 @@ hf_hub_download(
 
 
 # Set the title and description
-title = "t5-
+title = "flan-t5-large-grammar-synthesis Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5); I'm not sure the current llama-cpp-python server supports t5
 
-[Model-Q6_K-GGUF](flan-t5-large-grammar-synthesis), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
+[Model-Q6_K-GGUF](flan-t5-large-grammar-synthesis), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
 
 
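The hunk header shows this block sits just below the `hf_hub_download(...)` call that fetches the GGUF file at startup. For context, a minimal sketch of that step; the `repo_id` is a hypothetical stand-in (the Space's source repo isn't shown in this diff), and only the filename and the `models/` directory are taken from the code below:

```python
from huggingface_hub import hf_hub_download

# Fetch the quantized model at startup so Llama(f"models/{model_id}")
# can open it later. The repo_id is a hypothetical stand-in.
hf_hub_download(
    repo_id="your-username/flan-t5-large-grammar-synthesis-GGUF",  # assumed
    filename="ggml-model-Q6_K.gguf",
    local_dir="models",
)
```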
@@ -88,7 +88,7 @@ def respond(
     model_id = "ggml-model-Q6_K.gguf"
     llama = Llama(f"models/{model_id}",flash_attn=False,
                   n_gpu_layers=0,
-
+                  n_ctx=max_tokens,
                   n_threads=2,
                   n_threads_batch=2)
 
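This hunk wires the user-facing `max_tokens` value into `n_ctx`, the context window allocated at load time. Note that `n_ctx` must hold the prompt *and* the generated tokens, so a small `max_tokens` also shrinks the room available for input. A sketch of the resulting call, with `max_tokens` standing in for the Gradio slider value:

```python
from llama_cpp import Llama

max_tokens = 512  # stand-in for the Gradio slider value

# n_ctx covers prompt + completion, so enforce a sane floor
# (a sketch; the committed code passes max_tokens through directly).
llama = Llama(
    "models/ggml-model-Q6_K.gguf",
    flash_attn=False,
    n_gpu_layers=0,            # CPU-only Space
    n_ctx=max(max_tokens, 512),
    n_threads=2,
    n_threads_batch=2,
)
```

Since `respond()` rebuilds the `Llama` object on every chat turn, each message pays the full model-load cost; constructing it once at module scope (or caching one instance per `n_ctx`) would likely be noticeably faster.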
@@ -98,7 +98,7 @@ def respond(
     outputs =""
     iteration = 1
     for i in range(iteration):
-        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
+        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty, max_tokens=max_tokens):
             outputs+= llama.detokenize([token]).decode()
             yield outputs
             if token == llama.token_eos():
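The loop detokenizes each token and decodes it immediately; because `detokenize()` returns raw bytes, a multi-byte UTF-8 character split across two tokens can raise `UnicodeDecodeError` mid-stream. A defensive sketch (not the committed code) that buffers bytes, stops at EOS, and enforces the `max_tokens` budget on the Python side:

```python
def stream_text(llama, tokens, max_tokens, **sampling):
    """Yield the growing output string; a sketch assuming the llama-cpp-python API."""
    buf = b""
    for produced, token in enumerate(llama.generate(tokens, **sampling), start=1):
        if token == llama.token_eos():
            break
        buf += llama.detokenize([token])  # raw bytes, may end mid-character
        # errors="ignore" hides a trailing partial UTF-8 sequence until it completes
        yield buf.decode("utf-8", errors="ignore")
        if produced >= max_tokens:
            break
```

This would be called as `stream_text(llama, tokens, max_tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty)`. In mainline llama-cpp-python, `max_tokens` belongs to the high-level `create_completion()` API rather than the low-level `generate()` generator, so the `+` line above presumably relies on the fairydreaming t5 branch accepting it.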
@@ -126,9 +126,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "
+                "ggml-model-Q6_K.gguf",
             ],
-            value="
+            value="ggml-model-Q6_K.gguf",
             label="Model",
             info="Select the AI model to use for chat",
             visible=False
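The dropdown pins the single GGUF file and is hidden with `visible=False`, so its value still flows into `respond()` as an additional input without being user-editable. A stripped-down sketch of that wiring (the echo body is a placeholder):

```python
import gradio as gr

def respond(message, history, model_id):
    # model_id arrives from the hidden dropdown below
    yield f"[{model_id}] {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["ggml-model-Q6_K.gguf"],
            value="ggml-model-Q6_K.gguf",
            label="Model",
            info="Select the AI model to use for chat",
            visible=False,  # pinned: one model today, more choices later
        ),
    ],
)
```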