Update app.py
app.py CHANGED
@@ -28,8 +28,8 @@ os.makedirs("models",exist_ok=True)
 
 
 hf_hub_download(
-    repo_id="
-    filename="
+    repo_id="pszemraj/flan-t5-large-grammar-synthesis",
+    filename="ggml-model-Q6_K.gguf",
     local_dir="./models",
 )
 
@@ -85,11 +85,10 @@ def respond(
     try:
         global llama
         if llama == None:
-            model_id = "
+            model_id = "ggml-model-Q6_K.gguf"
             llama = Llama(f"models/{model_id}",flash_attn=False,
                 n_gpu_layers=0,
-
-                n_ctx=512,
+
                 n_threads=2,
                 n_threads_batch=2)
 
@@ -97,14 +96,14 @@ def respond(
         llama.encode(tokens)
         tokens = [llama.decoder_start_token()]
         outputs =""
-        iteration =
+        iteration = 1
         for i in range(iteration):
             for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
                 outputs+= llama.detokenize([token]).decode()
                 yield outputs
                 if token == llama.token_eos():
                     break
-        outputs+="\n"
+        #outputs+="\n"
         return outputs
     except Exception as e:
         # Custom exception handling
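For context, this commit wires pszemraj/flan-t5-large-grammar-synthesis (a T5-style encoder-decoder in GGUF form) into a lazily initialized llama-cpp-python instance. The sketch below replays the same flow outside the Space: download the quantized weights, load them on CPU, encode the input, then stream tokens from the decoder. The prompt string and sampling values are illustrative assumptions, not taken from the commit; the calls themselves (encode, decoder_start_token, generate, detokenize, token_eos) mirror the ones in respond() and assume a llama-cpp-python build with encoder-decoder (T5) support.

# Minimal standalone sketch of the pipeline app.py sets up.
# Assumes a llama-cpp-python build with T5/encoder-decoder support,
# which the encode()/decoder_start_token() calls in respond() rely on.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

hf_hub_download(
    repo_id="pszemraj/flan-t5-large-grammar-synthesis",
    filename="ggml-model-Q6_K.gguf",
    local_dir="./models",
)

llama = Llama(
    "models/ggml-model-Q6_K.gguf",
    flash_attn=False,
    n_gpu_layers=0,      # CPU-only, as in the Space
    n_threads=2,
    n_threads_batch=2,
)

# T5 is encoder-decoder: run the encoder over the prompt first, then
# generate starting from the decoder start token (same order as respond()).
prompt = "he go to school yesterday"   # illustrative input, not from the commit
llama.encode(llama.tokenize(prompt.encode()))

tokens = [llama.decoder_start_token()]
outputs = ""
for token in llama.generate(tokens, top_k=40, top_p=0.95,
                            temp=1.0, repeat_penalty=1.0):  # assumed sampling values
    if token == llama.token_eos():
        break
    outputs += llama.detokenize([token]).decode()
print(outputs)

Note that this sketch breaks before detokenizing the EOS token, while respond() detokenizes every token and then checks for EOS; either order works, the app's version just streams partial output via yield as each token arrives.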