Akjava committed on
Commit
0430bc7
·
verified ·
1 Parent(s): 094f26e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -7
app.py CHANGED
@@ -28,8 +28,8 @@ os.makedirs("models",exist_ok=True)
28
 
29
 
30
  hf_hub_download(
31
- repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
32
- filename="t5-query-reformulation-RL-q8_0.gguf",
33
  local_dir="./models",
34
  )
35
 
@@ -85,11 +85,10 @@ def respond(
85
  try:
86
  global llama
87
  if llama == None:
88
- model_id = "t5-query-reformulation-RL-q8_0.gguf"
89
  llama = Llama(f"models/{model_id}",flash_attn=False,
90
  n_gpu_layers=0,
91
- #n_batch=16,#batch sometime make error
92
- n_ctx=512,
93
  n_threads=2,
94
  n_threads_batch=2)
95
 
@@ -97,14 +96,14 @@ def respond(
97
  llama.encode(tokens)
98
  tokens = [llama.decoder_start_token()]
99
  outputs =""
100
- iteration = 5
101
  for i in range(iteration):
102
  for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
103
  outputs+= llama.detokenize([token]).decode()
104
  yield outputs
105
  if token == llama.token_eos():
106
  break
107
- outputs+="\n"
108
  return outputs
109
  except Exception as e:
110
  # Custom exception handling
 
28
 
29
 
30
  hf_hub_download(
31
+ repo_id="pszemraj/flan-t5-large-grammar-synthesis",
32
+ filename="ggml-model-Q6_K.gguf",
33
  local_dir="./models",
34
  )
35
 
 
85
  try:
86
  global llama
87
  if llama == None:
88
+ model_id = "ggml-model-Q6_K.gguf"
89
  llama = Llama(f"models/{model_id}",flash_attn=False,
90
  n_gpu_layers=0,
91
+
 
92
  n_threads=2,
93
  n_threads_batch=2)
94
 
 
96
  llama.encode(tokens)
97
  tokens = [llama.decoder_start_token()]
98
  outputs =""
99
+ iteration = 1
100
  for i in range(iteration):
101
  for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
102
  outputs+= llama.detokenize([token]).decode()
103
  yield outputs
104
  if token == llama.token_eos():
105
  break
106
+ #outputs+="\n"
107
  return outputs
108
  except Exception as e:
109
  # Custom exception handling