Akjava committed
Commit 8ce032d · verified · 1 Parent(s): 33572bc

Update app.py

Files changed (1)
  1. app.py +10 -7
app.py CHANGED
@@ -82,8 +82,8 @@ def respond(
         if llama == None:
             llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",flash_attn=False,
                     n_gpu_layers=0,
-                    n_batch=64,
-                    n_ctx=256,
+                    n_batch=32,
+                    n_ctx=512,
                     n_threads=2,
                     n_threads_batch=2)
 
@@ -91,11 +91,14 @@
         llama.encode(tokens)
         tokens = [llama.decoder_start_token()]
         outputs =""
-        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
-            outputs+= llama.detokenize([token]).decode()
-            yield outputs
-            if token == llama.token_eos():
-                break
+        iteration = 5
+        for i in range(iteration):
+            for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
+                outputs+= llama.detokenize([token]).decode()
+                yield outputs
+                if token == llama.token_eos():
+                    break
+            outputs+="\n"
         return outputs
     except Exception as e:
         # Custom exception handling
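
Net effect of the commit: the Llama constructor trades batch size for context window (n_batch 64 → 32, n_ctx 256 → 512), and generation now restarts the decoder five times over one encoded prompt, separating the candidate reformulations with newlines. What follows is a minimal sketch of that flow lifted out of the Gradio handler, assuming llama-cpp-python's T5 (encoder-decoder) support; it uses only calls visible in the diff plus Llama.tokenize(), and the reformulate name, the prompt, and the sampling values are illustrative, not taken from app.py.

from llama_cpp import Llama

llama = Llama(
    "models/t5-query-reformulation-RL-q8_0.gguf",
    flash_attn=False,
    n_gpu_layers=0,    # CPU-only Space
    n_batch=32,        # value introduced by this commit
    n_ctx=512,         # value introduced by this commit
    n_threads=2,
    n_threads_batch=2,
)

def reformulate(query, iterations=5):
    """Yield the growing output string; one newline-separated candidate per pass."""
    # Encode the prompt once; the excerpt does not show how `tokens` was
    # built, so tokenize() here is an assumption (standard llama-cpp-python call).
    llama.encode(llama.tokenize(query.encode()))
    start = [llama.decoder_start_token()]
    outputs = ""
    for _ in range(iterations):
        # Each generate() call restarts decoding from the decoder start token,
        # mirroring the committed loop (append, yield, then break on EOS).
        for token in llama.generate(start, top_k=40, top_p=0.95,
                                    temp=0.7, repeat_penalty=1.2):
            # errors="ignore" guards against partial UTF-8 bytes; the app
            # itself calls a bare decode().
            outputs += llama.detokenize([token]).decode(errors="ignore")
            yield outputs
            if token == llama.token_eos():
                break
        outputs += "\n"

# Drain the generator the way Gradio's streaming UI would, keeping the last state.
final = ""
for final in reformulate("how do i patch a bicycle tire"):
    pass
print(final)

Because encode() runs once, every pass conditions on the same encoder state; only sampler randomness differentiates the five candidates, so a near-zero temperature would make the passes effectively identical.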