Akjava committed
Commit 1074fa0 · verified · 1 Parent(s): 0430bc7

Update app.py

Files changed (1): app.py (+6, -6)
app.py CHANGED
@@ -36,11 +36,11 @@ hf_hub_download(
 
 
 # Set the title and description
-title = "t5-query-reformulation-RL Llama.cpp"
+title = "flan-t5-large-grammar-synthesis Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python server support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
+[Model-Q6_K-GGUF](flan-t5-large-grammar-synthesis), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
 
 
@@ -88,7 +88,7 @@ def respond(
     model_id = "ggml-model-Q6_K.gguf"
     llama = Llama(f"models/{model_id}",flash_attn=False,
                   n_gpu_layers=0,
-
+                  n_ctx=max_tokens,
                   n_threads=2,
                   n_threads_batch=2)
 
@@ -98,7 +98,7 @@ def respond(
     outputs =""
     iteration = 1
     for i in range(iteration):
-        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
+        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty, max_tokens=max_tokens):
             outputs+= llama.detokenize([token]).decode()
             yield outputs
             if token == llama.token_eos():
@@ -126,9 +126,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "madlad400-3b-mt-q8_0.gguf",
+                "ggml-model-Q6_K.gguf",
             ],
-            value="madlad400-3b-mt-q8_0.gguf",
+            value="ggml-model-Q6_K.gguf",
             label="Model",
             info="Select the AI model to use for chat",
             visible=False
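
For context, a minimal, self-contained sketch of the generation path this commit touches, assuming the fairydreaming T5 branch keeps the upstream llama-cpp-python low-level API (Llama, tokenize, generate, detokenize, token_eos). The model path, the sampling defaults, and the generate_text helper name are illustrative only; the output-length cap is enforced by counting tokens in the loop, since the stock Llama.generate() does not advertise a max_tokens keyword and whether the T5 branch accepts one is an open question.

# Sketch only: assumes the fairydreaming T5 branch of llama-cpp-python
# (https://github.com/fairydreaming/llama-cpp-python/tree/t5) exposes the
# same low-level Llama API as upstream. Names mirror the app's respond().
from llama_cpp import Llama

llama = Llama(
    "models/ggml-model-Q6_K.gguf",  # downloaded earlier via hf_hub_download
    flash_attn=False,
    n_gpu_layers=0,       # CPU-only Space
    n_ctx=512,            # the commit ties this to the max_tokens slider
    n_threads=2,
    n_threads_batch=2,
)

def generate_text(prompt: str, max_tokens: int = 512, temperature: float = 0.7,
                  top_k: int = 40, top_p: float = 0.95, repeat_penalty: float = 1.1):
    """Stream detokenized output, stopping at EOS or the token budget."""
    tokens = llama.tokenize(prompt.encode("utf-8"))
    outputs = ""
    produced = 0
    for token in llama.generate(tokens, top_k=top_k, top_p=top_p,
                                temp=temperature, repeat_penalty=repeat_penalty):
        outputs += llama.detokenize([token]).decode()
        produced += 1
        yield outputs
        # Manual cap: avoids relying on a max_tokens kwarg that upstream
        # Llama.generate() does not document.
        if token == llama.token_eos() or produced >= max_tokens:
            break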
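
And a hedged sketch of how the hidden model dropdown plugs into gr.ChatInterface after this commit. The respond() stub and the extra sliders are assumptions based on the parameters the diff forwards to generate(); only the Dropdown values come from the change itself.

import gradio as gr

def respond(message, history, model, max_tokens, temperature):
    # Placeholder for the app's streaming generator (see the sketch above).
    yield f"[{model}] max_tokens={max_tokens}, temperature={temperature}: {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["ggml-model-Q6_K.gguf"],   # single bundled model
            value="ggml-model-Q6_K.gguf",
            label="Model",
            info="Select the AI model to use for chat",
            visible=False,                      # hidden: only one choice
        ),
        gr.Slider(1, 2048, value=512, step=1, label="Max tokens"),
        gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature"),
    ],
)

if __name__ == "__main__":
    demo.launch()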