Akjava committed
Commit 1074fa0 · verified · 1 Parent(s): 0430bc7

Update app.py

Files changed (1): app.py (+6, -6)
app.py CHANGED
@@ -36,11 +36,11 @@ hf_hub_download(
 
 
 # Set the title and description
-title = "t5-query-reformulation-RL Llama.cpp"
+title = "flan-t5-large-grammar-synthesis Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python server support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
+[Model-Q6_K-GGUF](flan-t5-large-grammar-synthesis), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
 
 
@@ -88,7 +88,7 @@ def respond(
     model_id = "ggml-model-Q6_K.gguf"
     llama = Llama(f"models/{model_id}",flash_attn=False,
                   n_gpu_layers=0,
-
+                  n_ctx=max_tokens,
                   n_threads=2,
                   n_threads_batch=2)
 
@@ -98,7 +98,7 @@ def respond(
     outputs =""
     iteration = 1
     for i in range(iteration):
-        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty):
+        for token in llama.generate(tokens, top_k=top_k, top_p=top_p, temp=temperature, repeat_penalty=repeat_penalty, max_tokens=max_tokens):
             outputs+= llama.detokenize([token]).decode()
             yield outputs
             if token == llama.token_eos():
@@ -126,9 +126,9 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Dropdown(
             choices=[
-                "madlad400-3b-mt-q8_0.gguf",
+                "ggml-model-Q6_K.gguf",
             ],
-            value="madlad400-3b-mt-q8_0.gguf",
+            value="ggml-model-Q6_K.gguf",
             label="Model",
             info="Select the AI model to use for chat",
             visible=False
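
For context, a minimal, self-contained sketch of the generation path this commit touches, assuming the fairydreaming T5 branch keeps the upstream llama-cpp-python low-level API (Llama, tokenize, generate, detokenize, token_eos). The model path, the sampling defaults, and the generate_text helper name are illustrative only; the output-length cap is enforced by counting tokens in the loop, since the stock Llama.generate() does not advertise a max_tokens keyword and whether the T5 branch accepts one is an open question.

# Sketch only: assumes the fairydreaming T5 branch of llama-cpp-python
# (https://github.com/fairydreaming/llama-cpp-python/tree/t5) exposes the
# same low-level Llama API as upstream. Names mirror the app's respond().
from llama_cpp import Llama

llama = Llama(
    "models/ggml-model-Q6_K.gguf",  # downloaded earlier via hf_hub_download
    flash_attn=False,
    n_gpu_layers=0,       # CPU-only Space
    n_ctx=512,            # the commit ties this to the max_tokens slider
    n_threads=2,
    n_threads_batch=2,
)

def generate_text(prompt: str, max_tokens: int = 512, temperature: float = 0.7,
                  top_k: int = 40, top_p: float = 0.95, repeat_penalty: float = 1.1):
    """Stream detokenized output, stopping at EOS or the token budget."""
    tokens = llama.tokenize(prompt.encode("utf-8"))
    outputs = ""
    produced = 0
    for token in llama.generate(tokens, top_k=top_k, top_p=top_p,
                                temp=temperature, repeat_penalty=repeat_penalty):
        outputs += llama.detokenize([token]).decode()
        produced += 1
        yield outputs
        # Manual cap: avoids relying on a max_tokens kwarg that upstream
        # Llama.generate() does not document.
        if token == llama.token_eos() or produced >= max_tokens:
            break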
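
And a hedged sketch of how the hidden model dropdown plugs into gr.ChatInterface after this commit. The respond() stub and the extra sliders are assumptions based on the parameters the diff forwards to generate(); only the Dropdown values come from the change itself.

import gradio as gr

def respond(message, history, model, max_tokens, temperature):
    # Placeholder for the app's streaming generator (see the sketch above).
    yield f"[{model}] max_tokens={max_tokens}, temperature={temperature}: {message}"

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["ggml-model-Q6_K.gguf"],   # single bundled model
            value="ggml-model-Q6_K.gguf",
            label="Model",
            info="Select the AI model to use for chat",
            visible=False,                      # hidden: only one choice
        ),
        gr.Slider(1, 2048, value=512, step=1, label="Max tokens"),
        gr.Slider(0.0, 2.0, value=0.7, step=0.05, label="Temperature"),
    ],
)

if __name__ == "__main__":
    demo.launch()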