Update app.py
app.py CHANGED
@@ -26,17 +26,17 @@ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)
 
 hf_hub_download(
-    repo_id="…
-    filename="…
+    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
+    filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )
 
 # Set the title and description
-title = "…
+title = "t5-query-reformulation-RL Llama.cpp"
 description = """
 I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
 
-[Model-Q8_0-GGUF](https://huggingface.co/…
+[Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
 
 
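For reference, the new download step is equivalent to this standalone snippet (a minimal sketch; it assumes the repo and filename introduced above are live on the Hub):

```python
import os
from huggingface_hub import hf_hub_download

os.makedirs("models", exist_ok=True)

# Fetch the quantized GGUF into ./models; hf_hub_download returns the local path.
path = hf_hub_download(
    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
    filename="t5-query-reformulation-RL-q8_0.gguf",
    local_dir="./models",
)
print(path)  # e.g. models/t5-query-reformulation-RL-q8_0.gguf
```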
@@ -80,12 +80,12 @@ def respond(
     try:
         global llama
         if llama == None:
-            llama = Llama("models/…
+            llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",flash_attn=False,
                 n_gpu_layers=0,
-                n_batch=…
-                n_ctx=…
+                n_batch=64,
+                n_ctx=256,
                 n_threads=2,
-                n_threads_batch=…
+                n_threads_batch=2)
 
         tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
         llama.encode(tokens)
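`llama.encode()` comes from the fairydreaming/t5 branch linked in the description; as the description notes, mainline llama-cpp-python may not support T5. As context for the two lines above, here is a hedged sketch of how a full encode/generate round trip typically looks on that branch; `decoder_start_token()` and the greedy sampling settings are assumptions, not part of this diff:

```python
# Assumes `llama` is a Llama instance built from the fairydreaming/t5 branch.
message = "Hello, world!"

# <2ja> is MADLAD-400's target-language tag ("translate to Japanese");
# the diff leaves this prefix unchanged.
tokens = llama.tokenize(f"<2ja>{message}".encode("utf-8"))
llama.encode(tokens)                  # run the T5 encoder over the source tokens

out = b""
seed = [llama.decoder_start_token()]  # T5 decoding starts from a dedicated token
for token in llama.generate(seed, top_k=1, top_p=1.0, temp=0.0, repeat_penalty=1.0):
    if token == llama.token_eos():
        break
    out += llama.detokenize([token])
print(out.decode("utf-8"))
```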
@@ -123,6 +123,7 @@ demo = gr.ChatInterface(
             value="madlad400-3b-mt-q8_0.gguf",
             label="Model",
             info="Select the AI model to use for chat",
+            visible=False
         ),
         gr.Textbox(
             value="You are a helpful assistant.",
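Adding `visible=False` hides the model dropdown from the UI while keeping it in the `respond` signature (note the `value` still names the old madlad400 GGUF, while `respond` now hard-codes the new path). A sketch of the surrounding wiring; everything outside the lines shown in the hunk is reconstructed and may differ from the actual file:

```python
import gradio as gr

demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Dropdown(
            choices=["madlad400-3b-mt-q8_0.gguf"],  # assumed; not shown in the hunk
            value="madlad400-3b-mt-q8_0.gguf",
            label="Model",
            info="Select the AI model to use for chat",
            visible=False,  # hidden by this commit; respond() hard-codes the model path anyway
        ),
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
    ],
)
```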