Spaces:

xsa-dev
/

llama2-7b-llama_cpp-ggmlv3-q4_1

Runtime error

xsa-dev committed on Aug 14, 2023

Commit

a0ec4ad

1 Parent(s): 1c1a36b

answer speed optimizations

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,12 +7,12 @@ from huggingface_hub import hf_hub_download  # load from huggingfaces
 CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
 CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q4_1.bin"
-N_CTX = 2048
 llm = Llama(model_path=hf_hub_download(
     repo_id=CONST_REPO_ID,
     filename=CONST_FILENAME),
-    n_ctx=2048
 )
 history = N_CTX
@@ -29,7 +29,7 @@ def generate_text(input_text, history):
     else:
         input_text_with_history = f"{history[-1][1]}" + "\n"
         input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
-    output = llm(input_text_with_history, max_tokens=4096, stop=[
         "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
         "ASSISTANT:", "USER:", "SYSTEM:"], stream=True
     )

 CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
 CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q4_1.bin"
+N_CTX = 1024
 llm = Llama(model_path=hf_hub_download(
     repo_id=CONST_REPO_ID,
     filename=CONST_FILENAME),
+    n_ctx=N_CTX
 )
 history = N_CTX
     else:
         input_text_with_history = f"{history[-1][1]}" + "\n"
         input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
+    output = llm(input_text_with_history, max_tokens=1024, stop=[
         "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
         "ASSISTANT:", "USER:", "SYSTEM:"], stream=True
     )