xsa-dev committed on
Commit
a0ec4ad
·
1 Parent(s): 1c1a36b

answer speed optimizations

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -7,12 +7,12 @@ from huggingface_hub import hf_hub_download # load from huggingfaces
7
  CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
8
  CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q4_1.bin"
9
 
10
- N_CTX = 2048
11
 
12
  llm = Llama(model_path=hf_hub_download(
13
  repo_id=CONST_REPO_ID,
14
  filename=CONST_FILENAME),
15
- n_ctx=2048
16
  )
17
  history = N_CTX
18
 
@@ -29,7 +29,7 @@ def generate_text(input_text, history):
29
  else:
30
  input_text_with_history = f"{history[-1][1]}" + "\n"
31
  input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
32
- output = llm(input_text_with_history, max_tokens=4096, stop=[
33
  "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
34
  "ASSISTANT:", "USER:", "SYSTEM:"], stream=True
35
  )
 
7
  CONST_REPO_ID = "TheBloke/Llama-2-7B-Chat-GGML"
8
  CONST_FILENAME = "llama-2-7b-chat.ggmlv3.q4_1.bin"
9
 
10
+ N_CTX = 1024
11
 
12
  llm = Llama(model_path=hf_hub_download(
13
  repo_id=CONST_REPO_ID,
14
  filename=CONST_FILENAME),
15
+ n_ctx=N_CTX
16
  )
17
  history = N_CTX
18
 
 
29
  else:
30
  input_text_with_history = f"{history[-1][1]}" + "\n"
31
  input_text_with_history += f"USER: {input_text}" + "\n" + " ASSISTANT:"
32
+ output = llm(input_text_with_history, max_tokens=1024, stop=[
33
  "<|prompter|>", "<|endoftext|>", "<|endoftext|> \n",
34
  "ASSISTANT:", "USER:", "SYSTEM:"], stream=True
35
  )