winglian committed on
Commit
4cc03d2
·
1 Parent(s): 0a981aa

fix chat history, update settings to use GPU

Browse files
Files changed (2) hide show
  1. chat.py +1 -1
  2. config.yml +1 -0
chat.py CHANGED
@@ -38,7 +38,7 @@ def chat(history, system_message):
38
  history[-1][1] = ""
39
  for output in llm(messages, max_tokens=512, stop=["</s>", "<unk>", "### User:"], echo=False, stream=True):
40
  answer = output['choices'][0]['text']
41
- history[-1][1] = answer
42
 
43
  yield history, history
44
 
 
38
  history[-1][1] = ""
39
  for output in llm(messages, max_tokens=512, stop=["</s>", "<unk>", "### User:"], echo=False, stream=True):
40
  answer = output['choices'][0]['text']
41
+ history[-1][1] += answer
42
 
43
  yield history, history
44
 
config.yml CHANGED
@@ -5,3 +5,4 @@ file: wizard-vicuna-13B.ggml.q5_1.bin
5
  base_model: junelee/wizard-vicuna-13b
6
  llama_cpp:
7
  n_ctx: 1024
 
 
5
  base_model: junelee/wizard-vicuna-13b
6
  llama_cpp:
7
  n_ctx: 1024
8
+ n_gpu_layers: 40 # llama 13b has 40 layers