pabloce committed on
Commit
0824852
·
verified ·
1 Parent(s): cad08b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -3
app.py CHANGED
@@ -36,6 +36,8 @@ def respond(
36
  max_tokens,
37
  temperature,
38
  top_p,
 
 
39
  model,
40
  ):
41
  from llama_cpp import Llama
@@ -47,10 +49,11 @@ def respond(
47
 
48
  llm = Llama(
49
  model_path=f"models/{model}",
 
50
  n_gpu_layers=81,
51
  n_ctx=8192,
52
  )
53
- provider = LlamaCppPythonProvider(llm)
54
 
55
  agent = LlamaCppAgent(
56
  provider,
@@ -89,14 +92,28 @@ demo = gr.ChatInterface(
89
  respond,
90
  additional_inputs=[
91
  gr.Textbox(value="You are a helpful assistant.", label="System message"),
92
- gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max new tokens"),
93
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
94
  gr.Slider(
95
  minimum=0.1,
96
  maximum=1.0,
97
  value=0.95,
98
  step=0.05,
99
- label="Top-p (nucleus sampling)",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  ),
101
  gr.Dropdown(['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Llama-3-8B-Synthia-v3.5-f16.gguf'], value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", label="Model"),
102
  ],
 
36
  max_tokens,
37
  temperature,
38
  top_p,
39
+ top_k,
40
+ repeat_penalty,
41
  model,
42
  ):
43
  from llama_cpp import Llama
 
49
 
50
  llm = Llama(
51
  model_path=f"models/{model}",
52
+ flash_attn=True,
53
  n_gpu_layers=81,
54
  n_ctx=8192,
55
  )
56
+ provider = LlamaCppPythonProvider(llm, temperature, top_k, top_p, max_tokens, repeat_penalty)
57
 
58
  agent = LlamaCppAgent(
59
  provider,
 
92
  respond,
93
  additional_inputs=[
94
  gr.Textbox(value="You are a helpful assistant.", label="System message"),
95
+ gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
96
  gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
97
  gr.Slider(
98
  minimum=0.1,
99
  maximum=1.0,
100
  value=0.95,
101
  step=0.05,
102
+ label="Top-p",
103
+ ),
104
+ gr.Slider(
105
+ minimum=0,
106
+ maximum=100,
107
+ value=40,
108
+ step=1,
109
+ label="Top-k",
110
+ ),
111
+ gr.Slider(
112
+ minimum=0.0,
113
+ maximum=2.0,
114
+ value=1.1,
115
+ step=0.1,
116
+ label="Repetition penalty",
117
  ),
118
  gr.Dropdown(['Meta-Llama-3-70B-Instruct-Q3_K_M.gguf', 'Llama-3-8B-Synthia-v3.5-f16.gguf'], value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf", label="Model"),
119
  ],