Changing response length
app.py CHANGED

@@ -16,7 +16,7 @@ llm = LlamaCPP(
     # optionally, you can set the path to a pre-downloaded model instead of model_url
     model_path=None,
     temperature=0.01,
-    max_new_tokens=
+    max_new_tokens=256,  # could be larger but requires more time
     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
     context_window=3900,
     # kwargs to pass to __call__()
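For context, here is a minimal sketch of the constructor call this hunk sits inside, assuming the stock llama-index LlamaCPP setup. The import path, model_url, generate_kwargs, and verbose values are assumptions for illustration, not part of this commit; only the parameters visible in the diff above are confirmed.

```python
# Sketch of the surrounding LlamaCPP setup, assuming the standard llama-index
# example. Values not shown in the diff (model_url, generate_kwargs, verbose)
# are assumptions; any GGUF Llama 2 chat build should work for model_url.
from llama_index.llms import LlamaCPP  # newer releases: llama_index.llms.llama_cpp

llm = LlamaCPP(
    model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf",
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.01,
    max_new_tokens=256,  # could be larger but requires more time
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    verbose=True,
)
```

The trade-off, as the inline comment notes, is latency versus answer length: capping max_new_tokens at 256 bounds generation time per request, while a larger value permits longer responses. Whatever the cap, the prompt plus the generated tokens must still fit within context_window.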