Changing response length
app.py CHANGED
```diff
@@ -16,7 +16,7 @@ llm = LlamaCPP(
     # optionally, you can set the path to a pre-downloaded model instead of model_url
     model_path=None,
     temperature=0.01,
-    max_new_tokens=
+    max_new_tokens=256,  # could be larger but requires more time
     # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
     context_window=3900,
     # kwargs to pass to __call__()
```
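For context, a minimal sketch of the surrounding `LlamaCPP` constructor that this hunk edits, following the llama_index LlamaCPP example. The `model_url` and the `generate_kwargs`/`model_kwargs`/prompt-helper values are illustrative assumptions, not necessarily what app.py uses; older llama_index releases import these as `from llama_index.llms import LlamaCPP` instead of the paths shown.

```python
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

llm = LlamaCPP(
    # hypothetical GGUF download URL; substitute the model app.py actually loads
    model_url="https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF/resolve/main/llama-2-13b-chat.Q4_0.gguf",
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.01,
    max_new_tokens=256,  # could be larger but requires more time
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=3900,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to model init; n_gpu_layers offloads layers to GPU if one is available
    model_kwargs={"n_gpu_layers": 1},
    # convert chat messages and completions into the llama2 prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)
```

Raising `max_new_tokens` allows longer responses at the cost of generation time, and the output budget plus the prompt must still fit within `context_window`.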