Update app.py
app.py CHANGED
```diff
@@ -50,10 +50,12 @@ def respond(
     llm = Llama(
         model_path=f"models/{model}",
         flash_attn=True,
+        n_threads=4,
         n_gpu_layers=81,
+        n_batch=1024,
         n_ctx=8192,
     )
-    provider = LlamaCppPythonProvider(llm
+    provider = LlamaCppPythonProvider(llm)

     agent = LlamaCppAgent(
         provider,
@@ -63,7 +65,11 @@ def respond(
     )

     settings = provider.get_provider_default_settings()
+    settings.temperature = temperature
+    settings.top_k = top_k
+    settings.top_p = top_p
     settings.max_tokens = max_tokens
+    settings.repeat_penalty = repeat_penalty
     settings.stream = True

     messages = BasicChatHistory()
```
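Taken together, the commit does three things: it adds two loader options (n_threads=4 and n_batch=1024) to the Llama constructor, closes a missing parenthesis on the provider line, and routes the UI sampling parameters (temperature, top_k, top_p, repeat_penalty) into the provider settings instead of relying on library defaults. For reference, here is a minimal sketch of how this part of respond() reads after the commit. The full argument list, the LlamaCppAgent options elided by the hunk, and the streaming code after BasicChatHistory() are not shown in the diff, so those parts are assumptions based on the usual llama-cpp-agent pattern:

```python
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent.providers import LlamaCppPythonProvider
from llama_cpp_agent.chat_history import BasicChatHistory

# Argument order is assumed; only the names used below appear in the diff.
def respond(message, model, max_tokens, temperature, top_p, top_k, repeat_penalty):
    # Loader options; n_threads and n_batch are the values added by this commit.
    llm = Llama(
        model_path=f"models/{model}",
        flash_attn=True,     # use flash-attention kernels
        n_threads=4,         # CPU threads for generation
        n_gpu_layers=81,     # offload up to 81 layers to the GPU
        n_batch=1024,        # prompt-processing batch size
        n_ctx=8192,          # context window in tokens
    )
    provider = LlamaCppPythonProvider(llm)

    # The diff shows more constructor arguments after `provider,`; they are
    # omitted here because the hunk does not include them.
    agent = LlamaCppAgent(provider)

    # Sampling settings now come from the caller instead of provider defaults.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    messages = BasicChatHistory()

    # Typical continuation (assumed, not part of this diff): stream the reply
    # token by token so the UI can render partial output as it arrives.
    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
    )
    output = ""
    for token in stream:
        output += token
        yield output
```

With settings.stream = True and returns_streaming_generator=True, the function yields the growing response incrementally rather than blocking until the full completion is ready.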