Update app.py
Browse files
app.py
CHANGED
|
@@ -88,8 +88,8 @@ def create_chain(_retriever):
|
|
| 88 |
# responses in real time.
|
| 89 |
# callback_manager = CallbackManager([stream_handler])
|
| 90 |
|
| 91 |
-
n_gpu_layers =
|
| 92 |
-
n_batch =
|
| 93 |
|
| 94 |
llm = LlamaCpp(
|
| 95 |
model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
|
|
|
|
| 88 |
# responses in real time.
|
| 89 |
# callback_manager = CallbackManager([stream_handler])
|
| 90 |
|
| 91 |
+
n_gpu_layers = 5 # Number of model layers to offload to the GPU; adjust for your model and available GPU VRAM.
|
| 92 |
+
n_batch = 512 # Should be between 1 and n_ctx; choose it based on the amount of VRAM in your GPU.
|
| 93 |
|
| 94 |
llm = LlamaCpp(
|
| 95 |
model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
|