Update app.py
Browse files
app.py
CHANGED
@@ -84,11 +84,11 @@ def create_chain(_retriever):
|
|
84 |
# responses in real time.
|
85 |
# callback_manager = CallbackManager([stream_handler])
|
86 |
|
87 |
-
n_gpu_layers =
|
88 |
-
n_batch =
|
89 |
|
90 |
llm = LlamaCpp(
|
91 |
-
model_path="models
|
92 |
n_gpu_layers=n_gpu_layers,
|
93 |
n_batch=n_batch,
|
94 |
n_ctx=2048,
|
|
|
84 |
# responses in real time.
|
85 |
# callback_manager = CallbackManager([stream_handler])
|
86 |
|
87 |
+
n_gpu_layers = 10 # Change this value based on your model and your GPU VRAM pool.
|
88 |
+
n_batch = 1024 # Must be between 1 and n_ctx; choose it based on the amount of VRAM available on your GPU.
|
89 |
|
90 |
llm = LlamaCpp(
|
91 |
+
model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
|
92 |
n_gpu_layers=n_gpu_layers,
|
93 |
n_batch=n_batch,
|
94 |
n_ctx=2048,
|