Spaces:

Mattral
/

RAG-bot

Sleeping

Mattral committed on May 10, 2024

Commit

a074633

verified ·

1 Parent(s): d7252e0

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -84,11 +84,11 @@ def create_chain(_retriever):
     # responses in real time.
     # callback_manager = CallbackManager([stream_handler])
-    n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
-    n_batch = 2048  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     llm = LlamaCpp(
-            model_path="models /mistral-7b-instruct-v0.1.Q5_0.gguf",
             n_gpu_layers=n_gpu_layers,
             n_batch=n_batch,
             n_ctx=2048,

     # responses in real time.
     # callback_manager = CallbackManager([stream_handler])
+    n_gpu_layers = 10  # Change this value based on your model and your GPU VRAM pool.
+    n_batch = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     llm = LlamaCpp(
+            model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",
             n_gpu_layers=n_gpu_layers,
             n_batch=n_batch,
             n_ctx=2048,