Spaces:

Mattral
/

RAG-bot

Sleeping

Mattral committed on May 13, 2024

Commit

df5d76d

verified ·

1 Parent(s): 7d30515

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -88,8 +88,8 @@ def create_chain(_retriever):
     # responses in real time.
     # callback_manager = CallbackManager([stream_handler])
-    n_gpu_layers = 10  # Change this value based on your model and your GPU VRAM pool.
-    n_batch = 1024  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     llm = LlamaCpp(
             model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",

     # responses in real time.
     # callback_manager = CallbackManager([stream_handler])
+    n_gpu_layers = 5  # Change this value based on your model and your GPU VRAM pool.
+    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
     llm = LlamaCpp(
             model_path="models/mistral-7b-instruct-v0.1.Q5_0.gguf",