llama-cpp-python-cuda-gradio

Runtime error

radames committed on Aug 20, 2023

Commit

22938c3

1 Parent(s): d4e1d16

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,11 +9,13 @@ from huggingface_hub import hf_hub_download  # load from huggingfaces
 llm = Llama(
     model_path=hf_hub_download(
-        repo_id="TheBloke/WizardLM-7B-uncensored-GGML",
-        filename="WizardLM-7B-uncensored.ggmlv3.q4_0.bin",
     ),
     n_ctx=2048,
-    n_gpu_layers=30
 )  # download model from hf/ n_ctx=2048 for high ccontext length
 history = []

 llm = Llama(
     model_path=hf_hub_download(
+        # repo_id="TheBloke/WizardLM-7B-uncensored-GGML",
+        repo_id="WizardLM-7B-uncensored-GGML",
+        # filename="WizardLM-7B-uncensored.ggmlv3.q4_0.bin",
+        filename="llama-2-7b-chat.ggmlv3.q5_0.bin",
     ),
     n_ctx=2048,
+    n_gpu_layers=50
 )  # download model from hf/ n_ctx=2048 for high ccontext length
 history = []