pabloce committed (verified) · Commit 98758c3 · 1 Parent(s): b5c263a

Update app.py

Files changed (1): app.py (+2 -2)
app.py CHANGED
@@ -9,7 +9,7 @@ from llama_cpp_agent.providers import LlamaCppPythonProvider
 
 subprocess.run('pip install llama-cpp-python --no-build-isolation --no-cache-dir --force-reinstall --upgrade', env={'CMAKE_ARGS': "-DLLAMA_CUBLAS=ON", 'FORCE_CMAKE': '1'}, shell=True)
 
-hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q6_K.gguf")
+hf_hub_download(repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF", filename="mistral-7b-instruct-v0.2.Q6_K.gguf", local_dir = "./models")
 
 @spaces.GPU
 def respond(
@@ -20,7 +20,7 @@ def respond(
     temperature,
     top_p,
 ):
-    llama_model = Llama(r"mistral-7b-instruct-v0.2.Q6_K.gguf", n_batch=1024, n_threads=0, n_gpu_layers=33, n_ctx=8192, verbose=False)
+    llama_model = Llama(r"models/mistral-7b-instruct-v0.2.Q6_K.gguf", n_batch=1024, n_threads=0, n_gpu_layers=33, n_ctx=8192, verbose=False)
 
     provider = LlamaCppPythonProvider(llama_model)
 
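For context, a minimal sketch of how the two updated calls fit together after this commit. The repo id, filename, and the Llama keyword arguments are taken straight from the diff; using hf_hub_download's return value (the resolved local path) instead of the hard-coded "models/..." string is an optional variation, not part of the commit itself.

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# hf_hub_download returns the local path of the fetched file; with
# local_dir set, the GGUF file lands under ./models instead of the
# hashed Hugging Face cache directory.
model_path = hf_hub_download(
    repo_id="TheBloke/Mistral-7B-Instruct-v0.2-GGUF",
    filename="mistral-7b-instruct-v0.2.Q6_K.gguf",
    local_dir="./models",
)

llama_model = Llama(
    model_path,
    n_batch=1024,
    n_threads=0,      # value carried over from the commit
    n_gpu_layers=33,  # offload every layer of the 7B model to the GPU
    n_ctx=8192,
    verbose=False,
)

The point of local_dir here is that the file is materialized at a predictable relative path rather than inside the cache, which is what allows the Llama(...) call to reference models/mistral-7b-instruct-v0.2.Q6_K.gguf directly.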