Spaces:

Luigi
/

ZeroGPU-LLM-Inference

Runtime error

Luigi committed on Apr 9

Commit

4443d46

1 Parent(s): 4d633ef

adjust thread numbers

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,9 +13,10 @@ hf_hub_download(
 def load_model():
     return Llama(
         model_path="models/qwen2.5-1.5b-instruct-q4_k_m.gguf",
-        n_ctx=2048,
-        n_threads=6,
-        n_batch=8,
         n_gpu_layers=0,
         use_mlock=False,
         use_mmap=True,

 def load_model():
     return Llama(
         model_path="models/qwen2.5-1.5b-instruct-q4_k_m.gguf",
+        n_ctx=1024,
+        n_threads=2,
+        n_threads_batch=2,
+        n_batch=4,
         n_gpu_layers=0,
         use_mlock=False,
         use_mmap=True,