Spaces:

Luigi
/

ZeroGPU-LLM-Inference

Running

Luigi committed on Apr 10

Commit

56919fd

1 Parent(s): f746942

increase context length to 2048

Files changed (1) hide show

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ hf_hub_download(
 def load_model():
     return Llama(
         model_path="models/qwen2.5-7b-instruct-q2_k.gguf",
-        n_ctx=1024,
         n_threads=2,
         n_threads_batch=2,
         n_batch=4,

 def load_model():
     return Llama(
         model_path="models/qwen2.5-7b-instruct-q2_k.gguf",
+        n_ctx=2048,
         n_threads=2,
         n_threads_batch=2,
         n_batch=4,