Luigi committed on
Commit
88b1f39
·
1 Parent(s): b5ee7db

switch to 3b q2 model

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +5 -5
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Qwen2.5 1.5B Instruct Llama.cpp
3
  emoji: 🌍
4
  colorFrom: pink
5
  colorTo: purple
@@ -8,7 +8,7 @@ sdk_version: 1.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- short_description: Run Qwen2.5-1.5B on Llama.cpp
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Qwen2.5 3B Instruct Llama.cpp
3
  emoji: 🌍
4
  colorFrom: pink
5
  colorTo: purple
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: Run Qwen2.5-3B on Llama.cpp
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -3,8 +3,8 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  hf_hub_download(
6
- repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
7
- filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
8
  local_dir="./models",
9
  )
10
 
@@ -12,7 +12,7 @@ hf_hub_download(
12
  @st.cache_resource
13
  def load_model():
14
  return Llama(
15
- model_path="models/qwen2.5-1.5b-instruct-q4_k_m.gguf",
16
  n_ctx=1024,
17
  n_threads=2,
18
  n_threads_batch=2,
@@ -29,8 +29,8 @@ llm = load_model()
29
  if "chat_history" not in st.session_state:
30
  st.session_state.chat_history = []
31
 
32
- st.title("🧠 Qwen2.5-1.5B-Instruct (Streamlit + GGUF)")
33
- st.caption("Powered by `llama.cpp` and `llama-cpp-python` | 4-bit Q4_K_M inference")
34
 
35
  with st.sidebar:
36
  st.header("⚙️ Settings")
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  hf_hub_download(
6
+ repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
7
+ filename="qwen2.5-3b-instruct-q2_k.gguf",
8
  local_dir="./models",
9
  )
10
 
 
12
  @st.cache_resource
13
  def load_model():
14
  return Llama(
15
+ model_path="models/qwen2.5-3b-instruct-q2_k.gguf",
16
  n_ctx=1024,
17
  n_threads=2,
18
  n_threads_batch=2,
 
29
  if "chat_history" not in st.session_state:
30
  st.session_state.chat_history = []
31
 
32
+ st.title("🧠 Qwen2.5-3B-Instruct (Streamlit + GGUF)")
33
+ st.caption("Powered by `llama.cpp` and `llama-cpp-python` | 2-bit Q2_K inference")
34
 
35
  with st.sidebar:
36
  st.header("⚙️ Settings")