Spaces: Running on Zero
Switch to the Qwen2.5-3B-Instruct Q2_K (2-bit) model
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
---
|
2 |
-
title: Qwen2.5
|
3 |
emoji: 🌍
|
4 |
colorFrom: pink
|
5 |
colorTo: purple
|
@@ -8,7 +8,7 @@ sdk_version: 1.44.1
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
-
short_description: Run Qwen2.5-
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
+
title: Qwen2.5 3B Instruct Llama.cpp
|
3 |
emoji: 🌍
|
4 |
colorFrom: pink
|
5 |
colorTo: purple
|
|
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: apache-2.0
|
11 |
+
short_description: Run Qwen2.5-3B on Llama.cpp
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -3,8 +3,8 @@ from llama_cpp import Llama
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
|
5 |
hf_hub_download(
|
6 |
-
repo_id="Qwen/Qwen2.5-
|
7 |
-
filename="qwen2.5-
|
8 |
local_dir="./models",
|
9 |
)
|
10 |
|
@@ -12,7 +12,7 @@ hf_hub_download(
|
|
12 |
@st.cache_resource
|
13 |
def load_model():
|
14 |
return Llama(
|
15 |
-
model_path="models/qwen2.5-
|
16 |
n_ctx=1024,
|
17 |
n_threads=2,
|
18 |
n_threads_batch=2,
|
@@ -29,8 +29,8 @@ llm = load_model()
|
|
29 |
if "chat_history" not in st.session_state:
|
30 |
st.session_state.chat_history = []
|
31 |
|
32 |
-
st.title("🧠 Qwen2.5-
|
33 |
-
st.caption("Powered by `llama.cpp` and `llama-cpp-python` |
|
34 |
|
35 |
with st.sidebar:
|
36 |
st.header("⚙️ Settings")
|
|
|
3 |
from huggingface_hub import hf_hub_download
|
4 |
|
5 |
hf_hub_download(
|
6 |
+
repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
|
7 |
+
filename="qwen2.5-3b-instruct-q2_k.gguf",
|
8 |
local_dir="./models",
|
9 |
)
|
10 |
|
|
|
12 |
@st.cache_resource
|
13 |
def load_model():
|
14 |
return Llama(
|
15 |
+
model_path="models/qwen2.5-3b-instruct-q2_k.gguf",
|
16 |
n_ctx=1024,
|
17 |
n_threads=2,
|
18 |
n_threads_batch=2,
|
|
|
29 |
if "chat_history" not in st.session_state:
|
30 |
st.session_state.chat_history = []
|
31 |
|
32 |
+
st.title("🧠 Qwen2.5-3B-Instruct (Streamlit + GGUF)")
|
33 |
+
st.caption("Powered by `llama.cpp` and `llama-cpp-python` | 2-bit Q2_K inference")
|
34 |
|
35 |
with st.sidebar:
|
36 |
st.header("⚙️ Settings")
|