Luigi committed on
Commit
88b1f39
·
1 Parent(s): b5ee7db

switch to 3b q2 model

Browse files
Files changed (2) hide show
  1. README.md +2 -2
  2. app.py +5 -5
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: Qwen2.5 1.5B Instruct Llama.cpp
3
  emoji: 🌍
4
  colorFrom: pink
5
  colorTo: purple
@@ -8,7 +8,7 @@ sdk_version: 1.44.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
- short_description: Run Qwen2.5-1.5B on Llama.cpp
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Qwen2.5 3B Instruct Llama.cpp
3
  emoji: 🌍
4
  colorFrom: pink
5
  colorTo: purple
 
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
11
+ short_description: Run Qwen2.5-3B on Llama.cpp
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -3,8 +3,8 @@ from llama_cpp import Llama
3
  from huggingface_hub import hf_hub_download
4
 
5
  hf_hub_download(
6
- repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
7
- filename="qwen2.5-1.5b-instruct-q4_k_m.gguf",
8
  local_dir="./models",
9
  )
10
 
@@ -12,7 +12,7 @@ hf_hub_download(
12
  @st.cache_resource
13
  def load_model():
14
  return Llama(
15
- model_path="models/qwen2.5-1.5b-instruct-q4_k_m.gguf",
16
  n_ctx=1024,
17
  n_threads=2,
18
  n_threads_batch=2,
@@ -29,8 +29,8 @@ llm = load_model()
29
  if "chat_history" not in st.session_state:
30
  st.session_state.chat_history = []
31
 
32
- st.title("🧠 Qwen2.5-1.5B-Instruct (Streamlit + GGUF)")
33
- st.caption("Powered by `llama.cpp` and `llama-cpp-python` | 4-bit Q4_K_M inference")
34
 
35
  with st.sidebar:
36
  st.header("⚙️ Settings")
 
3
  from huggingface_hub import hf_hub_download
4
 
5
  hf_hub_download(
6
+ repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
7
+ filename="qwen2.5-3b-instruct-q2_k.gguf",
8
  local_dir="./models",
9
  )
10
 
 
12
  @st.cache_resource
13
  def load_model():
14
  return Llama(
15
+ model_path="models/qwen2.5-3b-instruct-q2_k.gguf",
16
  n_ctx=1024,
17
  n_threads=2,
18
  n_threads_batch=2,
 
29
  if "chat_history" not in st.session_state:
30
  st.session_state.chat_history = []
31
 
32
+ st.title("🧠 Qwen2.5-3B-Instruct (Streamlit + GGUF)")
33
+ st.caption("Powered by `llama.cpp` and `llama-cpp-python` | 2-bit Q2_K inference")
34
 
35
  with st.sidebar:
36
  st.header("⚙️ Settings")