Spaces: Running on Zero

add all qwen3 variants
app.py
CHANGED
@@ -26,10 +26,14 @@ cancel_event = threading.Event()
 MODELS = {
     "Taiwan-ELM-1_1B-Instruct": {"repo_id": "liswei/Taiwan-ELM-1_1B-Instruct", "description": "Taiwan-ELM-1_1B-Instruct"},
     "Taiwan-ELM-270M-Instruct": {"repo_id": "liswei/Taiwan-ELM-270M-Instruct", "description": "Taiwan-ELM-270M-Instruct"},
-    "Qwen3-…
-    "Qwen3-…
-    "Qwen3-…
-    "Qwen3-…
+    "Qwen3-0.6B": {"repo_id":"Qwen/Qwen3-0.6B","description":"Dense causal language model with 0.6 B total parameters (0.44 B non-embedding), 28 transformer layers, 16 query heads & 8 KV heads, native 32 768-token context window, dual-mode generation, full multilingual & agentic capabilities."},
+    "Qwen3-1.7B": {"repo_id":"Qwen/Qwen3-1.7B","description":"Dense causal language model with 1.7 B total parameters (1.4 B non-embedding), 28 layers, 16 query heads & 8 KV heads, 32 768-token context, stronger reasoning vs. 0.6 B variant, dual-mode inference, instruction following across 100+ languages."},
+    "Qwen3-4B": {"repo_id":"Qwen/Qwen3-4B","description":"Dense causal language model with 4.0 B total parameters (3.6 B non-embedding), 36 layers, 32 query heads & 8 KV heads, native 32 768-token context (extendable to 131 072 via YaRN), balanced mid-range capacity & long-context reasoning."},
+    "Qwen3-8B": {"repo_id":"Qwen/Qwen3-8B","description":"Dense causal language model with 8.2 B total parameters (6.95 B non-embedding), 36 layers, 32 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), excels at multilingual instruction following & zero-shot tasks."},
+    "Qwen3-14B": {"repo_id":"Qwen/Qwen3-14B","description":"Dense causal language model with 14.8 B total parameters (13.2 B non-embedding), 40 layers, 40 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), enhanced human preference alignment & advanced agent integration."},
+    "Qwen3-32B": {"repo_id":"Qwen/Qwen3-32B","description":"Dense causal language model with 32.8 B total parameters (31.2 B non-embedding), 64 layers, 64 query heads & 8 KV heads, 32 768-token context (131 072 via YaRN), flagship variant delivering state-of-the-art reasoning & instruction following."},
+    "Qwen3-30B-A3B": {"repo_id":"Qwen/Qwen3-30B-A3B","description":"Mixture-of-Experts model with 30.5 B total parameters (29.9 B non-embedding, 3.3 B activated per token), 48 layers, 128 experts (8 activated per token), 32 query heads & 4 KV heads, 32 768-token context (131 072 via YaRN), MoE routing for scalable specialized reasoning."},
+    "Qwen3-235B-A22B":{"repo_id":"Qwen/Qwen3-235B-A22B","description":"Mixture-of-Experts model with 235 B total parameters (234 B non-embedding, 22 B activated per token), 94 layers, 128 experts (8 activated per token), 64 query heads & 4 KV heads, 32 768-token context (131 072 via YaRN), ultra-scale reasoning & agentic workflows."},
     "Gemma-3-4B-IT": {"repo_id": "unsloth/gemma-3-4b-it", "description": "Gemma-3-4B-IT"},
     "SmolLM2-135M-Instruct-TaiwanChat": {"repo_id": "Luigi/SmolLM2-135M-Instruct-TaiwanChat", "description": "SmolLM2‑135M Instruct fine-tuned on TaiwanChat"},
     "SmolLM2-135M-Instruct": {"repo_id": "HuggingFaceTB/SmolLM2-135M-Instruct", "description": "Original SmolLM2‑135M Instruct"},
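The hunks below show that load_pipeline() builds a transformers pipeline from the selected entry's repo_id and caches it in PIPELINES. A minimal, runnable sketch of that flow for one of the new Qwen3 entries; the MODELS/PIPELINES names mirror the diff, while the task string, dtype, and function body are assumptions:

import torch
from transformers import AutoTokenizer, pipeline

MODELS = {
    "Qwen3-0.6B": {"repo_id": "Qwen/Qwen3-0.6B", "description": "..."},
}
PIPELINES = {}  # per-model cache, as in the diff

def load_pipeline(model_name):
    # Reuse a cached pipeline when one exists.
    if model_name in PIPELINES:
        return PIPELINES[model_name]
    repo = MODELS[model_name]["repo_id"]
    tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
    pipe = pipeline(
        "text-generation",
        model=repo,
        tokenizer=tokenizer,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,  # assumed; the app derives a dtype elsewhere
        device_map="auto",           # the commit pins this to "xpu" below
    )
    PIPELINES[model_name] = pipe
    return pipe

pipe = load_pipeline("Qwen3-0.6B")
print(pipe("Hello!", max_new_tokens=16)[0]["generated_text"])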
@@ -66,7 +70,7 @@ def load_pipeline(model_name):
         tokenizer=tokenizer,
         trust_remote_code=True,
         torch_dtype=dtype,
-        device_map="…
+        device_map="xpu"
     )
     PIPELINES[model_name] = pipe
     return pipe
@@ -78,7 +82,7 @@ def load_pipeline(model_name):
         model=repo,
         tokenizer=tokenizer,
         trust_remote_code=True,
-        device_map="…
+        device_map="xpu"
     )
     PIPELINES[model_name] = pipe
     return pipe
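Both device_map changes pin the pipeline to "xpu", PyTorch's device string for Intel GPUs, rather than letting accelerate choose a placement. A hedged sketch of a guard that falls back when no XPU is present (the fallback logic is illustrative and not part of the commit; torch.xpu ships with recent PyTorch builds):

import torch

# Prefer Intel XPU, then CUDA, then CPU. The commit itself hard-codes "xpu".
if hasattr(torch, "xpu") and torch.xpu.is_available():
    device_map = "xpu"
elif torch.cuda.is_available():
    device_map = "cuda"
else:
    device_map = "cpu"
print(f"device_map={device_map!r}")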
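Several of the new descriptions cite a 32 768-token native window extendable to 131 072 via YaRN. A sketch of enabling that, following the rope_scaling recipe from the Qwen3 model cards (a factor of 4.0 over 32 768 positions yields roughly 131 072 tokens; treat the exact kwargs as an assumption against your transformers version):

import torch
from transformers import AutoModelForCausalLM

# YaRN rope scaling: stretch the native 32 768-token context by 4x,
# per the Qwen3 model-card recipe. Kwargs are forwarded to the config.
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen3-4B",
    torch_dtype=torch.bfloat16,
    rope_scaling={
        "rope_type": "yarn",
        "factor": 4.0,
        "original_max_position_embeddings": 32768,
    },
)
print(model.config.rope_scaling)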