Commit a60cacc
Parent: 9eefe17

Added new models

Files changed:
- app.py (+30 -2)
- frontend.html (+14 -0)
app.py
CHANGED
@@ -15,12 +15,34 @@ from llama_cpp import Llama
 
 
 class SupportedModelPipes(StrEnum):
+    Gemma3 = "gemma3"
+    QwenOpenR1 = "qwen-open-r1"
     SmolLLM2 = "smollm2"
+    SmolLLM2Reasoning = "smollm2-reasoning"
 
 
 smollm2_pipeline = Llama.from_pretrained(
-    repo_id="
-    filename="
+    repo_id="tensorblock/SmolLM2-135M-Instruct-GGUF",
+    filename="SmolLM2-135M-Instruct-Q8_0.gguf",
+    verbose=False,
+)
+
+smollm2_reasoning_pipeline = Llama.from_pretrained(
+    repo_id="tensorblock/Reasoning-SmolLM2-135M-GGUF",
+    filename="Reasoning-SmolLM2-135M-Q8_0.gguf",
+    verbose=False,
+)
+
+qwen_open_r1_pipeline = Llama.from_pretrained(
+    repo_id="tensorblock/Qwen2.5-0.5B-Open-R1-Distill-GGUF",
+    filename="Qwen2.5-0.5B-Open-R1-Distill-Q8_0.gguf",
+    verbose=False,
+)
+
+gemma_3_pipeline = Llama.from_pretrained(
+    repo_id="ggml-org/gemma-3-1b-it-GGUF",
+    filename="gemma-3-1b-it-Q8_0.gguf",
+    verbose=False,
 )
 
 
@@ -65,8 +87,14 @@ def chat(payload: ChatRequest, request: Request):
     ad_fetch_response = client.fetch_ad_units(fetch_payload)
 
     match payload.model:
+        case SupportedModelPipes.Gemma3:
+            ai_pipeline = gemma_3_pipeline
+        case SupportedModelPipes.QwenOpenR1:
+            ai_pipeline = qwen_open_r1_pipeline
         case SupportedModelPipes.SmolLLM2:
             ai_pipeline = smollm2_pipeline
+        case SupportedModelPipes.SmolLLM2Reasoning:
+            ai_pipeline = smollm2_reasoning_pipeline
 
     ai_response = ai_pipeline.create_chat_completion(
         messages=[{"role": "user", "content": f"{payload.message}"}],
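A minimal sketch (not part of the commit) of how one of the newly added pipelines can be exercised on its own with llama-cpp-python, reusing the repo_id and filename from the diff above; the prompt string is only an illustration.

from llama_cpp import Llama

# Same GGUF checkpoint the commit wires up as gemma_3_pipeline in app.py.
gemma_3_pipeline = Llama.from_pretrained(
    repo_id="ggml-org/gemma-3-1b-it-GGUF",
    filename="gemma-3-1b-it-Q8_0.gguf",
    verbose=False,
)

# create_chat_completion returns an OpenAI-style dict; pull out the reply text.
response = gemma_3_pipeline.create_chat_completion(
    messages=[{"role": "user", "content": "Hello!"}],
)
print(response["choices"][0]["message"]["content"])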
frontend.html
CHANGED
@@ -64,6 +64,20 @@
     <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer" data-value="smollm2">
         SmolLM2</li>
     </ul>
+    <ul class="py-1">
+        <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer"
+            data-value="smollm2-reasoning">
+            SmolLLM2Reasoning</li>
+    </ul>
+    <ul class="py-1">
+        <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer"
+            data-value="qwen-open-r1">
+            QwenOpenR1</li>
+    </ul>
+    <ul class="py-1">
+        <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer" data-value="gemma3">
+            Gemma3</li>
+    </ul>
 </div>
 </div>
 </div>
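Each new dropdown entry only dispatches correctly if its data-value string equals the value of the matching SupportedModelPipes member in app.py, since StrEnum members compare equal to their string values. A small illustrative sketch (mirroring the enum from the diff above, not code from the commit):

from enum import StrEnum

class SupportedModelPipes(StrEnum):
    Gemma3 = "gemma3"
    QwenOpenR1 = "qwen-open-r1"
    SmolLLM2 = "smollm2"
    SmolLLM2Reasoning = "smollm2-reasoning"

# A raw "gemma3" string coming from <li data-value="gemma3"> resolves to the
# Gemma3 member and also matches case SupportedModelPipes.Gemma3 in chat().
assert SupportedModelPipes("gemma3") is SupportedModelPipes.Gemma3
assert SupportedModelPipes.Gemma3 == "gemma3"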