dexter2389 committed
Commit a60cacc · 1 parent: 9eefe17

Added new models

Files changed (2)
  1. app.py +30 -2
  2. frontend.html +14 -0
app.py CHANGED
@@ -15,12 +15,34 @@ from llama_cpp import Llama
 
 
 class SupportedModelPipes(StrEnum):
+    Gemma3 = "gemma3"
+    QwenOpenR1 = "qwen-open-r1"
     SmolLLM2 = "smollm2"
+    SmolLLM2Reasoning = "smollm2-reasoning"
 
 
 smollm2_pipeline = Llama.from_pretrained(
-    repo_id="HuggingFaceTB/SmolLM2-360M-Instruct-GGUF",
-    filename="smollm2-360m-instruct-q8_0.gguf",
+    repo_id="tensorblock/SmolLM2-135M-Instruct-GGUF",
+    filename="SmolLM2-135M-Instruct-Q8_0.gguf",
+    verbose=False,
+)
+
+smollm2_reasoning_pipeline = Llama.from_pretrained(
+    repo_id="tensorblock/Reasoning-SmolLM2-135M-GGUF",
+    filename="Reasoning-SmolLM2-135M-Q8_0.gguf",
+    verbose=False,
+)
+
+qwen_open_r1_pipeline = Llama.from_pretrained(
+    repo_id="tensorblock/Qwen2.5-0.5B-Open-R1-Distill-GGUF",
+    filename="Qwen2.5-0.5B-Open-R1-Distill-Q8_0.gguf",
+    verbose=False,
+)
+
+gemma_3_pipeline = Llama.from_pretrained(
+    repo_id="ggml-org/gemma-3-1b-it-GGUF",
+    filename="gemma-3-1b-it-Q8_0.gguf",
+    verbose=False,
 )
 
 
@@ -65,8 +87,14 @@ def chat(payload: ChatRequest, request: Request):
     ad_fetch_response = client.fetch_ad_units(fetch_payload)
 
     match payload.model:
+        case SupportedModelPipes.Gemma3:
+            ai_pipeline = gemma_3_pipeline
+        case SupportedModelPipes.QwenOpenR1:
+            ai_pipeline = qwen_open_r1_pipeline
         case SupportedModelPipes.SmolLLM2:
             ai_pipeline = smollm2_pipeline
+        case SupportedModelPipes.SmolLLM2Reasoning:
+            ai_pipeline = smollm2_reasoning_pipeline
 
     ai_response = ai_pipeline.create_chat_completion(
         messages=[{"role": "user", "content": f"{payload.message}"}],
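
All four pipelines added above follow the same llama-cpp-python pattern: Llama.from_pretrained pulls a quantized GGUF from the Hugging Face Hub (this requires huggingface_hub to be installed), and create_chat_completion then serves OpenAI-style chat requests, as in the chat() handler. A minimal standalone sketch of that flow, reusing the Gemma 3 repo_id/filename from this commit; the prompt, the max_tokens cap, and the response indexing are illustrative and not part of the commit:

from llama_cpp import Llama

# Same loading pattern as app.py: download a quantized GGUF from the Hub
# on first use; verbose=False suppresses llama.cpp's startup logging.
pipeline = Llama.from_pretrained(
    repo_id="ggml-org/gemma-3-1b-it-GGUF",
    filename="gemma-3-1b-it-Q8_0.gguf",
    verbose=False,
)

# Mirrors the create_chat_completion call in chat(); max_tokens is an
# illustrative cap, not a value from the commit.
response = pipeline.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=64,
)

# Non-streaming calls return an OpenAI-style completion dict.
print(response["choices"][0]["message"]["content"])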
frontend.html CHANGED
@@ -64,6 +64,20 @@
         <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer" data-value="smollm2">
             SmolLM2</li>
     </ul>
+    <ul class="py-1">
+        <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer"
+            data-value="smollm2-reasoning">
+            SmolLLM2Reasoning</li>
+    </ul>
+    <ul class="py-1">
+        <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer"
+            data-value="qwen-open-r1">
+            QwenOpenR1</li>
+    </ul>
+    <ul class="py-1">
+        <li class="model-option px-4 py-2 hover:bg-gray-100 cursor-pointer" data-value="gemma3">
+            Gemma3</li>
+    </ul>
 </div>
 </div>
 </div>
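
One contract worth noting: each data-value in the dropdown must match a SupportedModelPipes value exactly, because the backend's match on payload.model dispatches against those strings. A small sketch of that coupling, copying the enum from app.py (StrEnum requires Python 3.11+; the failing lookup is a hypothetical typo, not from the commit):

from enum import StrEnum

class SupportedModelPipes(StrEnum):
    Gemma3 = "gemma3"
    QwenOpenR1 = "qwen-open-r1"
    SmolLLM2 = "smollm2"
    SmolLLM2Reasoning = "smollm2-reasoning"

# Every data-value emitted by frontend.html resolves to an enum member.
for data_value in ("gemma3", "qwen-open-r1", "smollm2", "smollm2-reasoning"):
    assert SupportedModelPipes(data_value) in SupportedModelPipes

# An unmatched string fails at enum construction, before any pipeline runs.
try:
    SupportedModelPipes("gemma-3")  # hypothetical typo, not in the commit
except ValueError as exc:
    print(exc)  # "'gemma-3' is not a valid SupportedModelPipes"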