CultriX commited on
Commit
69437e4
·
verified ·
1 Parent(s): e00c04d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +331 -1569
app.py CHANGED
@@ -1,19 +1,228 @@
1
- import io
2
- import sys
 
 
3
  import requests
4
  from bs4 import BeautifulSoup
5
- import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
- # ---------------------------------------------------------
8
- # PART 1: FULL BENCHMARK DATA (Rank 44 through 105)
9
- # ---------------------------------------------------------
10
- # For each model, we store:
11
- # - rank (int)
12
- # - name (str)
13
- # - scores (dict) with keys: average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO
14
- # - known_config (dict if found, or None if no config)
15
 
16
- benchmark_data = [
 
 
 
 
 
 
 
 
 
 
 
17
  {
18
  "rank": 44,
19
  "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
@@ -26,6 +235,7 @@ benchmark_data = [
26
  "MUSR": 19.39,
27
  "MMLU-PRO": 48.26
28
  },
 
29
  "known_config": {
30
  "models": [
31
  {"model": "CultriX/SeQwence-14Bv1"},
@@ -39,1504 +249,21 @@ benchmark_data = [
39
  }
40
  }
41
  },
42
- {
43
- "rank": 45,
44
- "name": "sthenno-com/miscii-14b-1225",
45
- "scores": {
46
- "average": 40.08,
47
- "IFEval": 78.78,
48
- "BBH": 50.91,
49
- "MATH": 31.57,
50
- "GPQA": 17.00,
51
- "MUSR": 14.77,
52
- "MMLU-PRO": 47.46
53
- },
54
- "known_config": {
55
- "tokenizer_source": "base",
56
- "chat_template": "chatml",
57
- "merge_method": "ties",
58
- "dtype": "bfloat16",
59
- "parameters": {
60
- "normalize": True
61
- },
62
- "base_model": "sthenno-com/miscii-14b-1028",
63
- "models": [
64
- {
65
- "model": "sthenno-com/miscii-14b-1028",
66
- "parameters": {
67
- "weight": 1,
68
- "density": 0.5
69
- }
70
- },
71
- {
72
- "model": "sthenno/miscii-1218",
73
- "parameters": {
74
- "weight": 1,
75
- "density": 0.5
76
- }
77
- },
78
- {
79
- "model": "sthenno/exp-002",
80
- "parameters": {
81
- "weight": 0.9,
82
- "density": 0.5
83
- }
84
- },
85
- {
86
- "model": "sthenno/miscii-1218",
87
- "parameters": {
88
- "weight": 0.6,
89
- "density": 0.5
90
- }
91
- }
92
- ]
93
- }
94
- },
95
- {
96
- "rank": 46,
97
- "name": "djuna/Q2.5-Veltha-14B-0.5",
98
- "scores": {
99
- "average": 39.96,
100
- "IFEval": 77.96,
101
- "BBH": 50.32,
102
- "MATH": 33.84,
103
- "GPQA": 15.77,
104
- "MUSR": 14.17,
105
- "MMLU-PRO": 47.72
106
- },
107
- "known_config": {
108
- "merge_method": "della_linear",
109
- "dtype": "float32",
110
- "out_dtype": "bfloat16",
111
- "parameters": {
112
- "epsilon": 0.04,
113
- "lambda": 1.05,
114
- "normalize": True
115
- },
116
- "base_model": "arcee-ai/SuperNova-Medius",
117
- "tokenizer_source": "arcee-ai/SuperNova-Medius",
118
- "models": [
119
- {
120
- "model": "arcee-ai/SuperNova-Medius",
121
- "parameters": {
122
- "weight": 10,
123
- "density": 1
124
- }
125
- },
126
- {
127
- "model": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
128
- "parameters": {
129
- "weight": 7,
130
- "density": 0.5
131
- }
132
- },
133
- {
134
- "model": "v000000/Qwen2.5-Lumen-14B",
135
- "parameters": {
136
- "weight": 7,
137
- "density": 0.4
138
- }
139
- },
140
- {
141
- "model": "allura-org/TQ2.5-14B-Aletheia-v1",
142
- "parameters": {
143
- "weight": 8,
144
- "density": 0.4
145
- }
146
- },
147
- {
148
- "model": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
149
- "parameters": {
150
- "weight": 8,
151
- "density": 0.45
152
- }
153
- }
154
- ]
155
- }
156
- },
157
- {
158
- "rank": 48,
159
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
160
- "scores": {
161
- "average": 39.81,
162
- "IFEval": 71.62,
163
- "BBH": 48.76,
164
- "MATH": 33.99,
165
- "GPQA": 17.34,
166
- "MUSR": 19.23,
167
- "MMLU-PRO": 47.95
168
- },
169
- "known_config": None
170
- },
171
- {
172
- "rank": 50,
173
- "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
174
- "scores": {
175
- "average": 39.46,
176
- "IFEval": 68.72,
177
- "BBH": 47.71,
178
- "MATH": 35.05,
179
- "GPQA": 18.23,
180
- "MUSR": 19.56,
181
- "MMLU-PRO": 47.50
182
- },
183
- "known_config": None
184
- },
185
- {
186
- "rank": 52,
187
- "name": "arcee-ai/Virtuoso-Small",
188
- "scores": {
189
- "average": 39.43,
190
- "IFEval": 79.35,
191
- "BBH": 50.40,
192
- "MATH": 34.29,
193
- "GPQA": 11.52,
194
- "MUSR": 14.44,
195
- "MMLU-PRO": 46.57
196
- },
197
- "known_config": None
198
- },
199
- {
200
- "rank": 54,
201
- "name": "sometimesanotion/Qwentinuum-14B-v6",
202
- "scores": {
203
- "average": 39.23,
204
- "IFEval": 63.04,
205
- "BBH": 50.23,
206
- "MATH": 33.84,
207
- "GPQA": 18.23,
208
- "MUSR": 21.18,
209
- "MMLU-PRO": 48.89
210
- },
211
- "known_config": None
212
- },
213
- {
214
- "rank": 55,
215
- "name": "djuna/Q2.5-Veltha-14B",
216
- "scores": {
217
- "average": 39.21,
218
- "IFEval": 82.92,
219
- "BBH": 49.75,
220
- "MATH": 28.02,
221
- "GPQA": 14.54,
222
- "MUSR": 12.26,
223
- "MMLU-PRO": 47.76
224
- },
225
- "known_config": {
226
- "merge_method": "della_linear",
227
- "dtype": "float32",
228
- "out_dtype": "bfloat16",
229
- "parameters": {
230
- "epsilon": 0.04,
231
- "lambda": 1.05,
232
- "normalize": True
233
- },
234
- "base_model": "qwen/Qwen2.5-14b",
235
- "tokenizer_source": "arcee-ai/SuperNova-Medius",
236
- "models": [
237
- {
238
- "model": "arcee-ai/SuperNova-Medius",
239
- "parameters": {
240
- "weight": 10,
241
- "density": 1
242
- }
243
- },
244
- {
245
- "model": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
246
- "parameters": {
247
- "weight": 7,
248
- "density": 0.5
249
- }
250
- },
251
- {
252
- "model": "v000000/Qwen2.5-Lumen-14B",
253
- "parameters": {
254
- "weight": 7,
255
- "density": 0.4
256
- }
257
- },
258
- {
259
- "model": "allura-org/TQ2.5-14B-Aletheia-v1",
260
- "parameters": {
261
- "weight": 8,
262
- "density": 0.4
263
- }
264
- },
265
- {
266
- "model": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
267
- "parameters": {
268
- "weight": 8,
269
- "density": 0.45
270
- }
271
- }
272
- ]
273
- }
274
- },
275
- {
276
- "rank": 57,
277
- "name": "allknowingroger/QwenSlerp6-14B",
278
- "scores": {
279
- "average": 39.02,
280
- "IFEval": 68.67,
281
- "BBH": 47.59,
282
- "MATH": 34.14,
283
- "GPQA": 16.44,
284
- "MUSR": 18.32,
285
- "MMLU-PRO": 48.95
286
- },
287
- "known_config": {
288
- "models": [
289
- {"model": "CultriX/SeQwence-14Bv1"},
290
- {"model": "allknowingroger/Qwenslerp2-14B"}
291
- ],
292
- "merge_method": "slerp",
293
- "base_model": "CultriX/SeQwence-14Bv1",
294
- "dtype": "bfloat16",
295
- "parameters": {
296
- "t": [0, 0.5, 1, 0.5, 0]
297
- }
298
- }
299
- },
300
- {
301
- "rank": 58,
302
- "name": "allknowingroger/QwenSlerp5-14B",
303
- "scores": {
304
- "average": 38.94,
305
- "IFEval": 71.19,
306
- "BBH": 47.39,
307
- "MATH": 33.16,
308
- "GPQA": 15.32,
309
- "MUSR": 17.81,
310
- "MMLU-PRO": 48.78
311
- },
312
- "known_config": {
313
- "models": [
314
- {"model": "CultriX/SeQwence-14Bv1"},
315
- {"model": "CultriX/Qwestion-14B"}
316
- ],
317
- "merge_method": "slerp",
318
- "base_model": "CultriX/SeQwence-14Bv1",
319
- "dtype": "bfloat16",
320
- "parameters": {
321
- "t": [0, 0.5, 1, 0.5, 0]
322
- }
323
- }
324
- },
325
- {
326
- "rank": 59,
327
- "name": "sometimesanotion/Qwentinuum-14B-v5",
328
- "scores": {
329
- "average": 38.87,
330
- "IFEval": 62.86,
331
- "BBH": 50.28,
332
- "MATH": 31.57,
333
- "GPQA": 18.34,
334
- "MUSR": 21.09,
335
- "MMLU-PRO": 49.09
336
- },
337
- "known_config": None
338
- },
339
- {
340
- "rank": 60,
341
- "name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
342
- "scores": {
343
- "average": 38.82,
344
- "IFEval": 59.90,
345
- "BBH": 50.12,
346
- "MATH": 34.89,
347
- "GPQA": 18.46,
348
- "MUSR": 21.02,
349
- "MMLU-PRO": 48.56
350
- },
351
- "known_config": {
352
- # This model had two YAML segments:
353
- # We'll store them in a single dictionary with keys "config1" and "config2" to preserve them:
354
- "config1": {
355
- "name": "Qwenvergence-14B-v6-Prose-model_stock",
356
- "merge_method": "model_stock",
357
- "base_model": "Qwen/Qwen2.5-14B",
358
- "tokenizer_source": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
359
- "parameters": {
360
- "int8_mask": True,
361
- "normalize": True,
362
- "rescale": False
363
- },
364
- "models": [
365
- "arcee-ai/Virtuoso-Small",
366
- "sometimesanotion/Lamarck-14B-v0.3",
367
- "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
368
- "allura-org/TQ2.5-14B-Sugarquill-v1",
369
- "oxyapi/oxy-1-small",
370
- "v000000/Qwen2.5-Lumen-14B",
371
- "sthenno-com/miscii-14b-1225",
372
- "sthenno-com/miscii-14b-1225",
373
- "underwoods/medius-erebus-magnum-14b",
374
- "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2"
375
- ],
376
- "dtype": "float32",
377
- "out_dtype": "bfloat16"
378
- },
379
- "config2": {
380
- "name": "Qwenvergence-14B-v6-Prose",
381
- "merge_method": "ties",
382
- "base_model": "Qwen/Qwen2.5-14B",
383
- "tokenizer_source": "base",
384
- "parameters": {
385
- "density": 1.00,
386
- "weight": 1.00,
387
- "int8_mask": True,
388
- "normalize": True,
389
- "rescale": False
390
- },
391
- "dtype": "float32",
392
- "out_dtype": "bfloat16",
393
- "models": [
394
- {
395
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose-slerp",
396
- "parameters": {
397
- "density": 1.00,
398
- "weight": 1.00
399
- }
400
- }
401
- ]
402
- }
403
- }
404
- },
405
- {
406
- "rank": 61,
407
- "name": "CultriX/Qwen2.5-14B-Brocav3",
408
- "scores": {
409
- "average": 38.76,
410
- "IFEval": 69.52,
411
- "BBH": 49.05,
412
- "MATH": 32.25,
413
- "GPQA": 14.54,
414
- "MUSR": 19.25,
415
- "MMLU-PRO": 47.97
416
- },
417
- "known_config": {
418
- "merge_method": "della_linear",
419
- "base_model": "CultriX/Qwen2.5-14B-Wernickev3",
420
- "dtype": "bfloat16",
421
- "parameters": {
422
- "epsilon": 0.012,
423
- "lambda": 1.4,
424
- "normalize": True
425
- },
426
- "adaptive_merge_parameters": {
427
- "task_weights": {
428
- "tinyArc": 1.6,
429
- "tinyHellaswag": 1.5,
430
- "tinyMMLU": 1.65,
431
- "tinyTruthfulQA": 1.9,
432
- "tinyTruthfulQA_mc1": 1.7,
433
- "tinyWinogrande": 1.75,
434
- "IFEval": 1.9,
435
- "BBH": 1.7,
436
- "MATH": 2.1,
437
- "GPQA": 1.8,
438
- "MUSR": 1.9,
439
- "MMLU-PRO": 1.8
440
- },
441
- "smoothing_factor": 0.1
442
- },
443
- "gradient_clipping": {
444
- "CultriX/Qwen2.5-14B-Wernickev3": 0.86,
445
- "CultriX/Qwenfinity-2.5-14B": 0.83,
446
- "djuna/Q2.5-Veltha-14B-0.5": 0.91,
447
- "CultriX/Qwen2.5-14B-Broca": 0.85,
448
- "qingy2019/Qwen2.5-Math-14B-Instruct": 0.93,
449
- "CultriX/SeQwence-14Bv1": 0.88,
450
- "sometimesanotion/Qwen2.5-14B-Vimarckoso": 0.89,
451
- "allknowingroger/QwenSlerp6-14B": 0.87
452
- },
453
- "models": [
454
- {
455
- "model": "CultriX/Qwen2.5-14B-Wernickev3",
456
- "parameters": {
457
- "weight": 0.26,
458
- "density": 0.7
459
- }
460
- },
461
- {
462
- "model": "CultriX/Qwenfinity-2.5-14B",
463
- "parameters": {
464
- "weight": 0.23,
465
- "density": 0.65
466
- }
467
- },
468
- {
469
- "model": "djuna/Q2.5-Veltha-14B-0.5",
470
- "parameters": {
471
- "weight": 0.22,
472
- "density": 0.72
473
- }
474
- },
475
- {
476
- "model": "CultriX/Qwen2.5-14B-Broca",
477
- "parameters": {
478
- "weight": 0.15,
479
- "density": 0.65
480
- }
481
- },
482
- {
483
- "model": "qingy2019/Qwen2.5-Math-14B-Instruct",
484
- "parameters": {
485
- "weight": 0.18,
486
- "density": 0.73
487
- }
488
- },
489
- {
490
- "model": "CultriX/SeQwence-14Bv1",
491
- "parameters": {
492
- "weight": 0.14,
493
- "density": 0.63
494
- }
495
- },
496
- {
497
- "model": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
498
- "parameters": {
499
- "weight": 0.12,
500
- "density": 0.6
501
- }
502
- },
503
- {
504
- "model": "allknowingroger/QwenSlerp6-14B",
505
- "parameters": {
506
- "weight": 0.1,
507
- "density": 0.62
508
- }
509
- }
510
- ],
511
- "tokenizer_source": "CultriX/Qwen2.5-14B-Wernickev3"
512
- }
513
- },
514
- {
515
- "rank": 62,
516
- "name": "sometimesanotion/Qwentinuum-14B-v7",
517
- "scores": {
518
- "average": 38.76,
519
- "IFEval": 61.09,
520
- "BBH": 50.35,
521
- "MATH": 33.38,
522
- "GPQA": 18.79,
523
- "MUSR": 19.95,
524
- "MMLU-PRO": 49.00
525
- },
526
- "known_config": None
527
- },
528
- {
529
- "rank": 64,
530
- "name": "sometimesanotion/Qwentinuum-14B-v3",
531
- "scores": {
532
- "average": 38.74,
533
- "IFEval": 61.58,
534
- "BBH": 50.04,
535
- "MATH": 32.85,
536
- "GPQA": 18.34,
537
- "MUSR": 20.62,
538
- "MMLU-PRO": 49.03
539
- },
540
- "known_config": None
541
- },
542
- {
543
- "rank": 65,
544
- "name": "allura-org/TQ2.5-14B-Aletheia-v1",
545
- "scores": {
546
- "average": 38.74,
547
- "IFEval": 75.30,
548
- "BBH": 50.88,
549
- "MATH": 29.53,
550
- "GPQA": 14.99,
551
- "MUSR": 14.61,
552
- "MMLU-PRO": 47.12
553
- },
554
- # The snippet had:
555
- # <|im_start|>system
556
- # ...
557
- # This was presumably some leftover system text. We'll treat it as config, or None.
558
- # We'll store it as a minimal known_config example:
559
- "known_config": {
560
- "system_text_example": "<|im_start|>system ... <|im_end|>"
561
- }
562
- },
563
- {
564
- "rank": 66,
565
- "name": "qingy2024/Fusion4-14B-Instruct",
566
- "scores": {
567
- "average": 38.73,
568
- "IFEval": 76.49,
569
- "BBH": 50.70,
570
- "MATH": 33.91,
571
- "GPQA": 10.74,
572
- "MUSR": 13.97,
573
- "MMLU-PRO": 46.60
574
- },
575
- "known_config": {
576
- "models": [
577
- {
578
- "model": "arcee-ai/Virtuoso-Small",
579
- "parameters": {
580
- "weight": 1,
581
- "density": 1
582
- }
583
- }
584
- ],
585
- "merge_method": "ties",
586
- "base_model": "Qwen/Qwen2.5-14B",
587
- "parameters": {
588
- "weight": 1,
589
- "density": 1,
590
- "normalize": True,
591
- "int8_mask": True
592
- },
593
- "dtype": "float16"
594
- }
595
- },
596
- {
597
- "rank": 68,
598
- "name": "CultriX/Qwen2.5-14B-Brocav7",
599
- "scores": {
600
- "average": 38.52,
601
- "IFEval": 67.24,
602
- "BBH": 48.91,
603
- "MATH": 31.87,
604
- "GPQA": 15.66,
605
- "MUSR": 20.15,
606
- "MMLU-PRO": 47.31
607
- },
608
- "known_config": {
609
- "merge_method": "della_linear",
610
- "base_model": "CultriX/Qwen2.5-14B-Wernickev3",
611
- "dtype": "bfloat16",
612
- "parameters": {
613
- "epsilon": 0.01,
614
- "lambda": 1.5,
615
- "normalize": True,
616
- "smoothing_factor": 0.08
617
- },
618
- "gradient_clipping": {
619
- "CultriX/Qwen2.5-14B-Wernickev3": 0.85,
620
- "CultriX/Qwenfinity-2.5-14B": 0.82,
621
- "djuna/Q2.5-Veltha-14B-0.5": 0.92,
622
- "CultriX/Qwen2.5-14B-Broca": 0.86,
623
- "qingy2019/Qwen2.5-Math-14B-Instruct": 0.94,
624
- "CultriX/SeQwence-14Bv1": 0.87,
625
- "sometimesanotion/Qwen2.5-14B-Vimarckoso": 0.90,
626
- "allknowingroger/QwenSlerp6-14B": 0.86
627
- },
628
- "models": [
629
- {
630
- "model": "CultriX/Qwen2.5-14B-Wernickev3",
631
- "parameters": {
632
- "weight": 0.25,
633
- "density": 0.72
634
- }
635
- },
636
- {
637
- "model": "CultriX/Qwenfinity-2.5-14B",
638
- "parameters": {
639
- "weight": 0.22,
640
- "density": 0.68
641
- }
642
- },
643
- {
644
- "model": "djuna/Q2.5-Veltha-14B-0.5",
645
- "parameters": {
646
- "weight": 0.20,
647
- "density": 0.75
648
- }
649
- },
650
- {
651
- "model": "CultriX/Qwen2.5-14B-Broca",
652
- "parameters": {
653
- "weight": 0.16,
654
- "density": 0.68
655
- }
656
- },
657
- {
658
- "model": "qingy2019/Qwen2.5-Math-14B-Instruct",
659
- "parameters": {
660
- "weight": 0.19,
661
- "density": 0.75
662
- }
663
- },
664
- {
665
- "model": "CultriX/SeQwence-14Bv1",
666
- "parameters": {
667
- "weight": 0.13,
668
- "density": 0.65
669
- }
670
- },
671
- {
672
- "model": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
673
- "parameters": {
674
- "weight": 0.11,
675
- "density": 0.62
676
- }
677
- },
678
- {
679
- "model": "allknowingroger/QwenSlerp6-14B",
680
- "parameters": {
681
- "weight": 0.09,
682
- "density": 0.65
683
- }
684
- }
685
- ],
686
- "adaptive_merge_parameters": {
687
- "task_weights": {
688
- "tinyArc": 1.65,
689
- "tinyHellaswag": 1.55,
690
- "tinyMMLU": 1.7,
691
- "tinyTruthfulQA": 1.95,
692
- "tinyTruthfulQA_mc1": 1.75,
693
- "tinyWinogrande": 1.8,
694
- "IFEval": 2.0,
695
- "BBH": 1.75,
696
- "MATH": 2.2,
697
- "GPQA": 1.85,
698
- "MUSR": 1.95,
699
- "MMLU-PRO": 1.85
700
- }
701
- },
702
- "tokenizer_source": "CultriX/Qwen2.5-14B-Wernickev3"
703
- }
704
- },
705
- {
706
- "rank": 71,
707
- "name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
708
- "scores": {
709
- "average": 38.46,
710
- "IFEval": 56.43,
711
- "BBH": 50.14,
712
- "MATH": 35.57,
713
- "GPQA": 18.46,
714
- "MUSR": 21.34,
715
- "MMLU-PRO": 48.80
716
- },
717
- "known_config": {
718
- "name": "Qwentinuum-14B-v6-Prose-slerp",
719
- "merge_method": "slerp",
720
- "base_model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
721
- "tokenizer_source": "sometimesanotion/Qwenvergence-14B-v6-Prose",
722
- "dtype": "bfloat16",
723
- "out_dtype": "bfloat16",
724
- "parameters": {
725
- "int8_mask": True,
726
- "normalize": True,
727
- "rescale": False
728
- },
729
- "slices": [
730
- {
731
- "sources": [
732
- {
733
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
734
- "layer_range": [0, 8]
735
- },
736
- {
737
- "model": "sometimesanotion/Qwentinuum-14B-v6",
738
- "layer_range": [0, 8]
739
- }
740
- ]
741
- },
742
- {
743
- "sources": [
744
- {
745
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
746
- "layer_range": [8, 16]
747
- },
748
- {
749
- "model": "sometimesanotion/Qwentinuum-14B-v6",
750
- "layer_range": [8, 16]
751
- }
752
- ]
753
- },
754
- {
755
- "sources": [
756
- {
757
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
758
- "layer_range": [16, 24]
759
- },
760
- {
761
- "model": "sometimesanotion/Qwentinuum-14B-v6",
762
- "layer_range": [16, 24]
763
- }
764
- ]
765
- },
766
- {
767
- "sources": [
768
- {
769
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
770
- "layer_range": [24, 32]
771
- },
772
- {
773
- "model": "sometimesanotion/Qwentinuum-14B-v6",
774
- "layer_range": [24, 32]
775
- }
776
- ]
777
- },
778
- {
779
- "sources": [
780
- {
781
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
782
- "layer_range": [32, 40]
783
- },
784
- {
785
- "model": "sometimesanotion/Qwentinuum-14B-v6",
786
- "layer_range": [32, 40]
787
- }
788
- ]
789
- },
790
- {
791
- "sources": [
792
- {
793
- "model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
794
- "layer_range": [40, 48]
795
- },
796
- {
797
- "model": "sometimesanotion/Qwentinuum-14B-v6",
798
- "layer_range": [40, 48]
799
- }
800
- ]
801
- }
802
- ],
803
- # The 'parameters' block that includes "t: 0.40" is implied by the snippet
804
- }
805
- },
806
- {
807
- "rank": 76,
808
- "name": "CultriX/Qwen2.5-14B-Brocav6",
809
- "scores": {
810
- "average": 38.32,
811
- "IFEval": 69.95,
812
- "BBH": 47.82,
813
- "MATH": 29.61,
814
- "GPQA": 15.66,
815
- "MUSR": 18.88,
816
- "MMLU-PRO": 47.99
817
- },
818
- "known_config": {
819
- "merge_method": "della_linear",
820
- "base_model": "CultriX/Qwen2.5-14B-Wernickev3",
821
- "dtype": "bfloat16",
822
- "parameters": {
823
- "epsilon": 0.01,
824
- "lambda": 1.5,
825
- "normalize": True
826
- },
827
- "adaptive_merge_parameters": {
828
- "task_weights": {
829
- "tinyArc": 1.65,
830
- "tinyHellaswag": 1.55,
831
- "tinyMMLU": 1.7,
832
- "tinyTruthfulQA": 1.95,
833
- "tinyTruthfulQA_mc1": 1.75,
834
- "tinyWinogrande": 1.8,
835
- "IFEval": 2.0,
836
- "BBH": 1.75,
837
- "MATH": 2.2,
838
- "GPQA": 1.85,
839
- "MUSR": 1.95,
840
- "MMLU-PRO": 1.85
841
- },
842
- "smoothing_factor": 0.08
843
- },
844
- "gradient_clipping": {
845
- "CultriX/Qwen2.5-14B-Wernickev3": 0.85,
846
- "CultriX/Qwenfinity-2.5-14B": 0.82,
847
- "djuna/Q2.5-Veltha-14B-0.5": 0.92,
848
- "CultriX/Qwen2.5-14B-Broca": 0.86,
849
- "qingy2019/Qwen2.5-Math-14B-Instruct": 0.94,
850
- "CultriX/SeQwence-14Bv1": 0.87,
851
- "sometimesanotion/Qwen2.5-14B-Vimarckoso": 0.90,
852
- "allknowingroger/QwenSlerp6-14B": 0.86
853
- },
854
- "models": [
855
- {
856
- "model": "CultriX/Qwen2.5-14B-Wernickev3",
857
- "parameters": {
858
- "weight": 0.25,
859
- "density": 0.72
860
- }
861
- },
862
- {
863
- "model": "CultriX/Qwenfinity-2.5-14B",
864
- "parameters": {
865
- "weight": 0.22,
866
- "density": 0.68
867
- }
868
- },
869
- {
870
- "model": "djuna/Q2.5-Veltha-14B-0.5",
871
- "parameters": {
872
- "weight": 0.20,
873
- "density": 0.75
874
- }
875
- },
876
- {
877
- "model": "CultriX/Qwen2.5-14B-Broca",
878
- "parameters": {
879
- "weight": 0.16,
880
- "density": 0.68
881
- }
882
- },
883
- {
884
- "model": "qingy2019/Qwen2.5-Math-14B-Instruct",
885
- "parameters": {
886
- "weight": 0.19,
887
- "density": 0.75
888
- }
889
- },
890
- {
891
- "model": "CultriX/SeQwence-14Bv1",
892
- "parameters": {
893
- "weight": 0.13,
894
- "density": 0.65
895
- }
896
- },
897
- {
898
- "model": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
899
- "parameters": {
900
- "weight": 0.11,
901
- "density": 0.62
902
- }
903
- },
904
- {
905
- "model": "allknowingroger/QwenSlerp6-14B",
906
- "parameters": {
907
- "weight": 0.09,
908
- "density": 0.65
909
- }
910
- }
911
- ]
912
- }
913
- },
914
- {
915
- "rank": 80,
916
- "name": "CultriX/SeQwence-14Bv1",
917
- "scores": {
918
- "average": 38.20,
919
- "IFEval": 66.78,
920
- "BBH": 47.19,
921
- "MATH": 33.53,
922
- "GPQA": 14.88,
923
- "MUSR": 18.80,
924
- "MMLU-PRO": 48.00
925
- },
926
- "known_config": {
927
- "models": [
928
- {
929
- "model": "CultriX/Qwen2.5-14B-Wernicke",
930
- "parameters": {
931
- "weight": 0.35,
932
- "density": 0.6
933
- }
934
- },
935
- {
936
- "model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
937
- "parameters": {
938
- "weight": 0.30,
939
- "density": 0.6
940
- }
941
- },
942
- {
943
- "model": "CultriX/Qwen2.5-14B-MegaMerge-pt2",
944
- "parameters": {
945
- "weight": 0.20,
946
- "density": 0.5
947
- }
948
- },
949
- {
950
- "model": "CultriX/SeQwence-14B",
951
- "parameters": {
952
- "weight": 0.15,
953
- "density": 0.4
954
- }
955
- },
956
- {
957
- "model": "v000000/Qwen2.5-Lumen-14B",
958
- "parameters": {
959
- "weight": 0.10,
960
- "density": 0.5
961
- }
962
- }
963
- ],
964
- "base_model": "Qwen/Qwen2.5-14B",
965
- "merge_method": "dare_ties",
966
- "parameters": {
967
- "normalize": True,
968
- "int8_mask": True
969
- },
970
- "dtype": "bfloat16",
971
- "tokenizer_source": "Qwen/Qwen2.5-14B-Instruct"
972
- }
973
- },
974
- {
975
- "rank": 85,
976
- "name": "sometimesanotion/Qwentinuum-14B-v013",
977
- "scores": {
978
- "average": 37.96,
979
- "IFEval": 67.11,
980
- "BBH": 43.97,
981
- "MATH": 33.01,
982
- "GPQA": 14.32,
983
- "MUSR": 24.99,
984
- "MMLU-PRO": 44.34
985
- },
986
- "known_config": {
987
- "name": "Qwentinuum-14B-v013",
988
- "merge_method": "model_stock",
989
- "base_model": "Qwen/Qwen2.5-14B",
990
- "tokenizer_source": "base",
991
- "parameters": {
992
- "int8_mask": True,
993
- "normalize": True,
994
- "rescale": False
995
- },
996
- "models": [
997
- "sometimesanotion/Qwenvergence-14B-v3-Prose+sometimesanotion/Qwenvergence-Abliterate-512",
998
- "sometimesanotion/Qwentinuum-14B-v011+sometimesanotion/Qwenvergence-Abliterate-512",
999
- "sometimesanotion/Qwentinuum-14B-v012+sometimesanotion/Qwenvergence-Abliterate-256",
1000
- "sometimesanotion/Qwenvergence-14B-v6-Prose+sometimesanotion/Qwenvergence-Abliterate-512",
1001
- "sometimesanotion/Lamarck-14B-v0.3+sometimesanotion/Qwenvergence-Abliterate-512",
1002
- "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2"
1003
- ],
1004
- "dtype": "bfloat16",
1005
- "out_dtype": "bfloat16"
1006
- }
1007
- },
1008
- {
1009
- "rank": 86,
1010
- "name": "CultriX/Qwen2.5-14B-Wernickev3",
1011
- "scores": {
1012
- "average": 37.94,
1013
- "IFEval": 70.48,
1014
- "BBH": 44.58,
1015
- "MATH": 32.78,
1016
- "GPQA": 14.99,
1017
- "MUSR": 18.69,
1018
- "MMLU-PRO": 46.13
1019
- },
1020
- "known_config": {
1021
- "CONFIG SuperiorMerge-14B-From-2-to-10": {
1022
- "models": [
1023
- {
1024
- "model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
1025
- "parameters": {
1026
- "weight": 0.25,
1027
- "density": 0.6
1028
- }
1029
- },
1030
- {
1031
- "model": "allknowingroger/QwenSlerp6-14B",
1032
- "parameters": {
1033
- "weight": 0.25,
1034
- "density": 0.6
1035
- }
1036
- },
1037
- {
1038
- "model": "CultriX/SeQwence-14B-EvolMerge",
1039
- "parameters": {
1040
- "weight": 0.20,
1041
- "density": 0.5
1042
- }
1043
- },
1044
- {
1045
- "model": "CultriX/Qwen2.5-14B-Wernicke",
1046
- "parameters": {
1047
- "weight": 0.15,
1048
- "density": 0.5
1049
- }
1050
- },
1051
- {
1052
- "model": "allknowingroger/QwenStock3-14B",
1053
- "parameters": {
1054
- "weight": 0.15,
1055
- "density": 0.5
1056
- }
1057
- }
1058
- ],
1059
- "base_model": "Qwen/Qwen2.5-14B",
1060
- "merge_method": "dare_ties",
1061
- "parameters": {
1062
- "normalize": True,
1063
- "int8_mask": True
1064
- },
1065
- "dtype": "bfloat16",
1066
- "tokenizer_source": "Qwen/Qwen2.5-14B-Instruct"
1067
- }
1068
- }
1069
- },
1070
- {
1071
- "rank": 88,
1072
- "name": "allknowingroger/QwenSlerp4-14B",
1073
- "scores": {
1074
- "average": 37.80,
1075
- "IFEval": 63.28,
1076
- "BBH": 49.38,
1077
- "MATH": 30.97,
1078
- "GPQA": 16.33,
1079
- "MUSR": 17.59,
1080
- "MMLU-PRO": 49.28
1081
- },
1082
- "known_config": {
1083
- "models": [
1084
- {
1085
- "model": "CultriX/Qwen2.5-14B-Wernicke",
1086
- "parameters": {
1087
- "weight": 0.55,
1088
- "density": 0.80
1089
- }
1090
- },
1091
- {
1092
- "model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
1093
- "parameters": {
1094
- "weight": 0.20,
1095
- "density": 0.60
1096
- }
1097
- },
1098
- {
1099
- "model": "rombodawg/Rombos-LLM-V2.6-Qwen-14b",
1100
- "parameters": {
1101
- "weight": 0.25,
1102
- "density": 0.70
1103
- }
1104
- },
1105
- {
1106
- "model": "allknowingroger/Qwenslerp2-14B",
1107
- "parameters": {
1108
- "weight": 0.15,
1109
- "density": 0.65
1110
- }
1111
- }
1112
- ],
1113
- "base_model": "Qwen/Qwen2.5-14B",
1114
- "merge_method": "dare_ties",
1115
- "parameters": {
1116
- "normalize": True,
1117
- "int8_mask": True
1118
- },
1119
- "dtype": "bfloat16",
1120
- "tokenizer_source": "Qwen/Qwen2.5-14B-Instruct",
1121
- "adaptive_merge_parameters": {
1122
- "task_weights": {
1123
- "IFEval": 1.0,
1124
- "MATH": 1.3,
1125
- "GPQA": 1.1,
1126
- "MUSR": 1.2,
1127
- "MMLU-PRO": 1.0
1128
- },
1129
- "smoothing_factor": 0.15
1130
- },
1131
- "gradient_clipping": 1.0
1132
- }
1133
- },
1134
- {
1135
- "rank": 89,
1136
- "name": "CultriX/Qwen2.5-14B-Broca",
1137
- "scores": {
1138
- "average": 37.72,
1139
- "IFEval": 56.04,
1140
- "BBH": 50.03,
1141
- "MATH": 34.59,
1142
- "GPQA": 18.23,
1143
- "MUSR": 18.95,
1144
- "MMLU-PRO": 48.49
1145
- },
1146
- "known_config": {
1147
- "merge_method": "della_linear",
1148
- "base_model": "CultriX/Qwen2.5-14B-Wernickev3",
1149
- "dtype": "bfloat16",
1150
- "parameters": {
1151
- "epsilon": 0.03,
1152
- "lambda": 1.1,
1153
- "normalize": True
1154
- },
1155
- "adaptive_merge_parameters": {
1156
- "task_weights": {
1157
- "tinyArc": 1.3,
1158
- "tinyHellaswag": 1.2,
1159
- "tinyMMLU": 1.1,
1160
- "tinyTruthfulQA": 1.4,
1161
- "tinyWinogrande": 1.2,
1162
- "IFEval": 1.3,
1163
- "BBH": 1.3,
1164
- "MATH": 1.4,
1165
- "GPQA": 1.3,
1166
- "MUSR": 1.2,
1167
- "MMLU-PRO": 1.2
1168
- },
1169
- "smoothing_factor": 0.15
1170
- },
1171
- "gradient_clipping": 1.0,
1172
- "models": [
1173
- {
1174
- "model": "CultriX/Qwen2.5-14B-Wernickev3",
1175
- "parameters": {
1176
- "weight": 0.5,
1177
- "density": 0.7
1178
- }
1179
- },
1180
- {
1181
- "model": "djuna/Q2.5-Veltha-14B-0.5",
1182
- "parameters": {
1183
- "weight": 0.3,
1184
- "density": 0.8
1185
- }
1186
- },
1187
- {
1188
- "model": "CultriX/SeQwence-14B-EvolMerge",
1189
- "parameters": {
1190
- "weight": 0.2,
1191
- "density": 0.6
1192
- }
1193
- }
1194
- ],
1195
- "tokenizer_source": "CultriX/Qwen2.5-14B-Wernickev3"
1196
- }
1197
- },
1198
- {
1199
- "rank": 90,
1200
- "name": "CultriX/Qwen2.5-14B-Emerged",
1201
- "scores": {
1202
- "average": 37.66,
1203
- "IFEval": 70.00,
1204
- "BBH": 45.93,
1205
- "MATH": 30.74,
1206
- "GPQA": 14.32,
1207
- "MUSR": 18.47,
1208
- "MMLU-PRO": 46.51
1209
- },
1210
- "known_config": {
1211
- "models": [
1212
- {"model": "CultriX/Qwen2.5-14B-Wernickev3"},
1213
- {"model": "CultriX/Qwen2.5-14B-Wernickev5"}
1214
- ],
1215
- "merge_method": "slerp",
1216
- "base_model": "CultriX/Qwen2.5-14B-Wernickev3",
1217
- "dtype": "bfloat16",
1218
- "parameters": {
1219
- "t": [0, 0.5, 1, 0.5, 0]
1220
- },
1221
- "dtype_duplicate": "bfloat16", # The snippet repeated 'dtype' line
1222
- "adaptive_merge_parameters": {
1223
- "task_weights": {
1224
- "tinyArc": 1.2,
1225
- "tinyHellaswag": 1.1,
1226
- "tinyMMLU": 1.2,
1227
- "tinyTruthfulQA": 1.3,
1228
- "tinyTruthfulQA_mc1": 1.1,
1229
- "tinyWinogrande": 1.2
1230
- },
1231
- "smoothing_factor": 0.2
1232
- },
1233
- "gradient_clipping": 1.0
1234
- }
1235
- },
1236
- {
1237
- "rank": 91,
1238
- "name": "sometimesanotion/Qwentinuum-14B-v8",
1239
- "scores": {
1240
- "average": 37.65,
1241
- "IFEval": 54.12,
1242
- "BBH": 50.11,
1243
- "MATH": 34.14,
1244
- "GPQA": 17.79,
1245
- "MUSR": 20.75,
1246
- "MMLU-PRO": 49.02
1247
- },
1248
- "known_config": None
1249
- },
1250
- {
1251
- "rank": 92,
1252
- "name": "qingy2024/Fusion-14B-Instruct",
1253
- "scores": {
1254
- "average": 37.64,
1255
- "IFEval": 72.60,
1256
- "BBH": 48.58,
1257
- "MATH": 30.97,
1258
- "GPQA": 13.98,
1259
- "MUSR": 14.81,
1260
- "MMLU-PRO": 44.93
1261
- },
1262
- "known_config": {
1263
- "models": [
1264
- {
1265
- "model": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview",
1266
- "parameters": {
1267
- "weight": 0.3,
1268
- "density": 0.6
1269
- }
1270
- },
1271
- {
1272
- "model": "arcee-ai/Virtuoso-Small",
1273
- "parameters": {
1274
- "weight": 0.7,
1275
- "density": 0.6
1276
- }
1277
- }
1278
- ],
1279
- "base_model": "Qwen/Qwen2.5-14B",
1280
- "merge_method": "dare_ties",
1281
- "parameters": {
1282
- "normalize": True,
1283
- "int8_mask": True
1284
- },
1285
- "dtype": "bfloat16",
1286
- "tokenizer_source": "Qwen/Qwen2.5-14B-Instruct"
1287
- }
1288
- },
1289
- {
1290
- "rank": 94,
1291
- "name": "CultriX/Qwestion-14B",
1292
- "scores": {
1293
- "average": 37.63,
1294
- "IFEval": 63.18,
1295
- "BBH": 48.76,
1296
- "MATH": 31.72,
1297
- "GPQA": 15.77,
1298
- "MUSR": 17.22,
1299
- "MMLU-PRO": 49.14
1300
- },
1301
- "known_config": {
1302
- "models": [
1303
- {
1304
- "model": "CultriX/Qwen2.5-14B-Wernicke",
1305
- "parameters": {
1306
- "weight": 0.55,
1307
- "density": 0.80
1308
- }
1309
- },
1310
- {
1311
- "model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
1312
- "parameters": {
1313
- "weight": 0.20,
1314
- "density": 0.60
1315
- }
1316
- },
1317
- {
1318
- "model": "rombodawg/Rombos-LLM-V2.6-Qwen-14b",
1319
- "parameters": {
1320
- "weight": 0.25,
1321
- "density": 0.70
1322
- }
1323
- },
1324
- {
1325
- "model": "allknowingroger/Qwenslerp2-14B",
1326
- "parameters": {
1327
- "weight": 0.15,
1328
- "density": 0.65
1329
- }
1330
- }
1331
- ],
1332
- "base_model": "Qwen/Qwen2.5-14B",
1333
- "merge_method": "dare_ties",
1334
- "parameters": {
1335
- "normalize": True,
1336
- "int8_mask": True
1337
- },
1338
- "dtype": "bfloat16",
1339
- "tokenizer_source": "Qwen/Qwen2.5-14B-Instruct",
1340
- "adaptive_merge_parameters": {
1341
- "task_weights": {
1342
- "IFEval": 1.0,
1343
- "MATH": 1.3,
1344
- "GPQA": 1.1,
1345
- "MUSR": 1.2,
1346
- "MMLU-PRO": 1.0
1347
- },
1348
- "smoothing_factor": 0.15
1349
- },
1350
- "gradient_clipping": 1.0
1351
- }
1352
- },
1353
- {
1354
- "rank": 99,
1355
- "name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
1356
- "scores": {
1357
- "average": 37.37,
1358
- "IFEval": 49.18,
1359
- "BBH": 49.80,
1360
- "MATH": 35.57,
1361
- "GPQA": 19.35,
1362
- "MUSR": 21.77,
1363
- "MMLU-PRO": 48.55
1364
- },
1365
- "known_config": None
1366
- },
1367
- {
1368
- "rank": 102,
1369
- "name": "CultriX/SeQwence-14B-v5",
1370
- "scores": {
1371
- "average": 37.27,
1372
- "IFEval": 59.20,
1373
- "BBH": 50.00,
1374
- "MATH": 31.04,
1375
- "GPQA": 16.00,
1376
- "MUSR": 18.33,
1377
- "MMLU-PRO": 49.05
1378
- },
1379
- "known_config": None
1380
- },
1381
- {
1382
- "rank": 103,
1383
- "name": "sometimesanotion/Qwen-14B-ProseStock-v4",
1384
- "scores": {
1385
- "average": 37.23,
1386
- "IFEval": 49.42,
1387
- "BBH": 49.54,
1388
- "MATH": 35.50,
1389
- "GPQA": 18.46,
1390
- "MUSR": 21.70,
1391
- "MMLU-PRO": 48.74
1392
- },
1393
- "known_config": None
1394
- },
1395
- {
1396
- "rank": 104,
1397
- "name": "sometimesanotion/IF-reasoning-experiment-40",
1398
- "scores": {
1399
- "average": 37.21,
1400
- "IFEval": 63.30,
1401
- "BBH": 44.31,
1402
- "MATH": 27.72,
1403
- "GPQA": 17.34,
1404
- "MUSR": 25.86,
1405
- "MMLU-PRO": 44.72
1406
- },
1407
- "known_config": {
1408
- "name": "sometimesanotion/IF-reasoning-experiment-40",
1409
- "merge_method": "slerp",
1410
- "base_model": "sometimesanotion/Qwenvergence-Abliterate",
1411
- "tokenizer_source": "base",
1412
- "dtype": "float32",
1413
- "out_dtype": "bfloat16",
1414
- "parameters": {
1415
- "t": [
1416
- {"value": 0.40}
1417
- ]
1418
- },
1419
- "slices": [
1420
- {
1421
- "sources": [
1422
- {
1423
- "model": "sometimesanotion/Qwenvergence-Abliterate",
1424
- "layer_range": [0, 48]
1425
- },
1426
- {
1427
- "model": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3+sometimesanotion/Qwenvergence-Abliterate-64",
1428
- "layer_range": [0, 48]
1429
- }
1430
- ]
1431
- }
1432
- ]
1433
- }
1434
- },
1435
- {
1436
- "rank": 105,
1437
- "name": "CultriX/SeQwence-14B-EvolMerge",
1438
- "scores": {
1439
- "average": 37.20,
1440
- "IFEval": 53.82,
1441
- "BBH": 50.78,
1442
- "MATH": 31.80,
1443
- "GPQA": 17.45,
1444
- "MUSR": 20.26,
1445
- "MMLU-PRO": 49.10
1446
- },
1447
- "known_config": {
1448
- "base_model": "CultriX/SeQwence-14Bv1",
1449
- "dtype": "bfloat16",
1450
- "merge_method": "dare_ties",
1451
- "parameters": {
1452
- "int8_mask": 1.0,
1453
- "normalize": 1.0
1454
- },
1455
- "slices": [
1456
- {
1457
- "sources": [
1458
- {
1459
- "layer_range": [0, 48],
1460
- "model": "CultriX/SeQwence-14Bv1",
1461
- "parameters": {
1462
- "density": [
1463
- 0.9723868064882017,
1464
- 1.0,
1465
- 1.0,
1466
- 1.0,
1467
- 1.0,
1468
- 0.9714039829478123
1469
- ],
1470
- "weight": [
1471
- 0.303941801676895,
1472
- 0.364404551023674,
1473
- 0.315900913803921,
1474
- 0.3276032249804535,
1475
- 0.32167313684876814,
1476
- 0.4385348686221433
1477
- ]
1478
- }
1479
- },
1480
- {
1481
- "layer_range": [0, 48],
1482
- "model": "CultriX/Qwestion-14B",
1483
- "parameters": {
1484
- "density": [
1485
- 1.0,
1486
- 0.9914516102369406,
1487
- 1.0,
1488
- 0.8035966798672015,
1489
- 0.8192028457518323,
1490
- 0.9514479609471497
1491
- ],
1492
- "weight": [
1493
- 0.23754044230348376,
1494
- 0.26302919982461254,
1495
- 0.26313082788173275,
1496
- 0.17815237275761467,
1497
- 0.34301750695974753,
1498
- 0.5374787613924082
1499
- ]
1500
- }
1501
- },
1502
- {
1503
- "layer_range": [0, 48],
1504
- "model": "CultriX/Qwen2.5-14B-Wernicke",
1505
- "parameters": {
1506
- "density": [
1507
- 0.9250003667144193,
1508
- 0.9603820599250329,
1509
- 0.8766642760655986,
1510
- 1.0,
1511
- 0.9993615706551808,
1512
- 0.7459506348277176
1513
- ],
1514
- "weight": [
1515
- 0.48038202535582214,
1516
- 0.5870170049221364,
1517
- 0.27054455623315504,
1518
- 0.06016442415521043,
1519
- 0.4012739361231067,
1520
- 0.26890177448533076
1521
- ]
1522
- }
1523
- }
1524
- ]
1525
- }
1526
- ]
1527
- }
1528
- }
1529
  ]
1530
 
1531
-
1532
- # ---------------------------------------------------------
1533
- # PART 2: PARSING LOGIC -- PRINTS OR SCRAPES
1534
- # ---------------------------------------------------------
 
 
 
1535
 
1536
  def print_benchmark_and_config_info(model_info):
1537
  """
1538
- Prints an overview (to stdout) of the benchmark scores for one model,
1539
- then prints its known MergeKit config if present, otherwise prints a "No config found" note.
1540
  """
1541
  print("---")
1542
  print(f"Model Rank: {model_info['rank']}")
@@ -1550,18 +277,33 @@ def print_benchmark_and_config_info(model_info):
1550
  print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
1551
 
1552
  if model_info["known_config"] is not None:
 
1553
  print("###")
1554
- # For demonstration, let's just print the dictionary in a 'rough' YAML style
1555
- # If you want perfect YAML, consider using pyyaml to dump it.
1556
- _print_dict_as_yaml(model_info["known_config"], indent_level=0)
 
 
 
 
 
 
 
 
 
 
 
 
1557
  print("###")
1558
  else:
1559
- print("(No MergeKit configuration found.)")
1560
- print("")
1561
- print("You can try the following Python script to scrape the model page:")
1562
- print("######################################################################")
1563
- print(
1564
- f'''import requests
 
 
1565
  from bs4 import BeautifulSoup
1566
 
1567
  def scrape_model_page(model_url):
@@ -1571,10 +313,8 @@ def scrape_model_page(model_url):
1571
  return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
1572
 
1573
  soup = BeautifulSoup(response.text, "html.parser")
1574
-
1575
  yaml_config = soup.find("pre")
1576
  yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
1577
-
1578
  metadata_section = soup.find("div", class_="metadata")
1579
  metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
1580
 
@@ -1587,71 +327,93 @@ def scrape_model_page(model_url):
1587
  return f"Error: {{str(e)}}"
1588
 
1589
  if __name__ == "__main__":
1590
- model_url = "https://huggingface.co/{model_info['name']}"
1591
  result = scrape_model_page(model_url)
1592
- print(result)'''
1593
- )
1594
- print("######################################################################")
1595
-
1596
-
1597
- def _print_dict_as_yaml(data, indent_level=0):
1598
- """
1599
- Recursively prints dict 'data' as pseudo-YAML to stdout.
1600
- (We do it manually because the user data can be nested.)
1601
- """
1602
- indent = " " * indent_level
1603
- if isinstance(data, dict):
1604
- for k, v in data.items():
1605
- if isinstance(v, dict):
1606
- print(f"{indent}{k}:")
1607
- _print_dict_as_yaml(v, indent_level+1)
1608
- elif isinstance(v, list):
1609
- print(f"{indent}{k}:")
1610
- for item in v:
1611
- if isinstance(item, dict):
1612
- print(f"{indent}-")
1613
- _print_dict_as_yaml(item, indent_level+2)
1614
- else:
1615
- print(f"{indent}- {item}")
1616
- else:
1617
- print(f"{indent}{k}: {v}")
1618
- else:
1619
- print(f"{indent}{data}")
1620
-
1621
 
1622
def run_parsing_script():
    """
    Run print_benchmark_and_config_info() for every entry in benchmark_data
    and return everything it printed as a single string.

    stdout is redirected with contextlib.redirect_stdout, so it is restored
    even if printing one of the entries raises; the exception still
    propagates to the caller.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from contextlib import redirect_stdout

    captured_output = io.StringIO()
    with redirect_stdout(captured_output):
        for model in benchmark_data:
            print_benchmark_and_config_info(model)
    return captured_output.getvalue()
1637
-
1638
 
1639
- # ---------------------------------------------------------
1640
- # PART 3: GRADIO APP
1641
- # ---------------------------------------------------------
1642
 
1643
def parse_and_show_results():
    """
    Gradio callback: run the benchmark parser and hand back its captured
    text output unchanged.
    """
    parsed_text = run_parsing_script()
    return parsed_text
1649
 
1650
# Gradio UI: a heading, a short instruction, one button and one textbox.
with gr.Blocks() as demo:
    gr.Markdown("# Full-Dataset Dynamic Benchmark Parsing")
    gr.Markdown("Click the button below to parse all models (Rank 44 to 105) from the dataset:")
    parse_btn = gr.Button("Parse Benchmarks")
    results_box = gr.Textbox(label="Parsed Benchmark Results", lines=25)
    # The callback takes no inputs; its returned string fills results_box.
    parse_btn.click(fn=parse_and_show_results, outputs=results_box)

# Start the app when the module is executed.
demo.launch()
 
1
+ import pandas as pd
2
+ import matplotlib.pyplot as plt
3
+ import seaborn as sns
4
+ import gradio as gr
5
  import requests
6
  from bs4 import BeautifulSoup
7
+ import io
8
+ import os
9
+ import base64
10
+ import zipfile
11
+ from PIL import Image
12
+ from io import BytesIO
13
+ import tempfile
14
+ import sys
15
+
16
+ # --------------------------------------------------------------------
17
+ # PART 1: YOUR EXISTING DATA & PLOTS (unchanged)
18
+ # --------------------------------------------------------------------
19
+
20
# One row per model: [model id, Hugging Face URL, then six benchmark scores
# in the order named by `columns` below].
data_full = [
    ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co/CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
    ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
    ['CultriX/Qwen2.5-14B-FinalMerge', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge', 0.7248, 0.8277, 0.7113, 0.7052, 0.57, 0.7001],
    ['CultriX/Qwen2.5-14B-MultiCultyv2', 'https://huggingface.co/CultriX/Qwen2.5-14B-MultiCultyv2', 0.7295, 0.8359, 0.7363, 0.5767, 0.44, 0.7316],
    ['CultriX/Qwen2.5-14B-Brocav7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav7', 0.7445, 0.8353, 0.7508, 0.6292, 0.46, 0.7629],
    ['CultriX/Qwen2.5-14B-Broca', 'https://huggingface.co/CultriX/Qwen2.5-14B-Broca', 0.7456, 0.8352, 0.748, 0.6034, 0.44, 0.7716],
    ['CultriX/Qwen2.5-14B-Brocav3', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav3', 0.7395, 0.8388, 0.7393, 0.6405, 0.47, 0.7659],
    ['CultriX/Qwen2.5-14B-Brocav4', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav4', 0.7432, 0.8377, 0.7444, 0.6277, 0.48, 0.758],
    ['CultriX/Qwen2.5-14B-Brocav2', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav2', 0.7492, 0.8302, 0.7508, 0.6377, 0.51, 0.7478],
    ['CultriX/Qwen2.5-14B-Brocav5', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav5', 0.7445, 0.8313, 0.7547, 0.6376, 0.5, 0.7304],
    ['CultriX/Qwen2.5-14B-Brocav6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav6', 0.7179, 0.8354, 0.7531, 0.6378, 0.49, 0.7524],
    ['CultriX/Qwenfinity-2.5-14B', 'https://huggingface.co/CultriX/Qwenfinity-2.5-14B', 0.7347, 0.8254, 0.7279, 0.7267, 0.56, 0.697],
    ['CultriX/Qwen2.5-14B-Emergedv2', 'https://huggingface.co/CultriX/Qwen2.5-14B-Emergedv2', 0.7137, 0.8335, 0.7363, 0.5836, 0.44, 0.7344],
    ['CultriX/Qwen2.5-14B-Unity', 'https://huggingface.co/CultriX/Qwen2.5-14B-Unity', 0.7063, 0.8343, 0.7423, 0.682, 0.57, 0.7498],
    ['CultriX/Qwen2.5-14B-MultiCultyv3', 'https://huggingface.co/CultriX/Qwen2.5-14B-MultiCultyv3', 0.7132, 0.8216, 0.7395, 0.6792, 0.55, 0.712],
    ['CultriX/Qwen2.5-14B-Emergedv3', 'https://huggingface.co/CultriX/Qwen2.5-14B-Emergedv3', 0.7436, 0.8312, 0.7519, 0.6585, 0.55, 0.7068],
    ['CultriX/SeQwence-14Bv1', 'https://huggingface.co/CultriX/SeQwence-14Bv1', 0.7278, 0.841, 0.7541, 0.6816, 0.52, 0.7539],
    ['CultriX/Qwen2.5-14B-Wernickev2', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev2', 0.7391, 0.8168, 0.7273, 0.622, 0.45, 0.7572],
    ['CultriX/Qwen2.5-14B-Wernickev3', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev3', 0.7357, 0.8148, 0.7245, 0.7023, 0.55, 0.7869],
    ['CultriX/Qwen2.5-14B-Wernickev4', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev4', 0.7355, 0.829, 0.7497, 0.6306, 0.48, 0.7635],
    ['CultriX/SeQwential-14B-v1', 'https://huggingface.co/CultriX/SeQwential-14B-v1', 0.7355, 0.8205, 0.7549, 0.6367, 0.48, 0.7626],
    ['CultriX/Qwen2.5-14B-Wernickev5', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev5', 0.7224, 0.8272, 0.7541, 0.679, 0.51, 0.7578],
    ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
    ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
    ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
    ['CultriX/Qwen2.5-14B-BrocaV8', 'https://huggingface.co/CultriX/Qwen2.5-14B-BrocaV8', 0.7415, 0.8396, 0.7334, 0.5785, 0.4300, 0.7646],
]

# Column labels for data_full: two identifier columns followed by the six
# score columns. Code below slices the scores positionally (from index 2).
columns = ["Model Configuration", "Model Link", "tinyArc", "tinyHellaswag",
           "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"]
# Module-level table shared by the plotting, scraping and download functions.
df_full = pd.DataFrame(data_full, columns=columns)
52
+
53
def _score_columns():
    """Return the names of the benchmark score columns (all but the two id columns)."""
    return list(columns[2:])


def _finish_figure():
    """Render the current matplotlib figure to PNG, close it, and package the result.

    Returns:
        tuple: (PIL image of the figure, path to a temporary ".png" file
        holding the same PNG bytes).
    """
    png_buffer = io.BytesIO()
    try:
        plt.savefig(png_buffer, format='png')
    finally:
        # Always release the figure, even if rendering fails.
        plt.close()
    png_bytes = png_buffer.getvalue()

    # delete=False: the file must outlive this call so the UI can serve it.
    # The handle is closed on leaving the with-block.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        tmp.write(png_bytes)

    pil_image = Image.open(io.BytesIO(png_bytes))
    pil_image.load()  # decode now so the image does not depend on lazy reads
    return pil_image, tmp.name


def plot_average_scores():
    """Plot each model's mean score across all tasks as a horizontal bar chart.

    The average is computed on a local frame; the shared df_full is not
    modified, so other plots never see an extra "Average Score" column.

    Returns:
        tuple: (PIL image, path to a temporary PNG file).
    """
    ranked = pd.DataFrame({
        "Model Configuration": df_full["Model Configuration"],
        "Average Score": df_full[_score_columns()].mean(axis=1),
    }).sort_values(by="Average Score", ascending=False)

    plt.figure(figsize=(14, 10))
    plt.barh(ranked["Model Configuration"], ranked["Average Score"])
    plt.title("Average Performance of Models Across Tasks", fontsize=16)
    plt.xlabel("Average Score", fontsize=14)
    plt.ylabel("Model Configuration", fontsize=14)
    plt.gca().invert_yaxis()  # best model at the top
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    return _finish_figure()


def plot_task_performance():
    """Plot one line per model showing its score on every task.

    Returns:
        tuple: (PIL image, path to a temporary PNG file).
    """
    melted = df_full.melt(id_vars=["Model Configuration", "Model Link"],
                          value_vars=_score_columns(),
                          var_name="Task", value_name="Score")

    plt.figure(figsize=(16, 12))
    # sort=False keeps the models in table order, matching the legend order.
    for model, model_rows in melted.groupby("Model Configuration", sort=False):
        plt.plot(model_rows["Task"], model_rows["Score"], marker="o", label=model)

    plt.title("Performance of All Models Across Tasks", fontsize=16)
    plt.xlabel("Task", fontsize=14)
    plt.ylabel("Score", fontsize=14)
    plt.xticks(rotation=45)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    return _finish_figure()


def plot_task_specific_top_models():
    """Plot the best score per task, labelling each bar with the winning model.

    Returns:
        tuple: (PIL image, path to a temporary PNG file).
    """
    score_cols = _score_columns()
    scores = df_full[score_cols]
    # idxmax yields row labels; translate them into model names.
    best_rows = scores.idxmax().to_numpy()
    results = pd.DataFrame({
        "Task": score_cols,
        "Top Model": df_full.loc[best_rows, "Model Configuration"].to_numpy(),
        "Score": scores.max().to_numpy(),
    })

    plt.figure(figsize=(14, 8))
    bars = plt.bar(results["Task"], results["Score"])
    for bar, model in zip(bars, results["Top Model"]):
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height(), model,
                 ha="center", va="bottom", fontsize=7)
    plt.title("Task-Specific Top Models", fontsize=16)
    plt.xlabel("Task", fontsize=14)
    plt.ylabel("Score", fontsize=14)
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.tight_layout()
    return _finish_figure()


def plot_heatmap():
    """Plot a model-by-task heatmap of all scores.

    The score columns are selected by name so the data always lines up with
    the x tick labels.

    Returns:
        tuple: (PIL image, path to a temporary PNG file).
    """
    score_cols = _score_columns()
    plt.figure(figsize=(14, 10))
    sns.heatmap(df_full[score_cols], annot=True, cmap="YlGnBu",
                xticklabels=score_cols, yticklabels=df_full["Model Configuration"])
    plt.title("Performance Heatmap", fontsize=16)
    plt.tight_layout()
    return _finish_figure()
144
+
145
def scrape_mergekit_config(model_name):
    """Fetch a model's Hugging Face page and return the text of its first <pre> block.

    Args:
        model_name: Value from the "Model Configuration" column of df_full.

    Returns:
        str: The stripped <pre> text, or a message starting with
        "Failed to fetch model page" / "No YAML configuration found" when the
        model is unknown, the request fails, or no <pre> element exists.
        Other functions in this file detect failure by those substrings.
    """
    failure = f"Failed to fetch model page for {model_name}. Please check the link."

    links = df_full.loc[df_full["Model Configuration"] == model_name, "Model Link"]
    if links.empty:
        # Nothing selected in the dropdown, or a name that is not in the table.
        return failure

    try:
        # Timeout so a stalled connection cannot block the UI indefinitely.
        response = requests.get(links.iloc[0], timeout=30)
    except requests.RequestException:
        return failure
    if response.status_code != 200:
        return failure

    soup = BeautifulSoup(response.text, "html.parser")
    yaml_config = soup.find("pre")  # Assume YAML is in <pre> tags
    if yaml_config is not None:
        return yaml_config.text.strip()
    return f"No YAML configuration found for {model_name}."
156
+
157
def download_yaml(yaml_content, model_name):
    """Write a scraped configuration to a temporary .yaml file for download.

    Args:
        yaml_content: Text produced by scrape_mergekit_config (may be an
            error message, empty, or None).
        model_name: Model id; "/" is replaced by "_" to form the file name.

    Returns:
        str | None: Path to "<model>_config.yaml" inside a fresh temporary
        directory, or None when there is nothing valid to save. A file path
        is what a Gradio File output component expects.
    """
    if not yaml_content or not model_name:
        return None
    if "No YAML configuration found" in yaml_content or "Failed to fetch model page" in yaml_content:
        return None

    filename = f"{model_name.replace('/', '_')}_config.yaml"
    # A dedicated temp directory lets the download keep its readable name.
    target_path = os.path.join(tempfile.mkdtemp(), filename)
    with open(target_path, "w", encoding="utf-8") as handle:
        handle.write(yaml_content)
    return target_path
162
+
163
def scrape_model_page(model_url):
    """Fetch a model page and report its first <pre> block and metadata <div>.

    Args:
        model_url: Full URL of the page to fetch.

    Returns:
        str: A two-section report ("**YAML Configuration:**" and
        "**Metadata:**"), or a message starting with "Error:" on any failure.
        This function never raises; callers test for the "Error:" and
        "No YAML configuration found." substrings.
    """
    if not model_url:
        return "Error: No model URL provided."
    try:
        # Timeout so a stalled connection cannot hang the caller forever.
        response = requests.get(model_url, timeout=30)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

        soup = BeautifulSoup(response.text, "html.parser")
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
        return f"**YAML Configuration:**\n{yaml_text}\n\n**Metadata:**\n{metadata_text}"
    except Exception as e:
        # Deliberately broad: this is a UI boundary and callers rely on
        # getting a string back rather than an exception.
        return f"Error: {str(e)}"
177
+
178
def display_scraped_model_data(model_url):
    """Gradio callback: return scrape_model_page's report for the given URL."""
    report = scrape_model_page(model_url)
    return report
180
+
181
def download_all_data():
    """Bundle the score table, all four plots and any scraped configs into a zip.

    Returns:
        str: Path to "analysis_data.zip" in a fresh temporary directory.
        A single file path is returned because this function feeds exactly
        one Gradio File output, which cannot serve an in-memory buffer or a
        (buffer, name) tuple.
    """
    csv_data = df_full.to_csv(index=False).encode('utf-8')

    # Archive member name -> (PIL image, temp file path) from each plot function.
    plots = {
        "average_performance": plot_average_scores(),
        "task_performance": plot_task_performance(),
        "top_models": plot_task_specific_top_models(),
        "heatmap": plot_heatmap(),
    }

    archive_path = os.path.join(tempfile.mkdtemp(), "analysis_data.zip")
    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        zf.writestr("model_scores.csv", csv_data)

        for name, (pil_image, temp_path) in plots.items():
            image_bytes = io.BytesIO()
            pil_image.save(image_bytes, format='PNG')
            # Use the plot's logical name, not the absolute temp path,
            # as the entry name inside the archive.
            zf.writestr(f"{name}.png", image_bytes.getvalue())
            try:
                # Best-effort cleanup of the plot's temp file; the image is
                # already stored in the archive.
                os.remove(temp_path)
            except OSError:
                pass

        # Also try scraping each model for a YAML config
        for model_name in df_full["Model Configuration"].to_list():
            yaml_content = scrape_mergekit_config(model_name)
            if ("No YAML configuration found" not in yaml_content) and ("Failed to fetch model page" not in yaml_content):
                zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())

    return archive_path
216
+
217
+
218
+ # --------------------------------------------------------------------
219
+ # PART 2: THE "NON-TINY BENCHMARKS" PARSER (from your snippet)
220
+ # --------------------------------------------------------------------
221
+ # We'll define the logic that prints out each model, attempts to scrape config, etc.
222
+ # Then we capture that printed output and return it as a string.
223
+
224
+ # Example "non-tiny" data, or reuse the snippet's data exactly:
225
+ non_tiny_benchmark_data = [
226
  {
227
  "rank": 44,
228
  "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
 
235
  "MUSR": 19.39,
236
  "MMLU-PRO": 48.26
237
  },
238
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
239
  "known_config": {
240
  "models": [
241
  {"model": "CultriX/SeQwence-14Bv1"},
 
249
  }
250
  }
251
  },
252
+ # ... (include the rest of your non-tiny models from the snippet)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  ]
254
 
255
def snippet_scrape_model_page(url):
    """
    Scrape a model page for the benchmark parser.

    Kept as a separate entry point, but it simply delegates to
    scrape_model_page so the scraping logic lives in one place.
    """
    page_report = scrape_model_page(url)
    return page_report
262
 
263
  def print_benchmark_and_config_info(model_info):
264
  """
265
+ Prints all info about the model: rank, scores, plus either a known config
266
+ or a scraped config. This is the logic from your snippet.
267
  """
268
  print("---")
269
  print(f"Model Rank: {model_info['rank']}")
 
277
  print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
278
 
279
  if model_info["known_config"] is not None:
280
+ # Print known config in a simplistic YAML-like manner
281
  print("###")
282
+ kc = model_info["known_config"]
283
+ if "models" in kc:
284
+ print("models:")
285
+ for m in kc["models"]:
286
+ print(f" - model: {m['model']}")
287
+ if "merge_method" in kc:
288
+ print(f"merge_method: {kc['merge_method']}")
289
+ if "base_model" in kc:
290
+ print(f"base_model: {kc['base_model']}")
291
+ if "dtype" in kc:
292
+ print(f"dtype: {kc['dtype']}")
293
+ if "parameters" in kc:
294
+ print("parameters:")
295
+ for pk, pv in kc["parameters"].items():
296
+ print(f" {pk}: {pv}")
297
  print("###")
298
  else:
299
+ # Attempt to scrape
300
+ scraped = snippet_scrape_model_page(model_info["hf_url"])
301
+ # If it's an error or "No YAML config", then print the snippet
302
+ if "No YAML configuration found." in scraped or "Error:" in scraped:
303
+ print("(No MergeKit configuration found.)\n")
304
+ print("You can try the following Python script to scrape the model page:\n")
305
+ print("#" * 70)
306
+ print(f'''import requests
307
  from bs4 import BeautifulSoup
308
 
309
  def scrape_model_page(model_url):
 
313
  return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
314
 
315
  soup = BeautifulSoup(response.text, "html.parser")
 
316
  yaml_config = soup.find("pre")
317
  yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
 
318
  metadata_section = soup.find("div", class_="metadata")
319
  metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
320
 
 
327
  return f"Error: {{str(e)}}"
328
 
329
  if __name__ == "__main__":
330
+ model_url = "{model_info['hf_url']}"
331
  result = scrape_model_page(model_url)
332
+ print(result)''')
333
+ print("#" * 70)
334
+ else:
335
+ print("###")
336
+ print(scraped)
337
+ print("###")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
 
339
def run_non_tiny_benchmarks():
    """
    Run print_benchmark_and_config_info() for every entry in
    'non_tiny_benchmark_data' and return everything it printed as one string
    for display in Gradio.

    stdout is redirected with contextlib.redirect_stdout, so it is restored
    even if one entry raises (for example, a missing key); the exception
    still propagates to the caller.
    """
    # Local import: keeps this block self-contained without touching the
    # file-level import list.
    from contextlib import redirect_stdout

    buffer = io.StringIO()
    with redirect_stdout(buffer):
        for model in non_tiny_benchmark_data:
            print_benchmark_and_config_info(model)
    return buffer.getvalue()
 
354
 
 
 
 
355
 
356
+ # --------------------------------------------------------------------
357
+ # PART 3: GRADIO APP (Your existing code, with one new button!)
358
+ # --------------------------------------------------------------------
 
 
 
359
 
360
# Gradio UI. Each section pairs a button with the components its callback
# fills in.
# NOTE(review): the nesting below is reconstructed; confirm which components
# sit inside each Row/Column against the real file.
with gr.Blocks() as demo:
    gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")

    # Plot sections: every plot callback returns (PIL image, file path),
    # which map to the Image and File components respectively.
    with gr.Row():
        btn1 = gr.Button("Show Average Performance")
        img1 = gr.Image(type="pil", label="Average Performance Plot")
        img1_download = gr.File(label="Download Average Performance")
        btn1.click(plot_average_scores, outputs=[img1, img1_download])

    with gr.Row():
        btn2 = gr.Button("Show Task Performance")
        img2 = gr.Image(type="pil", label="Task Performance Plot")
        img2_download = gr.File(label="Download Task Performance")
        btn2.click(plot_task_performance, outputs=[img2, img2_download])

    with gr.Row():
        btn3 = gr.Button("Task-Specific Top Models")
        img3 = gr.Image(type="pil", label="Task-Specific Top Models Plot")
        img3_download = gr.File(label="Download Top Models")
        btn3.click(plot_task_specific_top_models, outputs=[img3, img3_download])

    with gr.Row():
        btn4 = gr.Button("Plot Performance Heatmap")
        heatmap_img = gr.Image(type="pil", label="Performance Heatmap")
        heatmap_download = gr.File(label="Download Heatmap")
        btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])

    # Per-model config: pick a model, scrape its page, then optionally save
    # the scraped text as a file.
    with gr.Row():
        model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
        with gr.Column():
            scrape_btn = gr.Button("Scrape MergeKit Configuration")
            yaml_output = gr.Textbox(lines=10, placeholder="YAML Configuration will appear here.")
            scrape_btn.click(scrape_mergekit_config, inputs=model_selector, outputs=yaml_output)
        with gr.Column():
            save_yaml_btn = gr.Button("Save MergeKit Configuration")
            yaml_download = gr.File(label="Download MergeKit Configuration")
            # Saves whatever text is currently in yaml_output.
            save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)

    # One-click bundle of everything.
    with gr.Row():
        download_all_btn = gr.Button("Download Everything")
        all_downloads = gr.File(label="Download All Data")
        download_all_btn.click(download_all_data, outputs=all_downloads)

    # Free-form scraping of any URL the user types in.
    gr.Markdown("## Live Scraping Features")
    with gr.Row():
        url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co/<model>")
        live_scrape_btn = gr.Button("Scrape Model Page")
        live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
        live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)

    # ----------------------------------------------------------------
    # Button and textbox for the "Non-Tiny Benchmarks" parser
    # ----------------------------------------------------------------
    gr.Markdown("## Non-Tiny Benchmark Parser")
    with gr.Row():
        parse_non_tiny_btn = gr.Button("Parse Non-Tiny Benchmarks")
        parse_non_tiny_output = gr.Textbox(label="Non-Tiny Benchmark Output", lines=30)
        # No inputs: the callback returns the captured parser output as text.
        parse_non_tiny_btn.click(fn=run_non_tiny_benchmarks, outputs=parse_non_tiny_output)

# Start the app when the module is executed.
demo.launch()