CultriX committed on
Commit
bb72d5a
·
verified ·
1 Parent(s): 94bcb53

Delete scrape-leaderboard.py

Browse files
Files changed (1) hide show
  1. scrape-leaderboard.py +0 -645
scrape-leaderboard.py DELETED
@@ -1,645 +0,0 @@
1
- import requests
2
- from bs4 import BeautifulSoup
3
-
4
# 1. Leaderboard benchmark data (from "DATA START"). Each record holds:
#    - rank: leaderboard position
#    - name: Hugging Face repo id
#    - scores: average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO (all in %)
#    - hf_url: the Hugging Face URL to scrape for a MergeKit config
#    - known_config: the configuration if already known, otherwise None


def _make_entry(rank, name, average, ifeval, bbh, math_score, gpqa, musr,
                mmlu_pro, known_config=None):
    """Build one leaderboard record; hf_url is derived from the repo name."""
    return {
        "rank": rank,
        "name": name,
        "scores": {
            "average": average,
            "IFEval": ifeval,
            "BBH": bbh,
            "MATH": math_score,
            "GPQA": gpqa,
            "MUSR": musr,
            "MMLU-PRO": mmlu_pro,
        },
        # Every model page lives directly under huggingface.co/<repo-id>.
        "hf_url": f"https://huggingface.co/{name}",
        "known_config": known_config,
    }


# The one configuration already known from the source data
# (sometimesanotion/Qwen2.5-14B-Vimarckoso-v3).
_VIMARCKOSO_V3_CONFIG = {
    "models": [
        {"model": "CultriX/SeQwence-14Bv1"},
        {"model": "allknowingroger/Qwenslerp5-14B"},
    ],
    "merge_method": "slerp",
    "base_model": "CultriX/SeQwence-14Bv1",
    "dtype": "bfloat16",
    "parameters": {
        "t": [0, 0.5, 1, 0.5, 0]
    },
}

benchmark_data = [
    _make_entry(44, "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
                40.10, 72.57, 48.58, 34.44, 17.34, 19.39, 48.26,
                known_config=_VIMARCKOSO_V3_CONFIG),
    _make_entry(45, "sthenno-com/miscii-14b-1225",
                40.08, 78.78, 50.91, 31.57, 17.00, 14.77, 47.46),
    _make_entry(46, "djuna/Q2.5-Veltha-14B-0.5",
                39.96, 77.96, 50.32, 33.84, 15.77, 14.17, 47.72),
    _make_entry(48, "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
                39.81, 71.62, 48.76, 33.99, 17.34, 19.23, 47.95),
    _make_entry(50, "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
                39.46, 68.72, 47.71, 35.05, 18.23, 19.56, 47.50),
    _make_entry(52, "arcee-ai/Virtuoso-Small",
                39.43, 79.35, 50.40, 34.29, 11.52, 14.44, 46.57),
    _make_entry(54, "sometimesanotion/Qwentinuum-14B-v6",
                39.23, 63.04, 50.23, 33.84, 18.23, 21.18, 48.89),
    _make_entry(55, "djuna/Q2.5-Veltha-14B",
                39.21, 82.92, 49.75, 28.02, 14.54, 12.26, 47.76),
    _make_entry(57, "allknowingroger/QwenSlerp6-14B",
                39.02, 68.67, 47.59, 34.14, 16.44, 18.32, 48.95),
    _make_entry(58, "allknowingroger/QwenSlerp5-14B",
                38.94, 71.19, 47.39, 33.16, 15.32, 17.81, 48.78),
    _make_entry(59, "sometimesanotion/Qwentinuum-14B-v5",
                38.87, 62.86, 50.28, 31.57, 18.34, 21.09, 49.09),
    _make_entry(60, "sometimesanotion/Qwenvergence-14B-v6-Prose",
                38.82, 59.90, 50.12, 34.89, 18.46, 21.02, 48.56),
    _make_entry(61, "CultriX/Qwen2.5-14B-Brocav3",
                38.76, 69.52, 49.05, 32.25, 14.54, 19.25, 47.97),
    _make_entry(62, "sometimesanotion/Qwentinuum-14B-v7",
                38.76, 61.09, 50.35, 33.38, 18.79, 19.95, 49.00),
    _make_entry(64, "sometimesanotion/Qwentinuum-14B-v3",
                38.74, 61.58, 50.04, 32.85, 18.34, 20.62, 49.03),
    _make_entry(65, "allura-org/TQ2.5-14B-Aletheia-v1",
                38.74, 75.30, 50.88, 29.53, 14.99, 14.61, 47.12),
    _make_entry(66, "qingy2024/Fusion4-14B-Instruct",
                38.73, 76.49, 50.70, 33.91, 10.74, 13.97, 46.60),
    _make_entry(68, "CultriX/Qwen2.5-14B-Brocav7",
                38.52, 67.24, 48.91, 31.87, 15.66, 20.15, 47.31),
    _make_entry(71, "sometimesanotion/Qwentinuum-14B-v6-Prose",
                38.46, 56.43, 50.14, 35.57, 18.46, 21.34, 48.80),
    _make_entry(76, "CultriX/Qwen2.5-14B-Brocav6",
                38.32, 69.95, 47.82, 29.61, 15.66, 18.88, 47.99),
    _make_entry(80, "CultriX/SeQwence-14Bv1",
                38.20, 66.78, 47.19, 33.53, 14.88, 18.80, 48.00),
    _make_entry(85, "sometimesanotion/Qwentinuum-14B-v013",
                37.96, 67.11, 43.97, 33.01, 14.32, 24.99, 44.34),
    _make_entry(86, "CultriX/Qwen2.5-14B-Wernickev3",
                37.94, 70.48, 44.58, 32.78, 14.99, 18.69, 46.13),
    _make_entry(88, "allknowingroger/QwenSlerp4-14B",
                37.80, 63.28, 49.38, 30.97, 16.33, 17.59, 49.28),
    _make_entry(89, "CultriX/Qwen2.5-14B-Broca",
                37.72, 56.04, 50.03, 34.59, 18.23, 18.95, 48.49),
    _make_entry(90, "CultriX/Qwen2.5-14B-Emerged",
                37.66, 70.00, 45.93, 30.74, 14.32, 18.47, 46.51),
    _make_entry(91, "sometimesanotion/Qwentinuum-14B-v8",
                37.65, 54.12, 50.11, 34.14, 17.79, 20.75, 49.02),
    _make_entry(92, "qingy2024/Fusion-14B-Instruct",
                37.64, 72.60, 48.58, 30.97, 13.98, 14.81, 44.93),
    _make_entry(94, "CultriX/Qwestion-14B",
                37.63, 63.18, 48.76, 31.72, 15.77, 17.22, 49.14),
    _make_entry(99, "sometimesanotion/Qwenvergence-14B-v3-Prose",
                37.37, 49.18, 49.80, 35.57, 19.35, 21.77, 48.55),
    _make_entry(102, "CultriX/SeQwence-14B-v5",
                37.27, 59.20, 50.00, 31.04, 16.00, 18.33, 49.05),
    _make_entry(103, "sometimesanotion/Qwen-14B-ProseStock-v4",
                37.23, 49.42, 49.54, 35.50, 18.46, 21.70, 48.74),
    _make_entry(104, "sometimesanotion/IF-reasoning-experiment-40",
                37.21, 63.30, 44.31, 27.72, 17.34, 25.86, 44.72),
    _make_entry(105, "CultriX/SeQwence-14B-EvolMerge",
                37.20, 53.82, 50.78, 31.80, 17.45, 20.26, 49.10),
]
534
-
535
-
536
def scrape_model_page(model_url, timeout=30):
    """
    Scrape a Hugging Face model page for a MergeKit YAML configuration.

    Parameters
    ----------
    model_url : str
        Full URL of the model page to fetch.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 30). Without a
        timeout, ``requests.get`` can block forever on a stalled server.

    Returns
    -------
    dict or str
        On success, a dict with keys ``"yaml_configuration"`` (text of the
        first ``<pre>`` block, or a "No YAML configuration found." placeholder)
        and ``"metadata"`` (text of a ``<div class="metadata">`` block, or a
        placeholder). On any failure, an error string — callers distinguish
        the two cases with ``isinstance(result, str)``.
    """
    try:
        # Fix: the original call had no timeout and could hang indefinitely.
        response = requests.get(model_url, timeout=timeout)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

        soup = BeautifulSoup(response.text, "html.parser")

        # MergeKit configs are typically rendered in the first <pre> block.
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."

        # Metadata markup varies by page; adjust the selector if the layout changes.
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."

        return {
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }

    except Exception as e:
        # Deliberate catch-all: this is best-effort scraping, and the caller
        # expects an error string rather than a raised exception.
        return f"Error: {str(e)}"
564
-
565
-
566
def print_benchmark_and_config_info(model_info: dict) -> None:
    """
    Print one model's benchmark overview and its MergeKit configuration.

    Resolution order for the configuration:
      1. If ``model_info["known_config"]`` is set, pretty-print it in a
         YAML-like layout between ``###`` markers and return.
      2. Otherwise scrape the model's Hugging Face page via
         ``scrape_model_page`` and print any YAML block that was found.
      3. If scraping returned an error string or no YAML, print a standalone
         scraper script (with the model's URL baked in) the user can run.

    :param model_info: one entry of ``benchmark_data`` — keys ``rank``,
        ``name``, ``scores``, ``hf_url``, ``known_config``.
    """
    # Print the benchmark summary
    print(f"---\nModel Rank: {model_info['rank']}")
    print(f"Model Name: {model_info['name']}")
    print(f"Model average score across benchmarks in %: {model_info['scores']['average']}")
    print(f"Models average score on IFEval benchmarks in %: {model_info['scores']['IFEval']}")
    print(f"Models average score on BBH benchmarks in %: {model_info['scores']['BBH']}")
    print(f"Models average score on MATH benchmarks in %: {model_info['scores']['MATH']}")
    print(f"Models average score in GPQA benchmarks in %: {model_info['scores']['GPQA']}")
    print(f"Models average score in MUSR benchmarks in %: {model_info['scores']['MUSR']}")
    print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")

    # If we have a known config for this model, just print it (YAML-like,
    # fenced by "###" lines) and skip the scraping path entirely.
    if model_info["known_config"] is not None:
        print("###")
        print("models:")
        for m in model_info["known_config"]["models"]:
            print(f"  - model: {m['model']}")
        print(f"merge_method: {model_info['known_config']['merge_method']}")
        print(f"base_model: {model_info['known_config']['base_model']}")
        print(f"dtype: {model_info['known_config']['dtype']}")
        print("parameters:")
        print(f"  t: {model_info['known_config']['parameters']['t']} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
        print("###")
        return

    # Otherwise, attempt to scrape the model page
    scrape_result = scrape_model_page(model_info["hf_url"])
    # If we got an error (scrape_model_page signals errors by returning a
    # string) or no YAML was found, show a fallback scraping script instead.
    if isinstance(scrape_result, str) or ("No YAML configuration found." in scrape_result["yaml_configuration"]):
        print("(No MergeKit configuration found.)\n")
        print("You can try the following Python script to scrape the model page:\n")
        print("#" * 70)
        # NOTE: the template below is an f-string, so literal braces are
        # doubled ({{ }}) and only {model_info['hf_url']} is interpolated.
        print(
            f'''import requests
from bs4 import BeautifulSoup

def scrape_model_page(model_url):
    try:
        response = requests.get(model_url)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"

        soup = BeautifulSoup(response.text, "html.parser")

        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."

        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."

        return {{
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }}

    except Exception as e:
        return f"Error: {{str(e)}}"

if __name__ == "__main__":
    model_url = "{model_info['hf_url']}"
    result = scrape_model_page(model_url)
    print(result)'''
        )
        print("#" * 70)
    else:
        # If we found a config, print it between triple-hash signs
        print("###")
        print(scrape_result["yaml_configuration"])
        print("###")
640
-
641
-
642
def main() -> None:
    """Walk the leaderboard and report each model's scores and MergeKit config."""
    for entry in benchmark_data:
        print_benchmark_and_config_info(entry)


if __name__ == "__main__":
    # 2. Loop through all models, printing benchmark data and MergeKit config info
    main()