import contextlib
import io

# requests and BeautifulSoup are not used directly by this app; they appear
# here only because the scraping snippet printed below depends on them.
import requests
from bs4 import BeautifulSoup

import gradio as gr
# ---------------------------------------------------------
# PART 1: FULL BENCHMARK DATA (Rank 44 through 105)
# ---------------------------------------------------------
# For each model, we store:
# - rank (int)
# - name (str)
# - scores (dict) with keys: average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO
# - known_config (dict if found, or None if no config)
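
# A minimal, illustrative sketch of that record shape (documentation only;
# this TypedDict is an assumption and is not referenced elsewhere in the app):
from typing import Any, Dict, Optional, TypedDict

class BenchmarkEntry(TypedDict):
    rank: int                               # leaderboard rank
    name: str                               # Hugging Face repo id
    scores: Dict[str, float]                # average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO
    known_config: Optional[Dict[str, Any]]  # MergeKit config dict, or None
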
benchmark_data = [
{
"rank": 44,
"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
"scores": {
"average": 40.10,
"IFEval": 72.57,
"BBH": 48.58,
"MATH": 34.44,
"GPQA": 17.34,
"MUSR": 19.39,
"MMLU-PRO": 48.26
},
"known_config": {
"models": [
{"model": "CultriX/SeQwence-14Bv1"},
{"model": "allknowingroger/Qwenslerp5-14B"}
],
"merge_method": "slerp",
"base_model": "CultriX/SeQwence-14Bv1",
"dtype": "bfloat16",
"parameters": {
"t": [0, 0.5, 1, 0.5, 0]
}
}
},
{
"rank": 45,
"name": "sthenno-com/miscii-14b-1225",
"scores": {
"average": 40.08,
"IFEval": 78.78,
"BBH": 50.91,
"MATH": 31.57,
"GPQA": 17.00,
"MUSR": 14.77,
"MMLU-PRO": 47.46
},
"known_config": {
"tokenizer_source": "base",
"chat_template": "chatml",
"merge_method": "ties",
"dtype": "bfloat16",
"parameters": {
"normalize": True
},
"base_model": "sthenno-com/miscii-14b-1028",
"models": [
{
"model": "sthenno-com/miscii-14b-1028",
"parameters": {
"weight": 1,
"density": 0.5
}
},
{
"model": "sthenno/miscii-1218",
"parameters": {
"weight": 1,
"density": 0.5
}
},
{
"model": "sthenno/exp-002",
"parameters": {
"weight": 0.9,
"density": 0.5
}
},
{
"model": "sthenno/miscii-1218",
"parameters": {
"weight": 0.6,
"density": 0.5
}
}
]
}
},
{
"rank": 46,
"name": "djuna/Q2.5-Veltha-14B-0.5",
"scores": {
"average": 39.96,
"IFEval": 77.96,
"BBH": 50.32,
"MATH": 33.84,
"GPQA": 15.77,
"MUSR": 14.17,
"MMLU-PRO": 47.72
},
"known_config": {
"merge_method": "della_linear",
"dtype": "float32",
"out_dtype": "bfloat16",
"parameters": {
"epsilon": 0.04,
"lambda": 1.05,
"normalize": True
},
"base_model": "arcee-ai/SuperNova-Medius",
"tokenizer_source": "arcee-ai/SuperNova-Medius",
"models": [
{
"model": "arcee-ai/SuperNova-Medius",
"parameters": {
"weight": 10,
"density": 1
}
},
{
"model": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
"parameters": {
"weight": 7,
"density": 0.5
}
},
{
"model": "v000000/Qwen2.5-Lumen-14B",
"parameters": {
"weight": 7,
"density": 0.4
}
},
{
"model": "allura-org/TQ2.5-14B-Aletheia-v1",
"parameters": {
"weight": 8,
"density": 0.4
}
},
{
"model": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
"parameters": {
"weight": 8,
"density": 0.45
}
}
]
}
},
{
"rank": 48,
"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
"scores": {
"average": 39.81,
"IFEval": 71.62,
"BBH": 48.76,
"MATH": 33.99,
"GPQA": 17.34,
"MUSR": 19.23,
"MMLU-PRO": 47.95
},
"known_config": None
},
{
"rank": 50,
"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
"scores": {
"average": 39.46,
"IFEval": 68.72,
"BBH": 47.71,
"MATH": 35.05,
"GPQA": 18.23,
"MUSR": 19.56,
"MMLU-PRO": 47.50
},
"known_config": None
},
{
"rank": 52,
"name": "arcee-ai/Virtuoso-Small",
"scores": {
"average": 39.43,
"IFEval": 79.35,
"BBH": 50.40,
"MATH": 34.29,
"GPQA": 11.52,
"MUSR": 14.44,
"MMLU-PRO": 46.57
},
"known_config": None
},
{
"rank": 54,
"name": "sometimesanotion/Qwentinuum-14B-v6",
"scores": {
"average": 39.23,
"IFEval": 63.04,
"BBH": 50.23,
"MATH": 33.84,
"GPQA": 18.23,
"MUSR": 21.18,
"MMLU-PRO": 48.89
},
"known_config": None
},
{
"rank": 55,
"name": "djuna/Q2.5-Veltha-14B",
"scores": {
"average": 39.21,
"IFEval": 82.92,
"BBH": 49.75,
"MATH": 28.02,
"GPQA": 14.54,
"MUSR": 12.26,
"MMLU-PRO": 47.76
},
"known_config": {
"merge_method": "della_linear",
"dtype": "float32",
"out_dtype": "bfloat16",
"parameters": {
"epsilon": 0.04,
"lambda": 1.05,
"normalize": True
},
"base_model": "qwen/Qwen2.5-14b",
"tokenizer_source": "arcee-ai/SuperNova-Medius",
"models": [
{
"model": "arcee-ai/SuperNova-Medius",
"parameters": {
"weight": 10,
"density": 1
}
},
{
"model": "EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
"parameters": {
"weight": 7,
"density": 0.5
}
},
{
"model": "v000000/Qwen2.5-Lumen-14B",
"parameters": {
"weight": 7,
"density": 0.4
}
},
{
"model": "allura-org/TQ2.5-14B-Aletheia-v1",
"parameters": {
"weight": 8,
"density": 0.4
}
},
{
"model": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
"parameters": {
"weight": 8,
"density": 0.45
}
}
]
}
},
{
"rank": 57,
"name": "allknowingroger/QwenSlerp6-14B",
"scores": {
"average": 39.02,
"IFEval": 68.67,
"BBH": 47.59,
"MATH": 34.14,
"GPQA": 16.44,
"MUSR": 18.32,
"MMLU-PRO": 48.95
},
"known_config": {
"models": [
{"model": "CultriX/SeQwence-14Bv1"},
{"model": "allknowingroger/Qwenslerp2-14B"}
],
"merge_method": "slerp",
"base_model": "CultriX/SeQwence-14Bv1",
"dtype": "bfloat16",
"parameters": {
"t": [0, 0.5, 1, 0.5, 0]
}
}
},
{
"rank": 58,
"name": "allknowingroger/QwenSlerp5-14B",
"scores": {
"average": 38.94,
"IFEval": 71.19,
"BBH": 47.39,
"MATH": 33.16,
"GPQA": 15.32,
"MUSR": 17.81,
"MMLU-PRO": 48.78
},
"known_config": {
"models": [
{"model": "CultriX/SeQwence-14Bv1"},
{"model": "CultriX/Qwestion-14B"}
],
"merge_method": "slerp",
"base_model": "CultriX/SeQwence-14Bv1",
"dtype": "bfloat16",
"parameters": {
"t": [0, 0.5, 1, 0.5, 0]
}
}
},
{
"rank": 59,
"name": "sometimesanotion/Qwentinuum-14B-v5",
"scores": {
"average": 38.87,
"IFEval": 62.86,
"BBH": 50.28,
"MATH": 31.57,
"GPQA": 18.34,
"MUSR": 21.09,
"MMLU-PRO": 49.09
},
"known_config": None
},
{
"rank": 60,
"name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"scores": {
"average": 38.82,
"IFEval": 59.90,
"BBH": 50.12,
"MATH": 34.89,
"GPQA": 18.46,
"MUSR": 21.02,
"MMLU-PRO": 48.56
},
"known_config": {
            # This model's card contained two YAML segments; both are preserved
            # here under the keys "config1" and "config2":
"config1": {
"name": "Qwenvergence-14B-v6-Prose-model_stock",
"merge_method": "model_stock",
"base_model": "Qwen/Qwen2.5-14B",
"tokenizer_source": "huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2",
"parameters": {
"int8_mask": True,
"normalize": True,
"rescale": False
},
"models": [
"arcee-ai/Virtuoso-Small",
"sometimesanotion/Lamarck-14B-v0.3",
"EVA-UNIT-01/EVA-Qwen2.5-14B-v0.2",
"allura-org/TQ2.5-14B-Sugarquill-v1",
"oxyapi/oxy-1-small",
"v000000/Qwen2.5-Lumen-14B",
"sthenno-com/miscii-14b-1225",
"sthenno-com/miscii-14b-1225",
"underwoods/medius-erebus-magnum-14b",
"huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2"
],
"dtype": "float32",
"out_dtype": "bfloat16"
},
"config2": {
"name": "Qwenvergence-14B-v6-Prose",
"merge_method": "ties",
"base_model": "Qwen/Qwen2.5-14B",
"tokenizer_source": "base",
"parameters": {
"density": 1.00,
"weight": 1.00,
"int8_mask": True,
"normalize": True,
"rescale": False
},
"dtype": "float32",
"out_dtype": "bfloat16",
"models": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose-slerp",
"parameters": {
"density": 1.00,
"weight": 1.00
}
}
]
}
}
},
{
"rank": 61,
"name": "CultriX/Qwen2.5-14B-Brocav3",
"scores": {
"average": 38.76,
"IFEval": 69.52,
"BBH": 49.05,
"MATH": 32.25,
"GPQA": 14.54,
"MUSR": 19.25,
"MMLU-PRO": 47.97
},
"known_config": {
"merge_method": "della_linear",
"base_model": "CultriX/Qwen2.5-14B-Wernickev3",
"dtype": "bfloat16",
"parameters": {
"epsilon": 0.012,
"lambda": 1.4,
"normalize": True
},
"adaptive_merge_parameters": {
"task_weights": {
"tinyArc": 1.6,
"tinyHellaswag": 1.5,
"tinyMMLU": 1.65,
"tinyTruthfulQA": 1.9,
"tinyTruthfulQA_mc1": 1.7,
"tinyWinogrande": 1.75,
"IFEval": 1.9,
"BBH": 1.7,
"MATH": 2.1,
"GPQA": 1.8,
"MUSR": 1.9,
"MMLU-PRO": 1.8
},
"smoothing_factor": 0.1
},
"gradient_clipping": {
"CultriX/Qwen2.5-14B-Wernickev3": 0.86,
"CultriX/Qwenfinity-2.5-14B": 0.83,
"djuna/Q2.5-Veltha-14B-0.5": 0.91,
"CultriX/Qwen2.5-14B-Broca": 0.85,
"qingy2019/Qwen2.5-Math-14B-Instruct": 0.93,
"CultriX/SeQwence-14Bv1": 0.88,
"sometimesanotion/Qwen2.5-14B-Vimarckoso": 0.89,
"allknowingroger/QwenSlerp6-14B": 0.87
},
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernickev3",
"parameters": {
"weight": 0.26,
"density": 0.7
}
},
{
"model": "CultriX/Qwenfinity-2.5-14B",
"parameters": {
"weight": 0.23,
"density": 0.65
}
},
{
"model": "djuna/Q2.5-Veltha-14B-0.5",
"parameters": {
"weight": 0.22,
"density": 0.72
}
},
{
"model": "CultriX/Qwen2.5-14B-Broca",
"parameters": {
"weight": 0.15,
"density": 0.65
}
},
{
"model": "qingy2019/Qwen2.5-Math-14B-Instruct",
"parameters": {
"weight": 0.18,
"density": 0.73
}
},
{
"model": "CultriX/SeQwence-14Bv1",
"parameters": {
"weight": 0.14,
"density": 0.63
}
},
{
"model": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
"parameters": {
"weight": 0.12,
"density": 0.6
}
},
{
"model": "allknowingroger/QwenSlerp6-14B",
"parameters": {
"weight": 0.1,
"density": 0.62
}
}
],
"tokenizer_source": "CultriX/Qwen2.5-14B-Wernickev3"
}
},
{
"rank": 62,
"name": "sometimesanotion/Qwentinuum-14B-v7",
"scores": {
"average": 38.76,
"IFEval": 61.09,
"BBH": 50.35,
"MATH": 33.38,
"GPQA": 18.79,
"MUSR": 19.95,
"MMLU-PRO": 49.00
},
"known_config": None
},
{
"rank": 64,
"name": "sometimesanotion/Qwentinuum-14B-v3",
"scores": {
"average": 38.74,
"IFEval": 61.58,
"BBH": 50.04,
"MATH": 32.85,
"GPQA": 18.34,
"MUSR": 20.62,
"MMLU-PRO": 49.03
},
"known_config": None
},
{
"rank": 65,
"name": "allura-org/TQ2.5-14B-Aletheia-v1",
"scores": {
"average": 38.74,
"IFEval": 75.30,
"BBH": 50.88,
"MATH": 29.53,
"GPQA": 14.99,
"MUSR": 14.61,
"MMLU-PRO": 47.12
},
        # The model card's config snippet contained only leftover chat-template
        # text ("<|im_start|>system ..."), not a MergeKit config. It is kept
        # here as a minimal known_config entry rather than discarded:
"known_config": {
"system_text_example": "<|im_start|>system ... <|im_end|>"
}
},
{
"rank": 66,
"name": "qingy2024/Fusion4-14B-Instruct",
"scores": {
"average": 38.73,
"IFEval": 76.49,
"BBH": 50.70,
"MATH": 33.91,
"GPQA": 10.74,
"MUSR": 13.97,
"MMLU-PRO": 46.60
},
"known_config": {
"models": [
{
"model": "arcee-ai/Virtuoso-Small",
"parameters": {
"weight": 1,
"density": 1
}
}
],
"merge_method": "ties",
"base_model": "Qwen/Qwen2.5-14B",
"parameters": {
"weight": 1,
"density": 1,
"normalize": True,
"int8_mask": True
},
"dtype": "float16"
}
},
{
"rank": 68,
"name": "CultriX/Qwen2.5-14B-Brocav7",
"scores": {
"average": 38.52,
"IFEval": 67.24,
"BBH": 48.91,
"MATH": 31.87,
"GPQA": 15.66,
"MUSR": 20.15,
"MMLU-PRO": 47.31
},
"known_config": {
"merge_method": "della_linear",
"base_model": "CultriX/Qwen2.5-14B-Wernickev3",
"dtype": "bfloat16",
"parameters": {
"epsilon": 0.01,
"lambda": 1.5,
"normalize": True,
"smoothing_factor": 0.08
},
"gradient_clipping": {
"CultriX/Qwen2.5-14B-Wernickev3": 0.85,
"CultriX/Qwenfinity-2.5-14B": 0.82,
"djuna/Q2.5-Veltha-14B-0.5": 0.92,
"CultriX/Qwen2.5-14B-Broca": 0.86,
"qingy2019/Qwen2.5-Math-14B-Instruct": 0.94,
"CultriX/SeQwence-14Bv1": 0.87,
"sometimesanotion/Qwen2.5-14B-Vimarckoso": 0.90,
"allknowingroger/QwenSlerp6-14B": 0.86
},
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernickev3",
"parameters": {
"weight": 0.25,
"density": 0.72
}
},
{
"model": "CultriX/Qwenfinity-2.5-14B",
"parameters": {
"weight": 0.22,
"density": 0.68
}
},
{
"model": "djuna/Q2.5-Veltha-14B-0.5",
"parameters": {
"weight": 0.20,
"density": 0.75
}
},
{
"model": "CultriX/Qwen2.5-14B-Broca",
"parameters": {
"weight": 0.16,
"density": 0.68
}
},
{
"model": "qingy2019/Qwen2.5-Math-14B-Instruct",
"parameters": {
"weight": 0.19,
"density": 0.75
}
},
{
"model": "CultriX/SeQwence-14Bv1",
"parameters": {
"weight": 0.13,
"density": 0.65
}
},
{
"model": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
"parameters": {
"weight": 0.11,
"density": 0.62
}
},
{
"model": "allknowingroger/QwenSlerp6-14B",
"parameters": {
"weight": 0.09,
"density": 0.65
}
}
],
"adaptive_merge_parameters": {
"task_weights": {
"tinyArc": 1.65,
"tinyHellaswag": 1.55,
"tinyMMLU": 1.7,
"tinyTruthfulQA": 1.95,
"tinyTruthfulQA_mc1": 1.75,
"tinyWinogrande": 1.8,
"IFEval": 2.0,
"BBH": 1.75,
"MATH": 2.2,
"GPQA": 1.85,
"MUSR": 1.95,
"MMLU-PRO": 1.85
}
},
"tokenizer_source": "CultriX/Qwen2.5-14B-Wernickev3"
}
},
{
"rank": 71,
"name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
"scores": {
"average": 38.46,
"IFEval": 56.43,
"BBH": 50.14,
"MATH": 35.57,
"GPQA": 18.46,
"MUSR": 21.34,
"MMLU-PRO": 48.80
},
"known_config": {
"name": "Qwentinuum-14B-v6-Prose-slerp",
"merge_method": "slerp",
"base_model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"tokenizer_source": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"dtype": "bfloat16",
"out_dtype": "bfloat16",
"parameters": {
"int8_mask": True,
"normalize": True,
"rescale": False
},
"slices": [
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"layer_range": [0, 8]
},
{
"model": "sometimesanotion/Qwentinuum-14B-v6",
"layer_range": [0, 8]
}
]
},
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"layer_range": [8, 16]
},
{
"model": "sometimesanotion/Qwentinuum-14B-v6",
"layer_range": [8, 16]
}
]
},
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"layer_range": [16, 24]
},
{
"model": "sometimesanotion/Qwentinuum-14B-v6",
"layer_range": [16, 24]
}
]
},
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"layer_range": [24, 32]
},
{
"model": "sometimesanotion/Qwentinuum-14B-v6",
"layer_range": [24, 32]
}
]
},
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"layer_range": [32, 40]
},
{
"model": "sometimesanotion/Qwentinuum-14B-v6",
"layer_range": [32, 40]
}
]
},
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"layer_range": [40, 48]
},
{
"model": "sometimesanotion/Qwentinuum-14B-v6",
"layer_range": [40, 48]
}
]
}
],
            # The original snippet also implies a 'parameters' block containing
            # "t: 0.40"; it is omitted here because its exact placement is unclear.
}
},
{
"rank": 76,
"name": "CultriX/Qwen2.5-14B-Brocav6",
"scores": {
"average": 38.32,
"IFEval": 69.95,
"BBH": 47.82,
"MATH": 29.61,
"GPQA": 15.66,
"MUSR": 18.88,
"MMLU-PRO": 47.99
},
"known_config": {
"merge_method": "della_linear",
"base_model": "CultriX/Qwen2.5-14B-Wernickev3",
"dtype": "bfloat16",
"parameters": {
"epsilon": 0.01,
"lambda": 1.5,
"normalize": True
},
"adaptive_merge_parameters": {
"task_weights": {
"tinyArc": 1.65,
"tinyHellaswag": 1.55,
"tinyMMLU": 1.7,
"tinyTruthfulQA": 1.95,
"tinyTruthfulQA_mc1": 1.75,
"tinyWinogrande": 1.8,
"IFEval": 2.0,
"BBH": 1.75,
"MATH": 2.2,
"GPQA": 1.85,
"MUSR": 1.95,
"MMLU-PRO": 1.85
},
"smoothing_factor": 0.08
},
"gradient_clipping": {
"CultriX/Qwen2.5-14B-Wernickev3": 0.85,
"CultriX/Qwenfinity-2.5-14B": 0.82,
"djuna/Q2.5-Veltha-14B-0.5": 0.92,
"CultriX/Qwen2.5-14B-Broca": 0.86,
"qingy2019/Qwen2.5-Math-14B-Instruct": 0.94,
"CultriX/SeQwence-14Bv1": 0.87,
"sometimesanotion/Qwen2.5-14B-Vimarckoso": 0.90,
"allknowingroger/QwenSlerp6-14B": 0.86
},
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernickev3",
"parameters": {
"weight": 0.25,
"density": 0.72
}
},
{
"model": "CultriX/Qwenfinity-2.5-14B",
"parameters": {
"weight": 0.22,
"density": 0.68
}
},
{
"model": "djuna/Q2.5-Veltha-14B-0.5",
"parameters": {
"weight": 0.20,
"density": 0.75
}
},
{
"model": "CultriX/Qwen2.5-14B-Broca",
"parameters": {
"weight": 0.16,
"density": 0.68
}
},
{
"model": "qingy2019/Qwen2.5-Math-14B-Instruct",
"parameters": {
"weight": 0.19,
"density": 0.75
}
},
{
"model": "CultriX/SeQwence-14Bv1",
"parameters": {
"weight": 0.13,
"density": 0.65
}
},
{
"model": "sometimesanotion/Qwen2.5-14B-Vimarckoso",
"parameters": {
"weight": 0.11,
"density": 0.62
}
},
{
"model": "allknowingroger/QwenSlerp6-14B",
"parameters": {
"weight": 0.09,
"density": 0.65
}
}
]
}
},
{
"rank": 80,
"name": "CultriX/SeQwence-14Bv1",
"scores": {
"average": 38.20,
"IFEval": 66.78,
"BBH": 47.19,
"MATH": 33.53,
"GPQA": 14.88,
"MUSR": 18.80,
"MMLU-PRO": 48.00
},
"known_config": {
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernicke",
"parameters": {
"weight": 0.35,
"density": 0.6
}
},
{
"model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
"parameters": {
"weight": 0.30,
"density": 0.6
}
},
{
"model": "CultriX/Qwen2.5-14B-MegaMerge-pt2",
"parameters": {
"weight": 0.20,
"density": 0.5
}
},
{
"model": "CultriX/SeQwence-14B",
"parameters": {
"weight": 0.15,
"density": 0.4
}
},
{
"model": "v000000/Qwen2.5-Lumen-14B",
"parameters": {
"weight": 0.10,
"density": 0.5
}
}
],
"base_model": "Qwen/Qwen2.5-14B",
"merge_method": "dare_ties",
"parameters": {
"normalize": True,
"int8_mask": True
},
"dtype": "bfloat16",
"tokenizer_source": "Qwen/Qwen2.5-14B-Instruct"
}
},
{
"rank": 85,
"name": "sometimesanotion/Qwentinuum-14B-v013",
"scores": {
"average": 37.96,
"IFEval": 67.11,
"BBH": 43.97,
"MATH": 33.01,
"GPQA": 14.32,
"MUSR": 24.99,
"MMLU-PRO": 44.34
},
"known_config": {
"name": "Qwentinuum-14B-v013",
"merge_method": "model_stock",
"base_model": "Qwen/Qwen2.5-14B",
"tokenizer_source": "base",
"parameters": {
"int8_mask": True,
"normalize": True,
"rescale": False
},
"models": [
"sometimesanotion/Qwenvergence-14B-v3-Prose+sometimesanotion/Qwenvergence-Abliterate-512",
"sometimesanotion/Qwentinuum-14B-v011+sometimesanotion/Qwenvergence-Abliterate-512",
"sometimesanotion/Qwentinuum-14B-v012+sometimesanotion/Qwenvergence-Abliterate-256",
"sometimesanotion/Qwenvergence-14B-v6-Prose+sometimesanotion/Qwenvergence-Abliterate-512",
"sometimesanotion/Lamarck-14B-v0.3+sometimesanotion/Qwenvergence-Abliterate-512",
"huihui-ai/Qwen2.5-14B-Instruct-abliterated-v2"
],
"dtype": "bfloat16",
"out_dtype": "bfloat16"
}
},
{
"rank": 86,
"name": "CultriX/Qwen2.5-14B-Wernickev3",
"scores": {
"average": 37.94,
"IFEval": 70.48,
"BBH": 44.58,
"MATH": 32.78,
"GPQA": 14.99,
"MUSR": 18.69,
"MMLU-PRO": 46.13
},
"known_config": {
"CONFIG SuperiorMerge-14B-From-2-to-10": {
"models": [
{
"model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
"parameters": {
"weight": 0.25,
"density": 0.6
}
},
{
"model": "allknowingroger/QwenSlerp6-14B",
"parameters": {
"weight": 0.25,
"density": 0.6
}
},
{
"model": "CultriX/SeQwence-14B-EvolMerge",
"parameters": {
"weight": 0.20,
"density": 0.5
}
},
{
"model": "CultriX/Qwen2.5-14B-Wernicke",
"parameters": {
"weight": 0.15,
"density": 0.5
}
},
{
"model": "allknowingroger/QwenStock3-14B",
"parameters": {
"weight": 0.15,
"density": 0.5
}
}
],
"base_model": "Qwen/Qwen2.5-14B",
"merge_method": "dare_ties",
"parameters": {
"normalize": True,
"int8_mask": True
},
"dtype": "bfloat16",
"tokenizer_source": "Qwen/Qwen2.5-14B-Instruct"
}
}
},
{
"rank": 88,
"name": "allknowingroger/QwenSlerp4-14B",
"scores": {
"average": 37.80,
"IFEval": 63.28,
"BBH": 49.38,
"MATH": 30.97,
"GPQA": 16.33,
"MUSR": 17.59,
"MMLU-PRO": 49.28
},
"known_config": {
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernicke",
"parameters": {
"weight": 0.55,
"density": 0.80
}
},
{
"model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
"parameters": {
"weight": 0.20,
"density": 0.60
}
},
{
"model": "rombodawg/Rombos-LLM-V2.6-Qwen-14b",
"parameters": {
"weight": 0.25,
"density": 0.70
}
},
{
"model": "allknowingroger/Qwenslerp2-14B",
"parameters": {
"weight": 0.15,
"density": 0.65
}
}
],
"base_model": "Qwen/Qwen2.5-14B",
"merge_method": "dare_ties",
"parameters": {
"normalize": True,
"int8_mask": True
},
"dtype": "bfloat16",
"tokenizer_source": "Qwen/Qwen2.5-14B-Instruct",
"adaptive_merge_parameters": {
"task_weights": {
"IFEval": 1.0,
"MATH": 1.3,
"GPQA": 1.1,
"MUSR": 1.2,
"MMLU-PRO": 1.0
},
"smoothing_factor": 0.15
},
"gradient_clipping": 1.0
}
},
{
"rank": 89,
"name": "CultriX/Qwen2.5-14B-Broca",
"scores": {
"average": 37.72,
"IFEval": 56.04,
"BBH": 50.03,
"MATH": 34.59,
"GPQA": 18.23,
"MUSR": 18.95,
"MMLU-PRO": 48.49
},
"known_config": {
"merge_method": "della_linear",
"base_model": "CultriX/Qwen2.5-14B-Wernickev3",
"dtype": "bfloat16",
"parameters": {
"epsilon": 0.03,
"lambda": 1.1,
"normalize": True
},
"adaptive_merge_parameters": {
"task_weights": {
"tinyArc": 1.3,
"tinyHellaswag": 1.2,
"tinyMMLU": 1.1,
"tinyTruthfulQA": 1.4,
"tinyWinogrande": 1.2,
"IFEval": 1.3,
"BBH": 1.3,
"MATH": 1.4,
"GPQA": 1.3,
"MUSR": 1.2,
"MMLU-PRO": 1.2
},
"smoothing_factor": 0.15
},
"gradient_clipping": 1.0,
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernickev3",
"parameters": {
"weight": 0.5,
"density": 0.7
}
},
{
"model": "djuna/Q2.5-Veltha-14B-0.5",
"parameters": {
"weight": 0.3,
"density": 0.8
}
},
{
"model": "CultriX/SeQwence-14B-EvolMerge",
"parameters": {
"weight": 0.2,
"density": 0.6
}
}
],
"tokenizer_source": "CultriX/Qwen2.5-14B-Wernickev3"
}
},
{
"rank": 90,
"name": "CultriX/Qwen2.5-14B-Emerged",
"scores": {
"average": 37.66,
"IFEval": 70.00,
"BBH": 45.93,
"MATH": 30.74,
"GPQA": 14.32,
"MUSR": 18.47,
"MMLU-PRO": 46.51
},
"known_config": {
"models": [
{"model": "CultriX/Qwen2.5-14B-Wernickev3"},
{"model": "CultriX/Qwen2.5-14B-Wernickev5"}
],
"merge_method": "slerp",
"base_model": "CultriX/Qwen2.5-14B-Wernickev3",
"dtype": "bfloat16",
"parameters": {
"t": [0, 0.5, 1, 0.5, 0]
},
"dtype_duplicate": "bfloat16", # The snippet repeated 'dtype' line
"adaptive_merge_parameters": {
"task_weights": {
"tinyArc": 1.2,
"tinyHellaswag": 1.1,
"tinyMMLU": 1.2,
"tinyTruthfulQA": 1.3,
"tinyTruthfulQA_mc1": 1.1,
"tinyWinogrande": 1.2
},
"smoothing_factor": 0.2
},
"gradient_clipping": 1.0
}
},
{
"rank": 91,
"name": "sometimesanotion/Qwentinuum-14B-v8",
"scores": {
"average": 37.65,
"IFEval": 54.12,
"BBH": 50.11,
"MATH": 34.14,
"GPQA": 17.79,
"MUSR": 20.75,
"MMLU-PRO": 49.02
},
"known_config": None
},
{
"rank": 92,
"name": "qingy2024/Fusion-14B-Instruct",
"scores": {
"average": 37.64,
"IFEval": 72.60,
"BBH": 48.58,
"MATH": 30.97,
"GPQA": 13.98,
"MUSR": 14.81,
"MMLU-PRO": 44.93
},
"known_config": {
"models": [
{
"model": "qingy2024/Qwen2.5-Math-14B-Instruct-Preview",
"parameters": {
"weight": 0.3,
"density": 0.6
}
},
{
"model": "arcee-ai/Virtuoso-Small",
"parameters": {
"weight": 0.7,
"density": 0.6
}
}
],
"base_model": "Qwen/Qwen2.5-14B",
"merge_method": "dare_ties",
"parameters": {
"normalize": True,
"int8_mask": True
},
"dtype": "bfloat16",
"tokenizer_source": "Qwen/Qwen2.5-14B-Instruct"
}
},
{
"rank": 94,
"name": "CultriX/Qwestion-14B",
"scores": {
"average": 37.63,
"IFEval": 63.18,
"BBH": 48.76,
"MATH": 31.72,
"GPQA": 15.77,
"MUSR": 17.22,
"MMLU-PRO": 49.14
},
"known_config": {
"models": [
{
"model": "CultriX/Qwen2.5-14B-Wernicke",
"parameters": {
"weight": 0.55,
"density": 0.80
}
},
{
"model": "VAGOsolutions/SauerkrautLM-v2-14b-DPO",
"parameters": {
"weight": 0.20,
"density": 0.60
}
},
{
"model": "rombodawg/Rombos-LLM-V2.6-Qwen-14b",
"parameters": {
"weight": 0.25,
"density": 0.70
}
},
{
"model": "allknowingroger/Qwenslerp2-14B",
"parameters": {
"weight": 0.15,
"density": 0.65
}
}
],
"base_model": "Qwen/Qwen2.5-14B",
"merge_method": "dare_ties",
"parameters": {
"normalize": True,
"int8_mask": True
},
"dtype": "bfloat16",
"tokenizer_source": "Qwen/Qwen2.5-14B-Instruct",
"adaptive_merge_parameters": {
"task_weights": {
"IFEval": 1.0,
"MATH": 1.3,
"GPQA": 1.1,
"MUSR": 1.2,
"MMLU-PRO": 1.0
},
"smoothing_factor": 0.15
},
"gradient_clipping": 1.0
}
},
{
"rank": 99,
"name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
"scores": {
"average": 37.37,
"IFEval": 49.18,
"BBH": 49.80,
"MATH": 35.57,
"GPQA": 19.35,
"MUSR": 21.77,
"MMLU-PRO": 48.55
},
"known_config": None
},
{
"rank": 102,
"name": "CultriX/SeQwence-14B-v5",
"scores": {
"average": 37.27,
"IFEval": 59.20,
"BBH": 50.00,
"MATH": 31.04,
"GPQA": 16.00,
"MUSR": 18.33,
"MMLU-PRO": 49.05
},
"known_config": None
},
{
"rank": 103,
"name": "sometimesanotion/Qwen-14B-ProseStock-v4",
"scores": {
"average": 37.23,
"IFEval": 49.42,
"BBH": 49.54,
"MATH": 35.50,
"GPQA": 18.46,
"MUSR": 21.70,
"MMLU-PRO": 48.74
},
"known_config": None
},
{
"rank": 104,
"name": "sometimesanotion/IF-reasoning-experiment-40",
"scores": {
"average": 37.21,
"IFEval": 63.30,
"BBH": 44.31,
"MATH": 27.72,
"GPQA": 17.34,
"MUSR": 25.86,
"MMLU-PRO": 44.72
},
"known_config": {
"name": "sometimesanotion/IF-reasoning-experiment-40",
"merge_method": "slerp",
"base_model": "sometimesanotion/Qwenvergence-Abliterate",
"tokenizer_source": "base",
"dtype": "float32",
"out_dtype": "bfloat16",
"parameters": {
"t": [
{"value": 0.40}
]
},
"slices": [
{
"sources": [
{
"model": "sometimesanotion/Qwenvergence-Abliterate",
"layer_range": [0, 48]
},
{
"model": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3+sometimesanotion/Qwenvergence-Abliterate-64",
"layer_range": [0, 48]
}
]
}
]
}
},
{
"rank": 105,
"name": "CultriX/SeQwence-14B-EvolMerge",
"scores": {
"average": 37.20,
"IFEval": 53.82,
"BBH": 50.78,
"MATH": 31.80,
"GPQA": 17.45,
"MUSR": 20.26,
"MMLU-PRO": 49.10
},
"known_config": {
"base_model": "CultriX/SeQwence-14Bv1",
"dtype": "bfloat16",
"merge_method": "dare_ties",
"parameters": {
"int8_mask": 1.0,
"normalize": 1.0
},
"slices": [
{
"sources": [
{
"layer_range": [0, 48],
"model": "CultriX/SeQwence-14Bv1",
"parameters": {
"density": [
0.9723868064882017,
1.0,
1.0,
1.0,
1.0,
0.9714039829478123
],
"weight": [
0.303941801676895,
0.364404551023674,
0.315900913803921,
0.3276032249804535,
0.32167313684876814,
0.4385348686221433
]
}
},
{
"layer_range": [0, 48],
"model": "CultriX/Qwestion-14B",
"parameters": {
"density": [
1.0,
0.9914516102369406,
1.0,
0.8035966798672015,
0.8192028457518323,
0.9514479609471497
],
"weight": [
0.23754044230348376,
0.26302919982461254,
0.26313082788173275,
0.17815237275761467,
0.34301750695974753,
0.5374787613924082
]
}
},
{
"layer_range": [0, 48],
"model": "CultriX/Qwen2.5-14B-Wernicke",
"parameters": {
"density": [
0.9250003667144193,
0.9603820599250329,
0.8766642760655986,
1.0,
0.9993615706551808,
0.7459506348277176
],
"weight": [
0.48038202535582214,
0.5870170049221364,
0.27054455623315504,
0.06016442415521043,
0.4012739361231067,
0.26890177448533076
]
}
}
]
}
]
}
}
]
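
# Optional sanity check (a sketch, not part of the original app): confirm every
# entry carries the keys documented in PART 1 before any parsing happens.
def _validate_benchmark_data(data):
    required_keys = {"rank", "name", "scores", "known_config"}
    required_scores = {"average", "IFEval", "BBH", "MATH", "GPQA", "MUSR", "MMLU-PRO"}
    for entry in data:
        missing = required_keys - entry.keys()
        assert not missing, f"{entry.get('name', '?')} is missing keys: {missing}"
        missing_scores = required_scores - entry["scores"].keys()
        assert not missing_scores, f"{entry['name']} is missing scores: {missing_scores}"

_validate_benchmark_data(benchmark_data)
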
# ---------------------------------------------------------
# PART 2: PARSING LOGIC (print stored configs, or suggest a scraper)
# ---------------------------------------------------------
def print_benchmark_and_config_info(model_info):
    """
    Prints an overview (to stdout) of one model's benchmark scores, followed by
    its known MergeKit config if present; otherwise prints a "no config found"
    note plus a ready-to-run scraping snippet for the model's page.
    """
print("---")
print(f"Model Rank: {model_info['rank']}")
print(f"Model Name: {model_info['name']}")
print(f"Model average score across benchmarks in %: {model_info['scores']['average']}")
print(f"Models average score on IFEval benchmarks in %: {model_info['scores']['IFEval']}")
print(f"Models average score on BBH benchmarks in %: {model_info['scores']['BBH']}")
print(f"Models average score on MATH benchmarks in %: {model_info['scores']['MATH']}")
print(f"Models average score in GPQA benchmarks in %: {model_info['scores']['GPQA']}")
print(f"Models average score in MUSR benchmarks in %: {model_info['scores']['MUSR']}")
print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
if model_info["known_config"] is not None:
print("###")
        # Print the dictionary in a rough, hand-rolled YAML style
        # (exact YAML is possible with pyyaml; see the note below).
_print_dict_as_yaml(model_info["known_config"], indent_level=0)
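        # Note: if the optional 'pyyaml' package is installed, the helper above
        # can be swapped for an exact dump (illustrative, not enabled here):
        #   import yaml
        #   print(yaml.safe_dump(model_info["known_config"], sort_keys=False))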
print("###")
else:
print("(No MergeKit configuration found.)")
print("")
print("You can try the following Python script to scrape the model page:")
print("######################################################################")
print(
f'''import requests
from bs4 import BeautifulSoup
def scrape_model_page(model_url):
try:
        response = requests.get(model_url, timeout=30)
if response.status_code != 200:
return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
soup = BeautifulSoup(response.text, "html.parser")
yaml_config = soup.find("pre")
yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
metadata_section = soup.find("div", class_="metadata")
metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
return {{
"yaml_configuration": yaml_text,
"metadata": metadata_text
}}
except Exception as e:
return f"Error: {{str(e)}}"
if __name__ == "__main__":
model_url = "https://huggingface.co/{model_info['name']}"
result = scrape_model_page(model_url)
print(result)'''
)
print("######################################################################")
def _print_dict_as_yaml(data, indent_level=0):
    """
    Recursively prints 'data' (nested dicts, lists, and scalars) as pseudo-YAML
    to stdout. Done by hand to avoid a pyyaml dependency.
    """
indent = " " * indent_level
if isinstance(data, dict):
for k, v in data.items():
if isinstance(v, dict):
print(f"{indent}{k}:")
_print_dict_as_yaml(v, indent_level+1)
elif isinstance(v, list):
print(f"{indent}{k}:")
for item in v:
if isinstance(item, dict):
print(f"{indent}-")
_print_dict_as_yaml(item, indent_level+2)
else:
print(f"{indent}- {item}")
else:
print(f"{indent}{k}: {v}")
else:
print(f"{indent}{data}")
def run_parsing_script():
    """
    Loops over all models in benchmark_data, calling print_benchmark_and_config_info()
    to build the full results text. Stdout is captured and returned as a single
    string; contextlib.redirect_stdout restores stdout even if an entry raises.
    """
    captured_output = io.StringIO()
    with contextlib.redirect_stdout(captured_output):
        for model in benchmark_data:
            print_benchmark_and_config_info(model)
    return captured_output.getvalue()
# ---------------------------------------------------------
# PART 3: GRADIO APP
# ---------------------------------------------------------
def parse_and_show_results():
"""
Gradio-compatible function that runs the parser
and returns the captured output text.
"""
return run_parsing_script()
with gr.Blocks() as demo:
gr.Markdown("# Full-Dataset Dynamic Benchmark Parsing")
gr.Markdown("Click the button below to parse all models (Rank 44 to 105) from the dataset:")
parse_btn = gr.Button("Parse Benchmarks")
results_box = gr.Textbox(label="Parsed Benchmark Results", lines=25)
parse_btn.click(fn=parse_and_show_results, outputs=results_box)
# On Hugging Face Spaces, app.py is executed directly, so launching at module
# level serves as the entry point.
demo.launch()