# (Hugging Face Spaces page chrome — "Spaces: Running" — stripped; it was not part of the script.)
import requests
from bs4 import BeautifulSoup
# 1. A list of model benchmark data from your "DATA START". Each entry contains:
#    - rank
#    - name
#    - scores (average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO)
#    - hf_url: the Hugging Face URL to scrape for a MergeKit config
#    - known_config: if we already know the configuration, store it here; otherwise None.
def _make_entry(rank, name, average, ifeval, bbh, math_score, gpqa, musr,
                mmlu_pro, known_config=None):
    """Build one leaderboard record.

    Every model page in this data set lives at ``https://huggingface.co/<name>``,
    so ``hf_url`` is derived from *name* instead of being repeated by hand.

    ``known_config`` is the MergeKit configuration when it is already known;
    ``None`` means the Hugging Face page must be scraped for it.
    """
    return {
        "rank": rank,
        "name": name,
        "scores": {
            "average": average,
            "IFEval": ifeval,
            "BBH": bbh,
            "MATH": math_score,
            "GPQA": gpqa,
            "MUSR": musr,
            "MMLU-PRO": mmlu_pro,
        },
        "hf_url": f"https://huggingface.co/{name}",
        "known_config": known_config,
    }


# Leaderboard rows, ordered by rank.  Columns after the name are:
# average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO (all percentages).
benchmark_data = [
    _make_entry(
        44, "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
        40.10, 72.57, 48.58, 34.44, 17.34, 19.39, 48.26,
        # This model's MergeKit config is already known from the source data.
        known_config={
            "models": [
                {"model": "CultriX/SeQwence-14Bv1"},
                {"model": "allknowingroger/Qwenslerp5-14B"},
            ],
            "merge_method": "slerp",
            "base_model": "CultriX/SeQwence-14Bv1",
            "dtype": "bfloat16",
            "parameters": {"t": [0, 0.5, 1, 0.5, 0]},
        },
    ),
    _make_entry(45, "sthenno-com/miscii-14b-1225",
                40.08, 78.78, 50.91, 31.57, 17.00, 14.77, 47.46),
    _make_entry(46, "djuna/Q2.5-Veltha-14B-0.5",
                39.96, 77.96, 50.32, 33.84, 15.77, 14.17, 47.72),
    _make_entry(48, "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
                39.81, 71.62, 48.76, 33.99, 17.34, 19.23, 47.95),
    _make_entry(50, "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
                39.46, 68.72, 47.71, 35.05, 18.23, 19.56, 47.50),
    _make_entry(52, "arcee-ai/Virtuoso-Small",
                39.43, 79.35, 50.40, 34.29, 11.52, 14.44, 46.57),
    _make_entry(54, "sometimesanotion/Qwentinuum-14B-v6",
                39.23, 63.04, 50.23, 33.84, 18.23, 21.18, 48.89),
    _make_entry(55, "djuna/Q2.5-Veltha-14B",
                39.21, 82.92, 49.75, 28.02, 14.54, 12.26, 47.76),
    _make_entry(57, "allknowingroger/QwenSlerp6-14B",
                39.02, 68.67, 47.59, 34.14, 16.44, 18.32, 48.95),
    _make_entry(58, "allknowingroger/QwenSlerp5-14B",
                38.94, 71.19, 47.39, 33.16, 15.32, 17.81, 48.78),
    _make_entry(59, "sometimesanotion/Qwentinuum-14B-v5",
                38.87, 62.86, 50.28, 31.57, 18.34, 21.09, 49.09),
    _make_entry(60, "sometimesanotion/Qwenvergence-14B-v6-Prose",
                38.82, 59.90, 50.12, 34.89, 18.46, 21.02, 48.56),
    _make_entry(61, "CultriX/Qwen2.5-14B-Brocav3",
                38.76, 69.52, 49.05, 32.25, 14.54, 19.25, 47.97),
    _make_entry(62, "sometimesanotion/Qwentinuum-14B-v7",
                38.76, 61.09, 50.35, 33.38, 18.79, 19.95, 49.00),
    _make_entry(64, "sometimesanotion/Qwentinuum-14B-v3",
                38.74, 61.58, 50.04, 32.85, 18.34, 20.62, 49.03),
    _make_entry(65, "allura-org/TQ2.5-14B-Aletheia-v1",
                38.74, 75.30, 50.88, 29.53, 14.99, 14.61, 47.12),
    _make_entry(66, "qingy2024/Fusion4-14B-Instruct",
                38.73, 76.49, 50.70, 33.91, 10.74, 13.97, 46.60),
    _make_entry(68, "CultriX/Qwen2.5-14B-Brocav7",
                38.52, 67.24, 48.91, 31.87, 15.66, 20.15, 47.31),
    _make_entry(71, "sometimesanotion/Qwentinuum-14B-v6-Prose",
                38.46, 56.43, 50.14, 35.57, 18.46, 21.34, 48.80),
    _make_entry(76, "CultriX/Qwen2.5-14B-Brocav6",
                38.32, 69.95, 47.82, 29.61, 15.66, 18.88, 47.99),
    _make_entry(80, "CultriX/SeQwence-14Bv1",
                38.20, 66.78, 47.19, 33.53, 14.88, 18.80, 48.00),
    _make_entry(85, "sometimesanotion/Qwentinuum-14B-v013",
                37.96, 67.11, 43.97, 33.01, 14.32, 24.99, 44.34),
    _make_entry(86, "CultriX/Qwen2.5-14B-Wernickev3",
                37.94, 70.48, 44.58, 32.78, 14.99, 18.69, 46.13),
    _make_entry(88, "allknowingroger/QwenSlerp4-14B",
                37.80, 63.28, 49.38, 30.97, 16.33, 17.59, 49.28),
    _make_entry(89, "CultriX/Qwen2.5-14B-Broca",
                37.72, 56.04, 50.03, 34.59, 18.23, 18.95, 48.49),
    _make_entry(90, "CultriX/Qwen2.5-14B-Emerged",
                37.66, 70.00, 45.93, 30.74, 14.32, 18.47, 46.51),
    _make_entry(91, "sometimesanotion/Qwentinuum-14B-v8",
                37.65, 54.12, 50.11, 34.14, 17.79, 20.75, 49.02),
    _make_entry(92, "qingy2024/Fusion-14B-Instruct",
                37.64, 72.60, 48.58, 30.97, 13.98, 14.81, 44.93),
    _make_entry(94, "CultriX/Qwestion-14B",
                37.63, 63.18, 48.76, 31.72, 15.77, 17.22, 49.14),
    _make_entry(99, "sometimesanotion/Qwenvergence-14B-v3-Prose",
                37.37, 49.18, 49.80, 35.57, 19.35, 21.77, 48.55),
    _make_entry(102, "CultriX/SeQwence-14B-v5",
                37.27, 59.20, 50.00, 31.04, 16.00, 18.33, 49.05),
    _make_entry(103, "sometimesanotion/Qwen-14B-ProseStock-v4",
                37.23, 49.42, 49.54, 35.50, 18.46, 21.70, 48.74),
    _make_entry(104, "sometimesanotion/IF-reasoning-experiment-40",
                37.21, 63.30, 44.31, 27.72, 17.34, 25.86, 44.72),
    _make_entry(105, "CultriX/SeQwence-14B-EvolMerge",
                37.20, 53.82, 50.78, 31.80, 17.45, 20.26, 49.10),
]
def scrape_model_page(model_url, timeout=30):
    """
    Scrape a Hugging Face model page for a MergeKit YAML configuration.

    Parameters
    ----------
    model_url : str
        Full URL of the model page to fetch.
    timeout : float, optional
        Seconds to wait for the HTTP response (default 30).  Added so a
        hung request cannot stall the whole run; callers are unaffected.

    Returns
    -------
    dict | str
        ``{"yaml_configuration": ..., "metadata": ...}`` on success, or an
        error string if the page could not be fetched or parsed.  Callers
        distinguish success from failure by the result's type.
    """
    try:
        # requests.get() without a timeout can block forever on a dead host.
        response = requests.get(model_url, timeout=timeout)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"

        soup = BeautifulSoup(response.text, "html.parser")

        # Model cards typically render the MergeKit YAML inside the first <pre> block.
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."

        # Metadata section; NOTE(review): the "metadata" class is an assumption
        # about the page markup — adjust the selector if the page structure differs.
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."

        return {
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }
    except Exception as e:
        return f"Error: {str(e)}"
def print_benchmark_and_config_info(model_info):
    """
    Print a benchmark overview for one model, followed by its MergeKit config.

    If ``model_info["known_config"]`` is set, that configuration is printed
    directly in YAML-ish form.  Otherwise the model's Hugging Face page is
    scraped via ``scrape_model_page``; when no YAML is found (or scraping
    fails) a ready-to-run standalone scraper script is printed instead.
    """
    # Benchmark summary (all scores are percentages).
    print(f"---\nModel Rank: {model_info['rank']}")
    print(f"Model Name: {model_info['name']}")
    print(f"Model average score across benchmarks in %: {model_info['scores']['average']}")
    print(f"Models average score on IFEval benchmarks in %: {model_info['scores']['IFEval']}")
    print(f"Models average score on BBH benchmarks in %: {model_info['scores']['BBH']}")
    print(f"Models average score on MATH benchmarks in %: {model_info['scores']['MATH']}")
    print(f"Models average score in GPQA benchmarks in %: {model_info['scores']['GPQA']}")
    print(f"Models average score in MUSR benchmarks in %: {model_info['scores']['MUSR']}")
    print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")

    # Known config: print it verbatim between triple-hash markers and stop.
    if model_info["known_config"] is not None:
        print("###")
        print("models:")
        for m in model_info["known_config"]["models"]:
            print(f"  - model: {m['model']}")
        print(f"merge_method: {model_info['known_config']['merge_method']}")
        print(f"base_model: {model_info['known_config']['base_model']}")
        print(f"dtype: {model_info['known_config']['dtype']}")
        print("parameters:")
        print(f"  t: {model_info['known_config']['parameters']['t']} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
        print("###")
        return

    # Otherwise, attempt to scrape the model page.
    scrape_result = scrape_model_page(model_info["hf_url"])

    # A str result means scraping failed; a dict without YAML means the page
    # had no <pre> block.  Either way, offer a standalone scraper script.
    if isinstance(scrape_result, str) or ("No YAML configuration found." in scrape_result["yaml_configuration"]):
        print("(No MergeKit configuration found.)\n")
        print("You can try the following Python script to scrape the model page:\n")
        print("#" * 70)
        print(
            f'''import requests
from bs4 import BeautifulSoup

def scrape_model_page(model_url):
    try:
        response = requests.get(model_url)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
        soup = BeautifulSoup(response.text, "html.parser")
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
        return {{
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }}
    except Exception as e:
        return f"Error: {{str(e)}}"

if __name__ == "__main__":
    model_url = "{model_info['hf_url']}"
    result = scrape_model_page(model_url)
    print(result)'''
        )
        print("#" * 70)
    else:
        # YAML found on the page: print it between triple-hash markers.
        print("###")
        print(scrape_result["yaml_configuration"])
        print("###")
if __name__ == "__main__":
    # 2. Walk the leaderboard top-to-bottom, printing each model's benchmark
    #    summary followed by its MergeKit configuration (known or scraped).
    for model in benchmark_data:
        print_benchmark_and_config_info(model)