# Tiny-LeaderBoard / scrape-leaderboard.py
# (Commit 006d441 by CultriX. The Hugging Face file-viewer chrome —
# "raw / history / blame", file size, author picture caption — was copied
# into this file as bare text; it is kept here as comments so the file
# remains valid Python.)
import requests
from bs4 import BeautifulSoup
# 1. A list of model benchmark data from your “DATA START”. Each entry contains:
# - rank
# - name
# - scores (average, IFEval, BBH, MATH, GPQA, MUSR, MMLU-PRO)
# - hf_url: the Hugging Face URL to scrape for a MergeKit config
# - known_config: if we already know the configuration, store it here; otherwise None.
benchmark_data = [
{
"rank": 44,
"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
"scores": {
"average": 40.10,
"IFEval": 72.57,
"BBH": 48.58,
"MATH": 34.44,
"GPQA": 17.34,
"MUSR": 19.39,
"MMLU-PRO": 48.26
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3", # placeholder
# This one has a known config from your data
"known_config": {
"models": [
{"model": "CultriX/SeQwence-14Bv1"},
{"model": "allknowingroger/Qwenslerp5-14B"}
],
"merge_method": "slerp",
"base_model": "CultriX/SeQwence-14Bv1",
"dtype": "bfloat16",
"parameters": {
"t": [0, 0.5, 1, 0.5, 0]
}
}
},
{
"rank": 45,
"name": "sthenno-com/miscii-14b-1225",
"scores": {
"average": 40.08,
"IFEval": 78.78,
"BBH": 50.91,
"MATH": 31.57,
"GPQA": 17.00,
"MUSR": 14.77,
"MMLU-PRO": 47.46
},
"hf_url": "https://huggingface.co/sthenno-com/miscii-14b-1225",
"known_config": None
},
{
"rank": 46,
"name": "djuna/Q2.5-Veltha-14B-0.5",
"scores": {
"average": 39.96,
"IFEval": 77.96,
"BBH": 50.32,
"MATH": 33.84,
"GPQA": 15.77,
"MUSR": 14.17,
"MMLU-PRO": 47.72
},
"hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5",
"known_config": None
},
{
"rank": 48,
"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
"scores": {
"average": 39.81,
"IFEval": 71.62,
"BBH": 48.76,
"MATH": 33.99,
"GPQA": 17.34,
"MUSR": 19.23,
"MMLU-PRO": 47.95
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
"known_config": None
},
{
"rank": 50,
"name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
"scores": {
"average": 39.46,
"IFEval": 68.72,
"BBH": 47.71,
"MATH": 35.05,
"GPQA": 18.23,
"MUSR": 19.56,
"MMLU-PRO": 47.50
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
"known_config": None
},
{
"rank": 52,
"name": "arcee-ai/Virtuoso-Small",
"scores": {
"average": 39.43,
"IFEval": 79.35,
"BBH": 50.40,
"MATH": 34.29,
"GPQA": 11.52,
"MUSR": 14.44,
"MMLU-PRO": 46.57
},
"hf_url": "https://huggingface.co/arcee-ai/Virtuoso-Small",
"known_config": None
},
{
"rank": 54,
"name": "sometimesanotion/Qwentinuum-14B-v6",
"scores": {
"average": 39.23,
"IFEval": 63.04,
"BBH": 50.23,
"MATH": 33.84,
"GPQA": 18.23,
"MUSR": 21.18,
"MMLU-PRO": 48.89
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v6",
"known_config": None
},
{
"rank": 55,
"name": "djuna/Q2.5-Veltha-14B",
"scores": {
"average": 39.21,
"IFEval": 82.92,
"BBH": 49.75,
"MATH": 28.02,
"GPQA": 14.54,
"MUSR": 12.26,
"MMLU-PRO": 47.76
},
"hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B",
"known_config": None
},
{
"rank": 57,
"name": "allknowingroger/QwenSlerp6-14B",
"scores": {
"average": 39.02,
"IFEval": 68.67,
"BBH": 47.59,
"MATH": 34.14,
"GPQA": 16.44,
"MUSR": 18.32,
"MMLU-PRO": 48.95
},
"hf_url": "https://huggingface.co/allknowingroger/QwenSlerp6-14B",
"known_config": None
},
{
"rank": 58,
"name": "allknowingroger/QwenSlerp5-14B",
"scores": {
"average": 38.94,
"IFEval": 71.19,
"BBH": 47.39,
"MATH": 33.16,
"GPQA": 15.32,
"MUSR": 17.81,
"MMLU-PRO": 48.78
},
"hf_url": "https://huggingface.co/allknowingroger/QwenSlerp5-14B",
"known_config": None
},
{
"rank": 59,
"name": "sometimesanotion/Qwentinuum-14B-v5",
"scores": {
"average": 38.87,
"IFEval": 62.86,
"BBH": 50.28,
"MATH": 31.57,
"GPQA": 18.34,
"MUSR": 21.09,
"MMLU-PRO": 49.09
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v5",
"known_config": None
},
{
"rank": 60,
"name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
"scores": {
"average": 38.82,
"IFEval": 59.90,
"BBH": 50.12,
"MATH": 34.89,
"GPQA": 18.46,
"MUSR": 21.02,
"MMLU-PRO": 48.56
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwenvergence-14B-v6-Prose",
"known_config": None
},
{
"rank": 61,
"name": "CultriX/Qwen2.5-14B-Brocav3",
"scores": {
"average": 38.76,
"IFEval": 69.52,
"BBH": 49.05,
"MATH": 32.25,
"GPQA": 14.54,
"MUSR": 19.25,
"MMLU-PRO": 47.97
},
"hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav3",
"known_config": None
},
{
"rank": 62,
"name": "sometimesanotion/Qwentinuum-14B-v7",
"scores": {
"average": 38.76,
"IFEval": 61.09,
"BBH": 50.35,
"MATH": 33.38,
"GPQA": 18.79,
"MUSR": 19.95,
"MMLU-PRO": 49.00
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v7",
"known_config": None
},
{
"rank": 64,
"name": "sometimesanotion/Qwentinuum-14B-v3",
"scores": {
"average": 38.74,
"IFEval": 61.58,
"BBH": 50.04,
"MATH": 32.85,
"GPQA": 18.34,
"MUSR": 20.62,
"MMLU-PRO": 49.03
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v3",
"known_config": None
},
{
"rank": 65,
"name": "allura-org/TQ2.5-14B-Aletheia-v1",
"scores": {
"average": 38.74,
"IFEval": 75.30,
"BBH": 50.88,
"MATH": 29.53,
"GPQA": 14.99,
"MUSR": 14.61,
"MMLU-PRO": 47.12
},
"hf_url": "https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1",
"known_config": None
},
{
"rank": 66,
"name": "qingy2024/Fusion4-14B-Instruct",
"scores": {
"average": 38.73,
"IFEval": 76.49,
"BBH": 50.70,
"MATH": 33.91,
"GPQA": 10.74,
"MUSR": 13.97,
"MMLU-PRO": 46.60
},
"hf_url": "https://huggingface.co/qingy2024/Fusion4-14B-Instruct",
"known_config": None
},
{
"rank": 68,
"name": "CultriX/Qwen2.5-14B-Brocav7",
"scores": {
"average": 38.52,
"IFEval": 67.24,
"BBH": 48.91,
"MATH": 31.87,
"GPQA": 15.66,
"MUSR": 20.15,
"MMLU-PRO": 47.31
},
"hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav7",
"known_config": None
},
{
"rank": 71,
"name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
"scores": {
"average": 38.46,
"IFEval": 56.43,
"BBH": 50.14,
"MATH": 35.57,
"GPQA": 18.46,
"MUSR": 21.34,
"MMLU-PRO": 48.80
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v6-Prose",
"known_config": None
},
{
"rank": 76,
"name": "CultriX/Qwen2.5-14B-Brocav6",
"scores": {
"average": 38.32,
"IFEval": 69.95,
"BBH": 47.82,
"MATH": 29.61,
"GPQA": 15.66,
"MUSR": 18.88,
"MMLU-PRO": 47.99
},
"hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav6",
"known_config": None
},
{
"rank": 80,
"name": "CultriX/SeQwence-14Bv1",
"scores": {
"average": 38.20,
"IFEval": 66.78,
"BBH": 47.19,
"MATH": 33.53,
"GPQA": 14.88,
"MUSR": 18.80,
"MMLU-PRO": 48.00
},
"hf_url": "https://huggingface.co/CultriX/SeQwence-14Bv1",
"known_config": None
},
{
"rank": 85,
"name": "sometimesanotion/Qwentinuum-14B-v013",
"scores": {
"average": 37.96,
"IFEval": 67.11,
"BBH": 43.97,
"MATH": 33.01,
"GPQA": 14.32,
"MUSR": 24.99,
"MMLU-PRO": 44.34
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v013",
"known_config": None
},
{
"rank": 86,
"name": "CultriX/Qwen2.5-14B-Wernickev3",
"scores": {
"average": 37.94,
"IFEval": 70.48,
"BBH": 44.58,
"MATH": 32.78,
"GPQA": 14.99,
"MUSR": 18.69,
"MMLU-PRO": 46.13
},
"hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev3",
"known_config": None
},
{
"rank": 88,
"name": "allknowingroger/QwenSlerp4-14B",
"scores": {
"average": 37.80,
"IFEval": 63.28,
"BBH": 49.38,
"MATH": 30.97,
"GPQA": 16.33,
"MUSR": 17.59,
"MMLU-PRO": 49.28
},
"hf_url": "https://huggingface.co/allknowingroger/QwenSlerp4-14B",
"known_config": None
},
{
"rank": 89,
"name": "CultriX/Qwen2.5-14B-Broca",
"scores": {
"average": 37.72,
"IFEval": 56.04,
"BBH": 50.03,
"MATH": 34.59,
"GPQA": 18.23,
"MUSR": 18.95,
"MMLU-PRO": 48.49
},
"hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Broca",
"known_config": None
},
{
"rank": 90,
"name": "CultriX/Qwen2.5-14B-Emerged",
"scores": {
"average": 37.66,
"IFEval": 70.00,
"BBH": 45.93,
"MATH": 30.74,
"GPQA": 14.32,
"MUSR": 18.47,
"MMLU-PRO": 46.51
},
"hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Emerged",
"known_config": None
},
{
"rank": 91,
"name": "sometimesanotion/Qwentinuum-14B-v8",
"scores": {
"average": 37.65,
"IFEval": 54.12,
"BBH": 50.11,
"MATH": 34.14,
"GPQA": 17.79,
"MUSR": 20.75,
"MMLU-PRO": 49.02
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v8",
"known_config": None
},
{
"rank": 92,
"name": "qingy2024/Fusion-14B-Instruct",
"scores": {
"average": 37.64,
"IFEval": 72.60,
"BBH": 48.58,
"MATH": 30.97,
"GPQA": 13.98,
"MUSR": 14.81,
"MMLU-PRO": 44.93
},
"hf_url": "https://huggingface.co/qingy2024/Fusion-14B-Instruct",
"known_config": None
},
{
"rank": 94,
"name": "CultriX/Qwestion-14B",
"scores": {
"average": 37.63,
"IFEval": 63.18,
"BBH": 48.76,
"MATH": 31.72,
"GPQA": 15.77,
"MUSR": 17.22,
"MMLU-PRO": 49.14
},
"hf_url": "https://huggingface.co/CultriX/Qwestion-14B",
"known_config": None
},
{
"rank": 99,
"name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
"scores": {
"average": 37.37,
"IFEval": 49.18,
"BBH": 49.80,
"MATH": 35.57,
"GPQA": 19.35,
"MUSR": 21.77,
"MMLU-PRO": 48.55
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwenvergence-14B-v3-Prose",
"known_config": None
},
{
"rank": 102,
"name": "CultriX/SeQwence-14B-v5",
"scores": {
"average": 37.27,
"IFEval": 59.20,
"BBH": 50.00,
"MATH": 31.04,
"GPQA": 16.00,
"MUSR": 18.33,
"MMLU-PRO": 49.05
},
"hf_url": "https://huggingface.co/CultriX/SeQwence-14B-v5",
"known_config": None
},
{
"rank": 103,
"name": "sometimesanotion/Qwen-14B-ProseStock-v4",
"scores": {
"average": 37.23,
"IFEval": 49.42,
"BBH": 49.54,
"MATH": 35.50,
"GPQA": 18.46,
"MUSR": 21.70,
"MMLU-PRO": 48.74
},
"hf_url": "https://huggingface.co/sometimesanotion/Qwen-14B-ProseStock-v4",
"known_config": None
},
{
"rank": 104,
"name": "sometimesanotion/IF-reasoning-experiment-40",
"scores": {
"average": 37.21,
"IFEval": 63.30,
"BBH": 44.31,
"MATH": 27.72,
"GPQA": 17.34,
"MUSR": 25.86,
"MMLU-PRO": 44.72
},
"hf_url": "https://huggingface.co/sometimesanotion/IF-reasoning-experiment-40",
"known_config": None
},
{
"rank": 105,
"name": "CultriX/SeQwence-14B-EvolMerge",
"scores": {
"average": 37.20,
"IFEval": 53.82,
"BBH": 50.78,
"MATH": 31.80,
"GPQA": 17.45,
"MUSR": 20.26,
"MMLU-PRO": 49.10
},
"hf_url": "https://huggingface.co/CultriX/SeQwence-14B-EvolMerge",
"known_config": None
}
]
def scrape_model_page(model_url, timeout=30):
    """
    Scrape a Hugging Face model page for a potential MergeKit YAML
    configuration and a metadata block.

    Args:
        model_url: Full URL of the model page to fetch.
        timeout: Seconds to wait for the HTTP response. Added so a single
            unresponsive host cannot hang the whole leaderboard run
            (requests.get blocks indefinitely without it).

    Returns:
        dict with keys "yaml_configuration" and "metadata" on success,
        or an error string if the fetch or parse fails.
    """
    try:
        response = requests.get(model_url, timeout=timeout)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {response.status_code})"
        soup = BeautifulSoup(response.text, "html.parser")
        # Model cards typically render YAML inside the first <pre> block.
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
        # NOTE(review): best-effort selector — assumes a <div class="metadata">
        # exists on the page; adjust if the page structure differs.
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
        return {
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }
    except Exception as e:
        # Report failures (network errors, timeouts, parse errors) as a
        # string rather than raising, matching what callers expect.
        return f"Error: {str(e)}"
def print_benchmark_and_config_info(model_info):
    """
    Print one model's benchmark overview followed by its MergeKit config:
    the stored known_config when present, a scraped YAML block when the
    model page exposes one, or a ready-to-run scraping snippet otherwise.

    Args:
        model_info: One entry of ``benchmark_data`` (keys: rank, name,
            scores, hf_url, known_config).
    """
    # Print the benchmark summary
    print(f"---\nModel Rank: {model_info['rank']}")
    print(f"Model Name: {model_info['name']}")
    print(f"Model average score across benchmarks in %: {model_info['scores']['average']}")
    print(f"Models average score on IFEval benchmarks in %: {model_info['scores']['IFEval']}")
    print(f"Models average score on BBH benchmarks in %: {model_info['scores']['BBH']}")
    print(f"Models average score on MATH benchmarks in %: {model_info['scores']['MATH']}")
    print(f"Models average score in GPQA benchmarks in %: {model_info['scores']['GPQA']}")
    print(f"Models average score in MUSR benchmarks in %: {model_info['scores']['MUSR']}")
    print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
    # If we already know the config, print it and stop — no scraping needed.
    if model_info["known_config"] is not None:
        print("###")
        print("models:")
        for m in model_info["known_config"]["models"]:
            print(f" - model: {m['model']}")
        print(f"merge_method: {model_info['known_config']['merge_method']}")
        print(f"base_model: {model_info['known_config']['base_model']}")
        print(f"dtype: {model_info['known_config']['dtype']}")
        print("parameters:")
        # Bug fix: the old line appended a hard-coded "# V shaped curve:
        # Hermes ... WizardMath ..." comment describing models that appear
        # in none of the stored configs (copy-paste leftover). Print only
        # the actual interpolation curve.
        print(f" t: {model_info['known_config']['parameters']['t']}")
        print("###")
        return
    # Otherwise, attempt to scrape the model page.
    scrape_result = scrape_model_page(model_info["hf_url"])
    # A string result means an error; otherwise check whether YAML was found.
    if isinstance(scrape_result, str) or ("No YAML configuration found." in scrape_result["yaml_configuration"]):
        print("(No MergeKit configuration found.)\n")
        print("You can try the following Python script to scrape the model page:\n")
        print("#" * 70)
        # Emit a standalone copy of the scraper, pre-filled with this
        # model's URL, so the user can retry manually.
        print(
            f'''import requests
from bs4 import BeautifulSoup

def scrape_model_page(model_url):
    try:
        response = requests.get(model_url)
        if response.status_code != 200:
            return f"Error: Unable to fetch the page (Status Code: {{response.status_code}})"
        soup = BeautifulSoup(response.text, "html.parser")
        yaml_config = soup.find("pre")
        yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
        metadata_section = soup.find("div", class_="metadata")
        metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
        return {{
            "yaml_configuration": yaml_text,
            "metadata": metadata_text
        }}
    except Exception as e:
        return f"Error: {{str(e)}}"

if __name__ == "__main__":
    model_url = "{model_info['hf_url']}"
    result = scrape_model_page(model_url)
    print(result)'''
        )
        print("#" * 70)
    else:
        # If we found a config, print it between triple-hash signs
        print("###")
        print(scrape_result["yaml_configuration"])
        print("###")
if __name__ == "__main__":
    # 2. Walk the whole leaderboard, emitting each model's benchmark
    #    summary and MergeKit configuration details.
    for entry in benchmark_data:
        print_benchmark_and_config_info(entry)