Spaces:
Running
Running
File size: 7,226 Bytes
c56b33e 17aa8f3 982fdda bdbadad 90cb3d2 07b8fd8 b2fd468 bb7c504 c56b33e 90cb3d2 85e629e 90cb3d2 07b8fd8 90cb3d2 c56b33e 4bcc990 07b8fd8 4bcc990 c56b33e 4bcc990 c56b33e 4bcc990 07b8fd8 4bcc990 c56b33e 4bcc990 c56b33e 4bcc990 07b8fd8 4bcc990 c56b33e b2fd468 c56b33e 07b8fd8 c56b33e 90cb3d2 c56b33e bdbadad 07b8fd8 bdbadad 982fdda c56b33e b2fd468 c56b33e 90cb3d2 c56b33e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
## Comprehensive Model Performance Analysis
### Importing Required Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gradio as gr
import requests
from bs4 import BeautifulSoup
import io
import os
import base64
import zipfile
from PIL import Image
from io import BytesIO
### Input Data
# Data with links to Hugging Face repositories.
# Each row: [model name, Hugging Face URL, then six benchmark scores in the
# same order as the score entries of `columns` below: tinyArc, tinyHellaswag,
# tinyMMLU, tinyTruthfulQA, tinyTruthfulQA_mc1, tinyWinogrande].
data_full = [
    ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co/CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
    ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
    ['CultriX/Qwen2.5-14B-FinalMerge', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge', 0.7248, 0.8277, 0.7113, 0.7052, 0.57, 0.7001],
    ['CultriX/Qwen2.5-14B-MultiCultyv2', 'https://huggingface.co/CultriX/Qwen2.5-14B-MultiCultyv2', 0.7295, 0.8359, 0.7363, 0.5767, 0.44, 0.7316],
    ['CultriX/Qwen2.5-14B-Brocav7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav7', 0.7445, 0.8353, 0.7508, 0.6292, 0.46, 0.7629],
    ['CultriX/Qwen2.5-14B-Broca', 'https://huggingface.co/CultriX/Qwen2.5-14B-Broca', 0.7456, 0.8352, 0.748, 0.6034, 0.44, 0.7716],
    ['CultriX/Qwen2.5-14B-Brocav3', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav3', 0.7395, 0.8388, 0.7393, 0.6405, 0.47, 0.7659],
    ['CultriX/Qwen2.5-14B-Brocav4', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav4', 0.7432, 0.8377, 0.7444, 0.6277, 0.48, 0.758],
    ['CultriX/Qwen2.5-14B-Brocav2', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav2', 0.7492, 0.8302, 0.7508, 0.6377, 0.51, 0.7478],
    ['CultriX/Qwen2.5-14B-Brocav5', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav5', 0.7445, 0.8313, 0.7547, 0.6376, 0.5, 0.7304],
    ['CultriX/Qwen2.5-14B-Brocav6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Brocav6', 0.7179, 0.8354, 0.7531, 0.6378, 0.49, 0.7524],
    ['CultriX/Qwenfinity-2.5-14B', 'https://huggingface.co/CultriX/Qwenfinity-2.5-14B', 0.7347, 0.8254, 0.7279, 0.7267, 0.56, 0.697],
    ['CultriX/Qwen2.5-14B-Emergedv2', 'https://huggingface.co/CultriX/Qwen2.5-14B-Emergedv2', 0.7137, 0.8335, 0.7363, 0.5836, 0.44, 0.7344],
    ['CultriX/Qwen2.5-14B-Unity', 'https://huggingface.co/CultriX/Qwen2.5-14B-Unity', 0.7063, 0.8343, 0.7423, 0.682, 0.57, 0.7498],
    ['CultriX/Qwen2.5-14B-MultiCultyv3', 'https://huggingface.co/CultriX/Qwen2.5-14B-MultiCultyv3', 0.7132, 0.8216, 0.7395, 0.6792, 0.55, 0.712],
    ['CultriX/Qwen2.5-14B-Emergedv3', 'https://huggingface.co/CultriX/Qwen2.5-14B-Emergedv3', 0.7436, 0.8312, 0.7519, 0.6585, 0.55, 0.7068],
    ['CultriX/SeQwence-14Bv1', 'https://huggingface.co/CultriX/SeQwence-14Bv1', 0.7278, 0.841, 0.7541, 0.6816, 0.52, 0.7539],
    ['CultriX/Qwen2.5-14B-Wernickev2', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev2', 0.7391, 0.8168, 0.7273, 0.622, 0.45, 0.7572],
    ['CultriX/Qwen2.5-14B-Wernickev3', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev3', 0.7357, 0.8148, 0.7245, 0.7023, 0.55, 0.7869],
    ['CultriX/Qwen2.5-14B-Wernickev4', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev4', 0.7355, 0.829, 0.7497, 0.6306, 0.48, 0.7635],
    ['CultriX/SeQwential-14B-v1', 'https://huggingface.co/CultriX/SeQwential-14B-v1', 0.7355, 0.8205, 0.7549, 0.6367, 0.48, 0.7626],
    ['CultriX/Qwen2.5-14B-Wernickev5', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev5', 0.7224, 0.8272, 0.7541, 0.679, 0.51, 0.7578],
    ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
    ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
    ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
]
# Column labels: the first two identify the model, the remaining six are task
# scores. The helpers below rely on this "2 id columns + score columns" layout
# (they slice scores with .iloc[:, 2:]).
columns = ["Model Configuration", "Model Link", "tinyArc", "tinyHellaswag", "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"]
# Master DataFrame consumed by every plotting/scraping helper in this module.
df_full = pd.DataFrame(data_full, columns=columns)
### Visualization and Analytics Functions
# 1. Plot Average Scores
def plot_average_scores(df=None):
    """Plot a horizontal bar chart of each model's mean score across all tasks.

    Args:
        df: Optional DataFrame shaped like ``df_full`` (two identifier
            columns followed by numeric score columns). Defaults to the
            module-level ``df_full``.
    """
    if df is None:
        df = df_full
    # Work on a copy so the shared DataFrame is never mutated. The previous
    # version inserted an "Average Score" column into df_full in place, which
    # then leaked into plot_task_performance's melt and plot_heatmap, and
    # skewed the mean on any second call (the average column was averaged in).
    scores = df.copy()
    scores["Average Score"] = scores.iloc[:, 2:].mean(axis=1)
    ranked = scores.sort_values(by="Average Score", ascending=False)

    plt.figure(figsize=(12, 8))
    plt.barh(ranked["Model Configuration"], ranked["Average Score"])
    plt.title("Average Performance of Models Across Tasks", fontsize=16)
    plt.xlabel("Average Score", fontsize=14)
    plt.ylabel("Model Configuration", fontsize=14)
    plt.gca().invert_yaxis()  # best-performing model at the top
    plt.grid(axis='x', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
# 2. Plot Task Performance
def plot_task_performance(df=None):
    """Draw one line per model showing its score on each benchmark task.

    Args:
        df: Optional DataFrame shaped like ``df_full``. Defaults to the
            module-level ``df_full``.
    """
    if df is None:
        df = df_full
    melted = df.melt(
        id_vars=["Model Configuration", "Model Link"],
        var_name="Task",
        value_name="Score",
    )

    plt.figure(figsize=(14, 10))
    # groupby iterates each model's rows once; the previous version
    # re-filtered the full melted frame for every model (O(models * rows)).
    # sort=False preserves the models' original row order for the legend.
    for model, model_scores in melted.groupby("Model Configuration", sort=False):
        plt.plot(model_scores["Task"], model_scores["Score"], marker="o", label=model)
    plt.title("Performance of All Models Across Tasks", fontsize=16)
    plt.xlabel("Task", fontsize=14)
    plt.ylabel("Score", fontsize=14)
    plt.xticks(rotation=45)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()
# 3. Plot Task-Specific Top Models
def plot_task_specific_top_models(df=None):
    """Plot the best score achieved on each task and name the model behind it.

    Args:
        df: Optional DataFrame shaped like ``df_full``. Defaults to the
            module-level ``df_full``.

    Returns:
        DataFrame with columns ``Task``, ``Top Model`` and ``Score``
        (previously implicitly ``None``; existing callers are unaffected).
    """
    if df is None:
        df = df_full
    score_cols = df.iloc[:, 2:]
    # idxmax() yields row *labels*, not model names — the previous version
    # stored those raw integer labels under "Top Model". Map them to the
    # human-readable "Model Configuration" values instead.
    best_rows = score_cols.idxmax()
    results = pd.DataFrame({
        "Task": score_cols.columns,
        "Top Model": df.loc[best_rows, "Model Configuration"].to_numpy(),
        "Score": score_cols.max().to_numpy(),
    })

    plt.figure(figsize=(12, 6))
    plt.bar(results["Task"], results["Score"])
    plt.title("Task-Specific Top Models", fontsize=16)
    plt.xlabel("Task", fontsize=14)
    plt.ylabel("Score", fontsize=14)
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.show()
    return results
### YAML Configuration and Scraping Utilities
# 1. Scrape MergeKit Configuration
def scrape_mergekit_config(model_name):
    """Fetch a model's Hugging Face page and return its first <pre> block.

    The model card pages render the MergeKit YAML config inside a <pre> tag;
    this returns that text stripped of surrounding whitespace.

    Args:
        model_name: A value from df_full["Model Configuration"].

    Returns:
        The YAML text, or a human-readable error string. Never raises:
        unknown names, network failures and missing configs all return
        messages, since this feeds directly into a UI.
    """
    links = df_full.loc[df_full["Model Configuration"] == model_name, "Model Link"]
    if links.empty:
        # The previous version indexed .values[0] unconditionally and raised
        # IndexError for any model name not present in the table.
        return f"No model named {model_name} found."
    try:
        # Timeout keeps a dead/slow host from hanging the app indefinitely.
        response = requests.get(links.iloc[0], timeout=10)
    except requests.RequestException:
        return f"Failed to fetch model page for {model_name}. Please check the link."
    if response.status_code != 200:
        return f"Failed to fetch model page for {model_name}. Please check the link."
    soup = BeautifulSoup(response.text, "html.parser")
    yaml_config = soup.find("pre")
    return yaml_config.text.strip() if yaml_config else f"No YAML configuration found for {model_name}."
### Performance Heatmap
def plot_heatmap(df=None):
    """Render an annotated model x task heatmap of all benchmark scores.

    Args:
        df: Optional DataFrame shaped like ``df_full``. Defaults to the
            module-level ``df_full``.
    """
    if df is None:
        df = df_full
    scores = df.iloc[:, 2:]

    plt.figure(figsize=(12, 8))
    # Label the x axis from the frame itself rather than the module-level
    # `columns` list: the previous version broke (label/column count
    # mismatch) whenever df_full gained an extra column.
    sns.heatmap(
        scores,
        annot=True,
        cmap="YlGnBu",
        xticklabels=scores.columns,
        yticklabels=df["Model Configuration"],
    )
    plt.title("Performance Heatmap", fontsize=16)
    plt.tight_layout()
    plt.show()
### Gradio App
# Building the Interface
# Assemble and launch the Gradio UI.
with gr.Blocks() as demo:
    gr.Markdown("# Comprehensive Model Performance Analysis")
    # NOTE(review): these Image components have no `value` and are not wired
    # to any of the plotting functions above (no fn/event callbacks), so the
    # page shows three empty image slots — confirm whether the plots were
    # meant to be rendered here before shipping.
    gr.Image(type="pil", label="Average Performance Plot")
    gr.Image(type="pil", label="Task Performance Plot")
    gr.Image(type="pil", label="Task-Specific Top Models")

demo.launch()
|