CultriX committed on
Commit
94bcb53
·
verified ·
1 Parent(s): 7334418

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +587 -44
app.py CHANGED
@@ -1,9 +1,9 @@
 
 
1
  import pandas as pd
2
  import matplotlib.pyplot as plt
3
  import seaborn as sns
4
  import gradio as gr
5
- import requests
6
- from bs4 import BeautifulSoup
7
  import io
8
  import os
9
  import base64
@@ -13,11 +13,12 @@ from io import BytesIO
13
  import tempfile
14
  import sys
15
 
16
-
17
  # --------------------------------------------------------------------
18
- # PART 1: YOUR EXISTING (TINY) DATA & PLOTS
19
  # --------------------------------------------------------------------
20
 
 
 
21
  data_full = [
22
  ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co/CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
23
  ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
@@ -44,10 +45,12 @@ data_full = [
44
  ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
45
  ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
46
  ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
47
- ['CultriX/Qwen2.5-14B-BrocaV8', 'https://huggingface.co/CultriX/Qwen2.5-14B-BrocaV8', 0.7415, 0.8396, 0.7334, 0.5785, 0.4300, 0.7646],
 
 
 
 
48
  ]
49
- columns = ["Model Configuration", "Model Link", "tinyArc", "tinyHellaswag",
50
- "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"]
51
  df_full = pd.DataFrame(data_full, columns=columns)
52
 
53
  def plot_average_scores():
@@ -75,8 +78,10 @@ def plot_average_scores():
75
  return pil_image, temp_image_file.name
76
 
77
  def plot_task_performance():
78
- df_full_melted = df_full.melt(id_vars=["Model Configuration", "Model Link"],
79
- var_name="Task", value_name="Score")
 
 
80
 
81
  plt.figure(figsize=(16, 12))
82
  for model in df_full["Model Configuration"]:
@@ -127,8 +132,13 @@ def plot_task_specific_top_models():
127
 
128
  def plot_heatmap():
129
  plt.figure(figsize=(14, 10))
130
- sns.heatmap(df_full.iloc[:, 2:], annot=True, cmap="YlGnBu",
131
- xticklabels=columns[2:], yticklabels=df_full["Model Configuration"])
 
 
 
 
 
132
  plt.title("Performance Heatmap", fontsize=16)
133
  plt.tight_layout()
134
 
@@ -143,7 +153,15 @@ def plot_heatmap():
143
  return pil_image, temp_image_file.name
144
 
145
  def scrape_mergekit_config(model_name):
146
- model_link = df_full.loc[df_full["Model Configuration"] == model_name, "Model Link"].values[0]
 
 
 
 
 
 
 
 
147
  response = requests.get(model_link)
148
  if response.status_code != 200:
149
  return f"Failed to fetch model page for {model_name}. Please check the link."
@@ -155,12 +173,18 @@ def scrape_mergekit_config(model_name):
155
  return f"No YAML configuration found for {model_name}."
156
 
157
  def download_yaml(yaml_content, model_name):
 
 
 
158
  if "No YAML configuration found" in yaml_content or "Failed to fetch model page" in yaml_content:
159
  return None
160
  filename = f"{model_name.replace('/', '_')}_config.yaml"
161
  return gr.File(value=yaml_content.encode(), filename=filename)
162
 
163
  def scrape_model_page(model_url):
 
 
 
164
  try:
165
  response = requests.get(model_url)
166
  if response.status_code != 200:
@@ -176,9 +200,18 @@ def scrape_model_page(model_url):
176
  return f"Error: {str(e)}"
177
 
178
  def display_scraped_model_data(model_url):
 
 
 
179
  return scrape_model_page(model_url)
180
 
181
  def download_all_data():
 
 
 
 
 
 
182
  import io
183
  csv_buffer = io.StringIO()
184
  df_full.to_csv(csv_buffer, index=False)
@@ -200,28 +233,27 @@ def download_all_data():
200
  with zipfile.ZipFile(zip_buffer, 'w') as zf:
201
  zf.writestr("model_scores.csv", csv_data)
202
 
 
203
  for name, (pil_image, filename) in plot_dict.items():
204
  image_bytes = io.BytesIO()
205
  pil_image.save(image_bytes, format='PNG')
206
  image_bytes.seek(0)
207
  zf.writestr(filename, image_bytes.read())
208
 
209
- # Also try scraping each model for a YAML config
210
  for model_name in df_full["Model Configuration"].to_list():
211
  yaml_content = scrape_mergekit_config(model_name)
212
  if ("No YAML configuration found" not in yaml_content) and ("Failed to fetch model page" not in yaml_content):
213
- zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())
214
 
215
  zip_buffer.seek(0)
216
  return zip_buffer, "analysis_data.zip"
217
 
218
-
219
  # --------------------------------------------------------------------
220
- # PART 2: FULL "DATA START" SNIPPET (RANKS 44–105) + Parser
221
  # --------------------------------------------------------------------
 
222
  benchmark_data = [
223
- # The entire dataset from your "DATA START", rank 44..105
224
- # (the code you posted with "knowledge of config" or scraping logic)
225
  {
226
  "rank": 44,
227
  "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
@@ -248,21 +280,533 @@ benchmark_data = [
248
  }
249
  }
250
  },
251
- # ... rest of the snippet ...
252
- # (Exactly copy/paste your big block from rank=44 to rank=105)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  ]
254
 
255
-
256
  def snippet_scrape_model_page(url):
257
  """
258
- Same as scrape_model_page, but we keep it separate for clarity.
 
259
  """
260
- return scrape_model_page(url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
  def snippet_print_benchmark_and_config_info(model_info):
263
  """
264
- Prints an overview for each model (your "DATA START" logic),
265
- either known config or scraping snippet.
266
  """
267
  print(f"---\nModel Rank: {model_info['rank']}")
268
  print(f"Model Name: {model_info['name']}")
@@ -274,7 +818,7 @@ def snippet_print_benchmark_and_config_info(model_info):
274
  print(f"Models average score in MUSR benchmarks in %: {model_info['scores']['MUSR']}")
275
  print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
276
 
277
- # If there's a known_config, print it as YAML
278
  if model_info["known_config"] is not None:
279
  print("###")
280
  print("models:")
@@ -284,25 +828,22 @@ def snippet_print_benchmark_and_config_info(model_info):
284
  print(f"base_model: {model_info['known_config']['base_model']}")
285
  print(f"dtype: {model_info['known_config']['dtype']}")
286
  print("parameters:")
287
- print(f" t: {model_info['known_config']['parameters']['t']} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
 
288
  print("###")
289
  return
290
 
291
- # Otherwise, scrape
292
  scraped = snippet_scrape_model_page(model_info["hf_url"])
293
  if isinstance(scraped, str):
294
  # Means it's an error string or something
295
- if "Error:" in scraped:
296
- print("(No MergeKit configuration found or error occurred.)\n")
297
- # optionally print snippet
298
- else:
299
- print(scraped)
300
  return
301
  else:
302
- # It's presumably a dict: { "yaml_configuration": "...", "metadata": "..." }
303
- if ("No YAML configuration found." in scraped["yaml_configuration"]):
304
  print("(No MergeKit configuration found.)\n")
305
- # Print your snippet code
306
  print("You can try the following Python script to scrape the model page:\n")
307
  print("#" * 70)
308
  print(f'''import requests
@@ -336,14 +877,15 @@ if __name__ == "__main__":
336
  print(result)''')
337
  print("#" * 70)
338
  else:
 
339
  print("###")
340
  print(scraped["yaml_configuration"])
341
  print("###")
342
 
343
  def run_non_tiny_benchmarks():
344
  """
345
- Captures the stdout from printing each model in benchmark_data
346
- (ranks 44 to 105), returning a single string for Gradio to display.
347
  """
348
  old_stdout = sys.stdout
349
  buffer = io.StringIO()
@@ -355,14 +897,13 @@ def run_non_tiny_benchmarks():
355
  sys.stdout = old_stdout
356
  return buffer.getvalue()
357
 
358
-
359
  # --------------------------------------------------------------------
360
- # PART 3: GRADIO APP (Your existing UI plus the "Parse Non-Tiny" button)
361
  # --------------------------------------------------------------------
362
  with gr.Blocks() as demo:
363
  gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")
364
 
365
- # The existing UI
366
  with gr.Row():
367
  btn1 = gr.Button("Show Average Performance")
368
  img1 = gr.Image(type="pil", label="Average Performance Plot")
@@ -387,6 +928,7 @@ with gr.Blocks() as demo:
387
  heatmap_download = gr.File(label="Download Heatmap")
388
  btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])
389
 
 
390
  with gr.Row():
391
  model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
392
  with gr.Column():
@@ -398,12 +940,13 @@ with gr.Blocks() as demo:
398
  yaml_download = gr.File(label="Download MergeKit Configuration")
399
  save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)
400
 
 
401
  with gr.Row():
402
  download_all_btn = gr.Button("Download Everything")
403
  all_downloads = gr.File(label="Download All Data")
404
  download_all_btn.click(download_all_data, outputs=all_downloads)
405
 
406
- # Live scraping feature
407
  gr.Markdown("## Live Scraping Features")
408
  with gr.Row():
409
  url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co/<model>")
@@ -411,11 +954,11 @@ with gr.Blocks() as demo:
411
  live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
412
  live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)
413
 
414
- # NEW: Non-Tiny Benchmarks button
415
  gr.Markdown("## Non-Tiny Benchmark Parser (Ranks 44–105)")
416
  with gr.Row():
417
  parse_non_tiny_btn = gr.Button("Parse Non-Tiny Benchmarks")
418
  parse_non_tiny_output = gr.Textbox(label="Non-Tiny Benchmark Output", lines=30)
419
  parse_non_tiny_btn.click(fn=run_non_tiny_benchmarks, outputs=parse_non_tiny_output)
420
 
421
- demo.launch()
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
  import pandas as pd
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
  import gradio as gr
 
 
7
  import io
8
  import os
9
  import base64
 
13
  import tempfile
14
  import sys
15
 
 
16
  # --------------------------------------------------------------------
17
+ # PART 1: TINY DATA + PLOTS
18
  # --------------------------------------------------------------------
19
 
20
+ # This dataframe is your “tiny” version of model performance data.
21
+ # Used for plotting & demonstration in the Gradio app.
22
  data_full = [
23
  ['CultriX/Qwen2.5-14B-SLERPv7', 'https://huggingface.co/CultriX/Qwen2.5-14B-SLERPv7', 0.7205, 0.8272, 0.7541, 0.6581, 0.5, 0.729],
24
  ['djuna/Q2.5-Veltha-14B-0.5', 'https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5', 0.7492, 0.8386, 0.7305, 0.598, 0.43, 0.7817],
 
45
  ['CultriX/Qwen2.5-14B-Wernickev6', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev6', 0.6994, 0.7549, 0.5816, 0.6991, 0.58, 0.7267],
46
  ['CultriX/Qwen2.5-14B-Wernickev7', 'https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev7', 0.7147, 0.7599, 0.6097, 0.7056, 0.57, 0.7164],
47
  ['CultriX/Qwen2.5-14B-FinalMerge-tmp2', 'https://huggingface.co/CultriX/Qwen2.5-14B-FinalMerge-tmp2', 0.7255, 0.8192, 0.7535, 0.6671, 0.5, 0.7612],
48
+ ['CultriX/Qwen2.5-14B-BrocaV8', 'https://huggingface.co/CultriX/Qwen2.5-14B-BrocaV8', 0.7415, 0.8396, 0.7334, 0.5785, 0.43, 0.7646],
49
+ ]
50
+ columns = [
51
+ "Model Configuration", "Model Link", "tinyArc", "tinyHellaswag",
52
+ "tinyMMLU", "tinyTruthfulQA", "tinyTruthfulQA_mc1", "tinyWinogrande"
53
  ]
 
 
54
  df_full = pd.DataFrame(data_full, columns=columns)
55
 
56
  def plot_average_scores():
 
78
  return pil_image, temp_image_file.name
79
 
80
  def plot_task_performance():
81
+ df_full_melted = df_full.melt(
82
+ id_vars=["Model Configuration", "Model Link"],
83
+ var_name="Task", value_name="Score"
84
+ )
85
 
86
  plt.figure(figsize=(16, 12))
87
  for model in df_full["Model Configuration"]:
 
132
 
133
  def plot_heatmap():
134
  plt.figure(figsize=(14, 10))
135
+ sns.heatmap(
136
+ df_full.iloc[:, 2:],
137
+ annot=True,
138
+ cmap="YlGnBu",
139
+ xticklabels=columns[2:],
140
+ yticklabels=df_full["Model Configuration"]
141
+ )
142
  plt.title("Performance Heatmap", fontsize=16)
143
  plt.tight_layout()
144
 
 
153
  return pil_image, temp_image_file.name
154
 
155
  def scrape_mergekit_config(model_name):
156
+ """
157
+ For the *tiny* table’s model links.
158
+ Scrapes <pre> tags on the huggingface model page to find a YAML config.
159
+ """
160
+ df_row = df_full.loc[df_full["Model Configuration"] == model_name]
161
+ if df_row.empty:
162
+ return f"No data found for model {model_name}."
163
+
164
+ model_link = df_row["Model Link"].values[0]
165
  response = requests.get(model_link)
166
  if response.status_code != 200:
167
  return f"Failed to fetch model page for {model_name}. Please check the link."
 
173
  return f"No YAML configuration found for {model_name}."
174
 
175
  def download_yaml(yaml_content, model_name):
176
+ """
177
+ Let users download the scraped YAML if it exists.
178
+ """
179
  if "No YAML configuration found" in yaml_content or "Failed to fetch model page" in yaml_content:
180
  return None
181
  filename = f"{model_name.replace('/', '_')}_config.yaml"
182
  return gr.File(value=yaml_content.encode(), filename=filename)
183
 
184
  def scrape_model_page(model_url):
185
+ """
186
+ Used for the "Live Scraping" text box in the Gradio UI.
187
+ """
188
  try:
189
  response = requests.get(model_url)
190
  if response.status_code != 200:
 
200
  return f"Error: {str(e)}"
201
 
202
  def display_scraped_model_data(model_url):
203
+ """
204
+ Helper for the "Live Scraping Features" section of the Gradio app.
205
+ """
206
  return scrape_model_page(model_url)
207
 
208
  def download_all_data():
209
+ """
210
+ Builds and returns a zip of:
211
+ - the CSV of your 'tiny' data,
212
+ - four plots (average performance, task performance, top models, heatmap),
213
+ - any YAML configurations for the 'tiny' table's models (if found).
214
+ """
215
  import io
216
  csv_buffer = io.StringIO()
217
  df_full.to_csv(csv_buffer, index=False)
 
233
  with zipfile.ZipFile(zip_buffer, 'w') as zf:
234
  zf.writestr("model_scores.csv", csv_data)
235
 
236
+ # Add the images
237
  for name, (pil_image, filename) in plot_dict.items():
238
  image_bytes = io.BytesIO()
239
  pil_image.save(image_bytes, format='PNG')
240
  image_bytes.seek(0)
241
  zf.writestr(filename, image_bytes.read())
242
 
243
+ # Also try scraping each model in the *tiny* dataset for a YAML config
244
  for model_name in df_full["Model Configuration"].to_list():
245
  yaml_content = scrape_mergekit_config(model_name)
246
  if ("No YAML configuration found" not in yaml_content) and ("Failed to fetch model page" not in yaml_content):
247
+ zf.writestr(f"{model_name.replace('/', '_')}_config.yaml", yaml_content.encode())
248
 
249
  zip_buffer.seek(0)
250
  return zip_buffer, "analysis_data.zip"
251
 
 
252
  # --------------------------------------------------------------------
253
+ # PART 2: THE "DATA START" SNIPPET (RANKS 44–105) + Parser
254
  # --------------------------------------------------------------------
255
+ # This is your larger dataset, rank = 44..105
256
  benchmark_data = [
 
 
257
  {
258
  "rank": 44,
259
  "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3",
 
280
  }
281
  }
282
  },
283
+ {
284
+ "rank": 45,
285
+ "name": "sthenno-com/miscii-14b-1225",
286
+ "scores": {
287
+ "average": 40.08,
288
+ "IFEval": 78.78,
289
+ "BBH": 50.91,
290
+ "MATH": 31.57,
291
+ "GPQA": 17.00,
292
+ "MUSR": 14.77,
293
+ "MMLU-PRO": 47.46
294
+ },
295
+ "hf_url": "https://huggingface.co/sthenno-com/miscii-14b-1225",
296
+ "known_config": None
297
+ },
298
+ {
299
+ "rank": 46,
300
+ "name": "djuna/Q2.5-Veltha-14B-0.5",
301
+ "scores": {
302
+ "average": 39.96,
303
+ "IFEval": 77.96,
304
+ "BBH": 50.32,
305
+ "MATH": 33.84,
306
+ "GPQA": 15.77,
307
+ "MUSR": 14.17,
308
+ "MMLU-PRO": 47.72
309
+ },
310
+ "hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B-0.5",
311
+ "known_config": None
312
+ },
313
+ {
314
+ "rank": 48,
315
+ "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
316
+ "scores": {
317
+ "average": 39.81,
318
+ "IFEval": 71.62,
319
+ "BBH": 48.76,
320
+ "MATH": 33.99,
321
+ "GPQA": 17.34,
322
+ "MUSR": 19.23,
323
+ "MMLU-PRO": 47.95
324
+ },
325
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-model_stock",
326
+ "known_config": None
327
+ },
328
+ {
329
+ "rank": 50,
330
+ "name": "sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
331
+ "scores": {
332
+ "average": 39.46,
333
+ "IFEval": 68.72,
334
+ "BBH": 47.71,
335
+ "MATH": 35.05,
336
+ "GPQA": 18.23,
337
+ "MUSR": 19.56,
338
+ "MMLU-PRO": 47.50
339
+ },
340
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwen2.5-14B-Vimarckoso-v3-Prose01",
341
+ "known_config": None
342
+ },
343
+ {
344
+ "rank": 52,
345
+ "name": "arcee-ai/Virtuoso-Small",
346
+ "scores": {
347
+ "average": 39.43,
348
+ "IFEval": 79.35,
349
+ "BBH": 50.40,
350
+ "MATH": 34.29,
351
+ "GPQA": 11.52,
352
+ "MUSR": 14.44,
353
+ "MMLU-PRO": 46.57
354
+ },
355
+ "hf_url": "https://huggingface.co/arcee-ai/Virtuoso-Small",
356
+ "known_config": None
357
+ },
358
+ {
359
+ "rank": 54,
360
+ "name": "sometimesanotion/Qwentinuum-14B-v6",
361
+ "scores": {
362
+ "average": 39.23,
363
+ "IFEval": 63.04,
364
+ "BBH": 50.23,
365
+ "MATH": 33.84,
366
+ "GPQA": 18.23,
367
+ "MUSR": 21.18,
368
+ "MMLU-PRO": 48.89
369
+ },
370
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v6",
371
+ "known_config": None
372
+ },
373
+ {
374
+ "rank": 55,
375
+ "name": "djuna/Q2.5-Veltha-14B",
376
+ "scores": {
377
+ "average": 39.21,
378
+ "IFEval": 82.92,
379
+ "BBH": 49.75,
380
+ "MATH": 28.02,
381
+ "GPQA": 14.54,
382
+ "MUSR": 12.26,
383
+ "MMLU-PRO": 47.76
384
+ },
385
+ "hf_url": "https://huggingface.co/djuna/Q2.5-Veltha-14B",
386
+ "known_config": None
387
+ },
388
+ {
389
+ "rank": 57,
390
+ "name": "allknowingroger/QwenSlerp6-14B",
391
+ "scores": {
392
+ "average": 39.02,
393
+ "IFEval": 68.67,
394
+ "BBH": 47.59,
395
+ "MATH": 34.14,
396
+ "GPQA": 16.44,
397
+ "MUSR": 18.32,
398
+ "MMLU-PRO": 48.95
399
+ },
400
+ "hf_url": "https://huggingface.co/allknowingroger/QwenSlerp6-14B",
401
+ "known_config": None
402
+ },
403
+ {
404
+ "rank": 58,
405
+ "name": "allknowingroger/QwenSlerp5-14B",
406
+ "scores": {
407
+ "average": 38.94,
408
+ "IFEval": 71.19,
409
+ "BBH": 47.39,
410
+ "MATH": 33.16,
411
+ "GPQA": 15.32,
412
+ "MUSR": 17.81,
413
+ "MMLU-PRO": 48.78
414
+ },
415
+ "hf_url": "https://huggingface.co/allknowingroger/QwenSlerp5-14B",
416
+ "known_config": None
417
+ },
418
+ {
419
+ "rank": 59,
420
+ "name": "sometimesanotion/Qwentinuum-14B-v5",
421
+ "scores": {
422
+ "average": 38.87,
423
+ "IFEval": 62.86,
424
+ "BBH": 50.28,
425
+ "MATH": 31.57,
426
+ "GPQA": 18.34,
427
+ "MUSR": 21.09,
428
+ "MMLU-PRO": 49.09
429
+ },
430
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v5",
431
+ "known_config": None
432
+ },
433
+ {
434
+ "rank": 60,
435
+ "name": "sometimesanotion/Qwenvergence-14B-v6-Prose",
436
+ "scores": {
437
+ "average": 38.82,
438
+ "IFEval": 59.90,
439
+ "BBH": 50.12,
440
+ "MATH": 34.89,
441
+ "GPQA": 18.46,
442
+ "MUSR": 21.02,
443
+ "MMLU-PRO": 48.56
444
+ },
445
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwenvergence-14B-v6-Prose",
446
+ "known_config": None
447
+ },
448
+ {
449
+ "rank": 61,
450
+ "name": "CultriX/Qwen2.5-14B-Brocav3",
451
+ "scores": {
452
+ "average": 38.76,
453
+ "IFEval": 69.52,
454
+ "BBH": 49.05,
455
+ "MATH": 32.25,
456
+ "GPQA": 14.54,
457
+ "MUSR": 19.25,
458
+ "MMLU-PRO": 47.97
459
+ },
460
+ "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav3",
461
+ "known_config": None
462
+ },
463
+ {
464
+ "rank": 62,
465
+ "name": "sometimesanotion/Qwentinuum-14B-v7",
466
+ "scores": {
467
+ "average": 38.76,
468
+ "IFEval": 61.09,
469
+ "BBH": 50.35,
470
+ "MATH": 33.38,
471
+ "GPQA": 18.79,
472
+ "MUSR": 19.95,
473
+ "MMLU-PRO": 49.00
474
+ },
475
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v7",
476
+ "known_config": None
477
+ },
478
+ {
479
+ "rank": 64,
480
+ "name": "sometimesanotion/Qwentinuum-14B-v3",
481
+ "scores": {
482
+ "average": 38.74,
483
+ "IFEval": 61.58,
484
+ "BBH": 50.04,
485
+ "MATH": 32.85,
486
+ "GPQA": 18.34,
487
+ "MUSR": 20.62,
488
+ "MMLU-PRO": 49.03
489
+ },
490
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v3",
491
+ "known_config": None
492
+ },
493
+ {
494
+ "rank": 65,
495
+ "name": "allura-org/TQ2.5-14B-Aletheia-v1",
496
+ "scores": {
497
+ "average": 38.74,
498
+ "IFEval": 75.30,
499
+ "BBH": 50.88,
500
+ "MATH": 29.53,
501
+ "GPQA": 14.99,
502
+ "MUSR": 14.61,
503
+ "MMLU-PRO": 47.12
504
+ },
505
+ "hf_url": "https://huggingface.co/allura-org/TQ2.5-14B-Aletheia-v1",
506
+ "known_config": None
507
+ },
508
+ {
509
+ "rank": 66,
510
+ "name": "qingy2024/Fusion4-14B-Instruct",
511
+ "scores": {
512
+ "average": 38.73,
513
+ "IFEval": 76.49,
514
+ "BBH": 50.70,
515
+ "MATH": 33.91,
516
+ "GPQA": 10.74,
517
+ "MUSR": 13.97,
518
+ "MMLU-PRO": 46.60
519
+ },
520
+ "hf_url": "https://huggingface.co/qingy2024/Fusion4-14B-Instruct",
521
+ "known_config": None
522
+ },
523
+ {
524
+ "rank": 68,
525
+ "name": "CultriX/Qwen2.5-14B-Brocav7",
526
+ "scores": {
527
+ "average": 38.52,
528
+ "IFEval": 67.24,
529
+ "BBH": 48.91,
530
+ "MATH": 31.87,
531
+ "GPQA": 15.66,
532
+ "MUSR": 20.15,
533
+ "MMLU-PRO": 47.31
534
+ },
535
+ "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav7",
536
+ "known_config": None
537
+ },
538
+ {
539
+ "rank": 71,
540
+ "name": "sometimesanotion/Qwentinuum-14B-v6-Prose",
541
+ "scores": {
542
+ "average": 38.46,
543
+ "IFEval": 56.43,
544
+ "BBH": 50.14,
545
+ "MATH": 35.57,
546
+ "GPQA": 18.46,
547
+ "MUSR": 21.34,
548
+ "MMLU-PRO": 48.80
549
+ },
550
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v6-Prose",
551
+ "known_config": None
552
+ },
553
+ {
554
+ "rank": 76,
555
+ "name": "CultriX/Qwen2.5-14B-Brocav6",
556
+ "scores": {
557
+ "average": 38.32,
558
+ "IFEval": 69.95,
559
+ "BBH": 47.82,
560
+ "MATH": 29.61,
561
+ "GPQA": 15.66,
562
+ "MUSR": 18.88,
563
+ "MMLU-PRO": 47.99
564
+ },
565
+ "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Brocav6",
566
+ "known_config": None
567
+ },
568
+ {
569
+ "rank": 80,
570
+ "name": "CultriX/SeQwence-14Bv1",
571
+ "scores": {
572
+ "average": 38.20,
573
+ "IFEval": 66.78,
574
+ "BBH": 47.19,
575
+ "MATH": 33.53,
576
+ "GPQA": 14.88,
577
+ "MUSR": 18.80,
578
+ "MMLU-PRO": 48.00
579
+ },
580
+ "hf_url": "https://huggingface.co/CultriX/SeQwence-14Bv1",
581
+ "known_config": None
582
+ },
583
+ {
584
+ "rank": 85,
585
+ "name": "sometimesanotion/Qwentinuum-14B-v013",
586
+ "scores": {
587
+ "average": 37.96,
588
+ "IFEval": 67.11,
589
+ "BBH": 43.97,
590
+ "MATH": 33.01,
591
+ "GPQA": 14.32,
592
+ "MUSR": 24.99,
593
+ "MMLU-PRO": 44.34
594
+ },
595
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v013",
596
+ "known_config": None
597
+ },
598
+ {
599
+ "rank": 86,
600
+ "name": "CultriX/Qwen2.5-14B-Wernickev3",
601
+ "scores": {
602
+ "average": 37.94,
603
+ "IFEval": 70.48,
604
+ "BBH": 44.58,
605
+ "MATH": 32.78,
606
+ "GPQA": 14.99,
607
+ "MUSR": 18.69,
608
+ "MMLU-PRO": 46.13
609
+ },
610
+ "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Wernickev3",
611
+ "known_config": None
612
+ },
613
+ {
614
+ "rank": 88,
615
+ "name": "allknowingroger/QwenSlerp4-14B",
616
+ "scores": {
617
+ "average": 37.80,
618
+ "IFEval": 63.28,
619
+ "BBH": 49.38,
620
+ "MATH": 30.97,
621
+ "GPQA": 16.33,
622
+ "MUSR": 17.59,
623
+ "MMLU-PRO": 49.28
624
+ },
625
+ "hf_url": "https://huggingface.co/allknowingroger/QwenSlerp4-14B",
626
+ "known_config": None
627
+ },
628
+ {
629
+ "rank": 89,
630
+ "name": "CultriX/Qwen2.5-14B-Broca",
631
+ "scores": {
632
+ "average": 37.72,
633
+ "IFEval": 56.04,
634
+ "BBH": 50.03,
635
+ "MATH": 34.59,
636
+ "GPQA": 18.23,
637
+ "MUSR": 18.95,
638
+ "MMLU-PRO": 48.49
639
+ },
640
+ "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Broca",
641
+ "known_config": None
642
+ },
643
+ {
644
+ "rank": 90,
645
+ "name": "CultriX/Qwen2.5-14B-Emerged",
646
+ "scores": {
647
+ "average": 37.66,
648
+ "IFEval": 70.00,
649
+ "BBH": 45.93,
650
+ "MATH": 30.74,
651
+ "GPQA": 14.32,
652
+ "MUSR": 18.47,
653
+ "MMLU-PRO": 46.51
654
+ },
655
+ "hf_url": "https://huggingface.co/CultriX/Qwen2.5-14B-Emerged",
656
+ "known_config": None
657
+ },
658
+ {
659
+ "rank": 91,
660
+ "name": "sometimesanotion/Qwentinuum-14B-v8",
661
+ "scores": {
662
+ "average": 37.65,
663
+ "IFEval": 54.12,
664
+ "BBH": 50.11,
665
+ "MATH": 34.14,
666
+ "GPQA": 17.79,
667
+ "MUSR": 20.75,
668
+ "MMLU-PRO": 49.02
669
+ },
670
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwentinuum-14B-v8",
671
+ "known_config": None
672
+ },
673
+ {
674
+ "rank": 92,
675
+ "name": "qingy2024/Fusion-14B-Instruct",
676
+ "scores": {
677
+ "average": 37.64,
678
+ "IFEval": 72.60,
679
+ "BBH": 48.58,
680
+ "MATH": 30.97,
681
+ "GPQA": 13.98,
682
+ "MUSR": 14.81,
683
+ "MMLU-PRO": 44.93
684
+ },
685
+ "hf_url": "https://huggingface.co/qingy2024/Fusion-14B-Instruct",
686
+ "known_config": None
687
+ },
688
+ {
689
+ "rank": 94,
690
+ "name": "CultriX/Qwestion-14B",
691
+ "scores": {
692
+ "average": 37.63,
693
+ "IFEval": 63.18,
694
+ "BBH": 48.76,
695
+ "MATH": 31.72,
696
+ "GPQA": 15.77,
697
+ "MUSR": 17.22,
698
+ "MMLU-PRO": 49.14
699
+ },
700
+ "hf_url": "https://huggingface.co/CultriX/Qwestion-14B",
701
+ "known_config": None
702
+ },
703
+ {
704
+ "rank": 99,
705
+ "name": "sometimesanotion/Qwenvergence-14B-v3-Prose",
706
+ "scores": {
707
+ "average": 37.37,
708
+ "IFEval": 49.18,
709
+ "BBH": 49.80,
710
+ "MATH": 35.57,
711
+ "GPQA": 19.35,
712
+ "MUSR": 21.77,
713
+ "MMLU-PRO": 48.55
714
+ },
715
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwenvergence-14B-v3-Prose",
716
+ "known_config": None
717
+ },
718
+ {
719
+ "rank": 102,
720
+ "name": "CultriX/SeQwence-14B-v5",
721
+ "scores": {
722
+ "average": 37.27,
723
+ "IFEval": 59.20,
724
+ "BBH": 50.00,
725
+ "MATH": 31.04,
726
+ "GPQA": 16.00,
727
+ "MUSR": 18.33,
728
+ "MMLU-PRO": 49.05
729
+ },
730
+ "hf_url": "https://huggingface.co/CultriX/SeQwence-14B-v5",
731
+ "known_config": None
732
+ },
733
+ {
734
+ "rank": 103,
735
+ "name": "sometimesanotion/Qwen-14B-ProseStock-v4",
736
+ "scores": {
737
+ "average": 37.23,
738
+ "IFEval": 49.42,
739
+ "BBH": 49.54,
740
+ "MATH": 35.50,
741
+ "GPQA": 18.46,
742
+ "MUSR": 21.70,
743
+ "MMLU-PRO": 48.74
744
+ },
745
+ "hf_url": "https://huggingface.co/sometimesanotion/Qwen-14B-ProseStock-v4",
746
+ "known_config": None
747
+ },
748
+ {
749
+ "rank": 104,
750
+ "name": "sometimesanotion/IF-reasoning-experiment-40",
751
+ "scores": {
752
+ "average": 37.21,
753
+ "IFEval": 63.30,
754
+ "BBH": 44.31,
755
+ "MATH": 27.72,
756
+ "GPQA": 17.34,
757
+ "MUSR": 25.86,
758
+ "MMLU-PRO": 44.72
759
+ },
760
+ "hf_url": "https://huggingface.co/sometimesanotion/IF-reasoning-experiment-40",
761
+ "known_config": None
762
+ },
763
+ {
764
+ "rank": 105,
765
+ "name": "CultriX/SeQwence-14B-EvolMerge",
766
+ "scores": {
767
+ "average": 37.20,
768
+ "IFEval": 53.82,
769
+ "BBH": 50.78,
770
+ "MATH": 31.80,
771
+ "GPQA": 17.45,
772
+ "MUSR": 20.26,
773
+ "MMLU-PRO": 49.10
774
+ },
775
+ "hf_url": "https://huggingface.co/CultriX/SeQwence-14B-EvolMerge",
776
+ "known_config": None
777
+ }
778
  ]
779
 
 
780
  def snippet_scrape_model_page(url):
781
  """
782
+ Equivalent scraping function for the larger dataset
783
+ to look for <pre> YAML and a .metadata section.
784
  """
785
+ try:
786
+ response = requests.get(url)
787
+ if response.status_code != 200:
788
+ return f"Error: Unable to fetch the page (Status Code: {response.status_code})"
789
+
790
+ soup = BeautifulSoup(response.text, "html.parser")
791
+
792
+ yaml_config = soup.find("pre")
793
+ yaml_text = yaml_config.text.strip() if yaml_config else "No YAML configuration found."
794
+
795
+ metadata_section = soup.find("div", class_="metadata")
796
+ metadata_text = metadata_section.text.strip() if metadata_section else "No metadata found."
797
+
798
+ return {
799
+ "yaml_configuration": yaml_text,
800
+ "metadata": metadata_text
801
+ }
802
+
803
+ except Exception as e:
804
+ return f"Error: {str(e)}"
805
 
806
  def snippet_print_benchmark_and_config_info(model_info):
807
  """
808
+ Prints an overview for each model in the rank=44..105 dataset.
809
+ If known_config is not None, prints it. Otherwise attempts to scrape.
810
  """
811
  print(f"---\nModel Rank: {model_info['rank']}")
812
  print(f"Model Name: {model_info['name']}")
 
818
  print(f"Models average score in MUSR benchmarks in %: {model_info['scores']['MUSR']}")
819
  print(f"Models average score in MMLU-PRO benchmarks in %: {model_info['scores']['MMLU-PRO']}")
820
 
821
+ # If there's a known_config, print it in YAML form and stop.
822
  if model_info["known_config"] is not None:
823
  print("###")
824
  print("models:")
 
828
  print(f"base_model: {model_info['known_config']['base_model']}")
829
  print(f"dtype: {model_info['known_config']['dtype']}")
830
  print("parameters:")
831
+ t_vals = model_info["known_config"]["parameters"]["t"]
832
+ print(f" t: {t_vals} # V shaped curve: Hermes for input & output, WizardMath in the middle layers")
833
  print("###")
834
  return
835
 
836
+ # Otherwise, do scraping:
837
  scraped = snippet_scrape_model_page(model_info["hf_url"])
838
  if isinstance(scraped, str):
839
  # Means it's an error string or something
840
+ print("(No MergeKit configuration found or scraping error.)")
841
+ print(scraped)
 
 
 
842
  return
843
  else:
844
+ # It's presumably a dict
845
+ if "No YAML configuration found." in scraped["yaml_configuration"]:
846
  print("(No MergeKit configuration found.)\n")
 
847
  print("You can try the following Python script to scrape the model page:\n")
848
  print("#" * 70)
849
  print(f'''import requests
 
877
  print(result)''')
878
  print("#" * 70)
879
  else:
880
+ # Found some YAML
881
  print("###")
882
  print(scraped["yaml_configuration"])
883
  print("###")
884
 
885
  def run_non_tiny_benchmarks():
886
  """
887
+ Captures the stdout from printing each model in benchmark_data (ranks 44..105),
888
+ returning the entire output as a single string for Gradio to display.
889
  """
890
  old_stdout = sys.stdout
891
  buffer = io.StringIO()
 
897
  sys.stdout = old_stdout
898
  return buffer.getvalue()
899
 
 
900
  # --------------------------------------------------------------------
901
+ # PART 3: The Gradio App
902
  # --------------------------------------------------------------------
903
  with gr.Blocks() as demo:
904
  gr.Markdown("# Comprehensive Model Performance Analysis with Hugging Face Links")
905
 
906
+ # The existing UI for the “tiny” data
907
  with gr.Row():
908
  btn1 = gr.Button("Show Average Performance")
909
  img1 = gr.Image(type="pil", label="Average Performance Plot")
 
928
  heatmap_download = gr.File(label="Download Heatmap")
929
  btn4.click(plot_heatmap, outputs=[heatmap_img, heatmap_download])
930
 
931
+ # Scraping & YAML handling for the *tiny* table
932
  with gr.Row():
933
  model_selector = gr.Dropdown(choices=df_full["Model Configuration"].tolist(), label="Select a Model")
934
  with gr.Column():
 
940
  yaml_download = gr.File(label="Download MergeKit Configuration")
941
  save_yaml_btn.click(download_yaml, inputs=[yaml_output, model_selector], outputs=yaml_download)
942
 
943
+ # Download everything (CSV, plots, any found YAML)
944
  with gr.Row():
945
  download_all_btn = gr.Button("Download Everything")
946
  all_downloads = gr.File(label="Download All Data")
947
  download_all_btn.click(download_all_data, outputs=all_downloads)
948
 
949
+ # Live Scraping
950
  gr.Markdown("## Live Scraping Features")
951
  with gr.Row():
952
  url_input = gr.Textbox(label="Enter Hugging Face Model URL", placeholder="https://huggingface.co/<model>")
 
954
  live_scrape_output = gr.Textbox(label="Scraped Data", lines=15)
955
  live_scrape_btn.click(display_scraped_model_data, inputs=url_input, outputs=live_scrape_output)
956
 
957
+ # Non-Tiny Benchmarks
958
  gr.Markdown("## Non-Tiny Benchmark Parser (Ranks 44–105)")
959
  with gr.Row():
960
  parse_non_tiny_btn = gr.Button("Parse Non-Tiny Benchmarks")
961
  parse_non_tiny_output = gr.Textbox(label="Non-Tiny Benchmark Output", lines=30)
962
  parse_non_tiny_btn.click(fn=run_non_tiny_benchmarks, outputs=parse_non_tiny_output)
963
 
964
+ demo.launch()