elmadany committed
Commit 1409d6f · verified · 1 Parent(s): e3b1a31

Update src/helper.py

Files changed (1): src/helper.py (+65 -14)
src/helper.py CHANGED
@@ -122,7 +122,60 @@ def build_langname_to_isos(iso2name):
         name2isos[name].add(iso)
     return name2isos
 
+def compare_models(model_1_name, model_2_name):
+    """
+    Prepares a DataFrame comparing the performance of two models task-by-task.
+    """
+    if model_1_name == model_2_name:
+        return pd.DataFrame([{"Info": "Please select two different models to compare."}])
+
+    # Get data for each model from the main leaderboard results
+    df1 = all_df[(all_df['model'] == model_1_name) & (all_df['leaderboard'] == 'main')][['task', 'score', 'metric']].rename(columns={'score': model_1_name})
+    df2 = all_df[(all_df['model'] == model_2_name) & (all_df['leaderboard'] == 'main')][['task', 'score']].rename(columns={'score': model_2_name})
+
+    if df1.empty or df2.empty:
+        return pd.DataFrame([{"Info": "One or both selected models have no 'main' leaderboard data to compare."}])
+
+    # Merge the two dataframes on the task ID
+    comp_df = pd.merge(df1, df2, on='task', how='outer')
+
+    # Add descriptive columns
+    comp_df['Cluster'] = comp_df['task'].map(TASK_TO_CLUSTER_MAP)
+    comp_df['Task Name'] = comp_df['task'].map(TASKS_LIST)
+    comp_df['Metric'] = comp_df['metric'].map(metrics_list)
+    comp_df.fillna({'Cluster': 'Uncategorized'}, inplace=True)
+
+    # Calculate the score difference, ensuring scores are numeric
+    score1 = pd.to_numeric(comp_df[model_1_name], errors='coerce')
+    score2 = pd.to_numeric(comp_df[model_2_name], errors='coerce')
+    comp_df['Difference'] = score1 - score2
+
+    # Format the difference column with colors
+    def format_diff(d):
+        if pd.isna(d):
+            return "---"
+        if d > 0.001: # Model 1 is better
+            return f"<span style='color:green; font-weight:bold;'>+{d:.2f}</span>"
+        elif d < -0.001: # Model 2 is better
+            return f"<span style='color:red; font-weight:bold;'>{d:.2f}</span>"
+        else:
+            return f"{d:.2f}"
+
+    # Format all score columns
+    comp_df[model_1_name] = comp_df[model_1_name].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
+    comp_df[model_2_name] = comp_df[model_2_name].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
+    comp_df['Difference'] = comp_df['Difference'].apply(format_diff)
 
+    # --- MODIFIED: Added 'task' to the list of final columns ---
+    final_cols = ['Cluster', 'Task Name', 'task', 'Metric', model_1_name, model_2_name, 'Difference']
+    comp_df = comp_df[final_cols]
+    comp_df = comp_df.sort_values(by=['Cluster', 'Task Name']).reset_index(drop=True)
+
+    # --- NEW: Renamed 'task' column to 'Task ID' for display ---
+    comp_df.rename(columns={'task': 'Task ID'}, inplace=True)
+
+    return comp_df
+
 def get_model_table(model_name):
     """
     Generates a performance table for a specific model, showing cluster, task, and score.
@@ -305,6 +358,18 @@ def df_to_html(df, col_minwidth=90, col_maxwidth=140, model_col_width=400):
 
 
 
+cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
+
+LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
+#show only African langs
+LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
+# TASK_NAME_LIST = sorted(list(TASKS_LIST.values()))
+# Create a list of choices in the format "Task Name (id)"
+TASK_NAME_LIST = sorted([f"{name} ({key})" for key, name in TASKS_LIST.items()])
+TASK_NAME2KEY = {v: k for k, v in TASKS_LIST.items()}
+
+# Get the list of unique model names for the new dropdown
+MODEL_NAME_LIST = sorted(all_df['model'].unique()) if not all_df.empty else []
 
 def get_lang_table(lang_name):
     iso_codes = LANGNAME2ISOS.get(lang_name, [])
@@ -354,17 +419,3 @@ def get_lang_table(lang_name):
     table = table.drop(columns=['rank_symbol', '__overall_score_float'])
     return table
 
-
-
-cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
-
-LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
-#show only African langs
-LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
-# TASK_NAME_LIST = sorted(list(TASKS_LIST.values()))
-# Create a list of choices in the format "Task Name (id)"
-TASK_NAME_LIST = sorted([f"{name} ({key})" for key, name in TASKS_LIST.items()])
-TASK_NAME2KEY = {v: k for k, v in TASKS_LIST.items()}
-
-# Get the list of unique model names for the new dropdown
-MODEL_NAME_LIST = sorted(all_df['model'].unique()) if not all_df.empty else []
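
For context, a minimal sketch of how the new compare_models function and MODEL_NAME_LIST might be wired into the Space's UI. This is not part of the commit: it assumes the app is a Gradio Blocks app (plausible for a leaderboard Space, but an assumption), that src.helper is importable under that path, and that df_to_html renders the comparison table acceptably; the component names and the app.py wiring below are hypothetical.

# Hypothetical app.py wiring -- a sketch, not part of this commit.
# Assumes a Gradio app and that src/helper.py exposes compare_models,
# df_to_html, and MODEL_NAME_LIST as defined in the diff above.
import gradio as gr

from src.helper import compare_models, df_to_html, MODEL_NAME_LIST

def render_comparison(model_1, model_2):
    # compare_models returns a DataFrame whose 'Difference' column
    # already contains HTML <span> markup, so render it as HTML
    # rather than as a plain dataframe component.
    comp_df = compare_models(model_1, model_2)
    return df_to_html(comp_df)

with gr.Blocks() as demo:
    with gr.Row():
        model_1_dd = gr.Dropdown(choices=MODEL_NAME_LIST, label="Model 1")
        model_2_dd = gr.Dropdown(choices=MODEL_NAME_LIST, label="Model 2")
    compare_btn = gr.Button("Compare")
    output_html = gr.HTML()
    compare_btn.click(fn=render_comparison,
                      inputs=[model_1_dd, model_2_dd],
                      outputs=output_html)

demo.launch()

Rendering through gr.HTML is the one load-bearing choice here: the green/red markup produced by format_diff only shows up if the output component interprets HTML.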