Update src/helper.py
src/helper.py  CHANGED  (+65 -14)
@@ -122,7 +122,60 @@ def build_langname_to_isos(iso2name):
         name2isos[name].add(iso)
     return name2isos
 
+def compare_models(model_1_name, model_2_name):
+    """
+    Prepares a DataFrame comparing the performance of two models task-by-task.
+    """
+    if model_1_name == model_2_name:
+        return pd.DataFrame([{"Info": "Please select two different models to compare."}])
+
+    # Get data for each model from the main leaderboard results
+    df1 = all_df[(all_df['model'] == model_1_name) & (all_df['leaderboard'] == 'main')][['task', 'score', 'metric']].rename(columns={'score': model_1_name})
+    df2 = all_df[(all_df['model'] == model_2_name) & (all_df['leaderboard'] == 'main')][['task', 'score']].rename(columns={'score': model_2_name})
+
+    if df1.empty or df2.empty:
+        return pd.DataFrame([{"Info": "One or both selected models have no 'main' leaderboard data to compare."}])
+
+    # Merge the two dataframes on the task ID
+    comp_df = pd.merge(df1, df2, on='task', how='outer')
+
+    # Add descriptive columns
+    comp_df['Cluster'] = comp_df['task'].map(TASK_TO_CLUSTER_MAP)
+    comp_df['Task Name'] = comp_df['task'].map(TASKS_LIST)
+    comp_df['Metric'] = comp_df['metric'].map(metrics_list)
+    comp_df.fillna({'Cluster': 'Uncategorized'}, inplace=True)
+
+    # Calculate the score difference, ensuring scores are numeric
+    score1 = pd.to_numeric(comp_df[model_1_name], errors='coerce')
+    score2 = pd.to_numeric(comp_df[model_2_name], errors='coerce')
+    comp_df['Difference'] = score1 - score2
+
+    # Format the difference column with colors
+    def format_diff(d):
+        if pd.isna(d):
+            return "---"
+        if d > 0.001: # Model 1 is better
+            return f"<span style='color:green; font-weight:bold;'>+{d:.2f}</span>"
+        elif d < -0.001: # Model 2 is better
+            return f"<span style='color:red; font-weight:bold;'>{d:.2f}</span>"
+        else:
+            return f"{d:.2f}"
+
+    # Format all score columns
+    comp_df[model_1_name] = comp_df[model_1_name].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
+    comp_df[model_2_name] = comp_df[model_2_name].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
+    comp_df['Difference'] = comp_df['Difference'].apply(format_diff)
+
+    # --- MODIFIED: Added 'task' to the list of final columns ---
+    final_cols = ['Cluster', 'Task Name', 'task', 'Metric', model_1_name, model_2_name, 'Difference']
+    comp_df = comp_df[final_cols]
+    comp_df = comp_df.sort_values(by=['Cluster', 'Task Name']).reset_index(drop=True)
+
+    # --- NEW: Renamed 'task' column to 'Task ID' for display ---
+    comp_df.rename(columns={'task': 'Task ID'}, inplace=True)
+
+    return comp_df
 
 def get_model_table(model_name):
     """
     Generates a performance table for a specific model, showing cluster, task, and score.
@@ -305,6 +358,18 @@ def df_to_html(df, col_minwidth=90, col_maxwidth=140, model_col_width=400):
 
 
 
+cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
+
+LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
+#show only African langs
+LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
+# TASK_NAME_LIST = sorted(list(TASKS_LIST.values()))
+# Create a list of choices in the format "Task Name (id)"
+TASK_NAME_LIST = sorted([f"{name} ({key})" for key, name in TASKS_LIST.items()])
+TASK_NAME2KEY = {v: k for k, v in TASKS_LIST.items()}
+
+# Get the list of unique model names for the new dropdown
+MODEL_NAME_LIST = sorted(all_df['model'].unique()) if not all_df.empty else []
 
 def get_lang_table(lang_name):
     iso_codes = LANGNAME2ISOS.get(lang_name, [])
@@ -354,17 +419,3 @@ def get_lang_table(lang_name):
     table = table.drop(columns=['rank_symbol', '__overall_score_float'])
     return table
 
-
-
-cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
-
-LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
-#show only African langs
-LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
-# TASK_NAME_LIST = sorted(list(TASKS_LIST.values()))
-# Create a list of choices in the format "Task Name (id)"
-TASK_NAME_LIST = sorted([f"{name} ({key})" for key, name in TASKS_LIST.items()])
-TASK_NAME2KEY = {v: k for k, v in TASKS_LIST.items()}
-
-# Get the list of unique model names for the new dropdown
-MODEL_NAME_LIST = sorted(all_df['model'].unique()) if not all_df.empty else []
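
For context on how the new helper is meant to be consumed: compare_models returns a pandas DataFrame whose Difference column contains HTML <span> markup, so it is intended for a component that renders HTML. Below is a minimal wiring sketch, assuming the Space's UI is a Gradio Blocks app; the component names, the datatype="html" setting, and the import path are illustrative assumptions, not code from this commit.

# A minimal wiring sketch (illustrative, not part of this commit).
# Assumptions: the Space UI is a Gradio Blocks app, and these helpers are
# importable from src.helper; component names are made up.
import gradio as gr

from src.helper import MODEL_NAME_LIST, compare_models

with gr.Blocks() as demo:
    model_1_dd = gr.Dropdown(choices=MODEL_NAME_LIST, label="Model 1")
    model_2_dd = gr.Dropdown(choices=MODEL_NAME_LIST, label="Model 2")
    # datatype="html" lets the colored <span> in the Difference column render
    comparison_table = gr.Dataframe(datatype="html")
    # Recompute the comparison whenever either dropdown changes
    for dd in (model_1_dd, model_2_dd):
        dd.change(compare_models,
                  inputs=[model_1_dd, model_2_dd],
                  outputs=comparison_table)

if __name__ == "__main__":
    demo.launch()

A nice property of the helper's design: the same-model and missing-data cases also return a one-row "Info" DataFrame, so the return type is uniform and a single output component can render every code path.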
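One caveat for anyone consuming the relocated constants: TASK_NAME_LIST holds combined "Task Name (id)" display strings, while TASK_NAME2KEY maps the bare task name to its key, so a dropdown choice built from the former cannot be looked up in the latter directly. A hedged sketch of bridging the two; the helper and the example choice string are illustrative, not part of the commit.

import re

# Hypothetical helper (not in the commit): recover the task key from a
# "Task Name (id)" choice string, falling back to the name-to-key map
# (e.g. TASK_NAME2KEY) for bare task names.
def choice_to_task_key(choice: str, name2key: dict) -> str:
    match = re.match(r"^(.*) \(([^()]+)\)$", choice)
    if match:
        return match.group(2)   # "Task Name (id)" -> "id"
    return name2key[choice]     # bare "Task Name" -> "id"

# Illustrative usage, with a made-up task entry:
# choice_to_task_key("Machine Translation (mt)", TASK_NAME2KEY) -> "mt"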