Spaces:
Running
Running
Upload 6 files
Browse files- app.py +230 -0
- envs.py +12 -0
- helper.py +312 -0
- requirements.txt +16 -0
- src/envs.py +12 -0
- src/helper.py +312 -0
app.py
ADDED
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import gradio as gr
|
3 |
+
|
4 |
+
from src.helper import *
|
5 |
+
# Custom CSS to replicate the Google-style card design from the image
|
6 |
+
custom_head_html = """
|
7 |
+
<link rel="stylesheet" href="https://africa.dlnlp.ai/sahara/font-awesome/css/font-awesome.min.css">
|
8 |
+
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css">
|
9 |
+
<script src="https://ajax.googleapis.com/ajax/libs/jquery/3.2.0/jquery.min.js"></script>
|
10 |
+
<script src="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/js/bootstrap.min.js"></script>
|
11 |
+
<link rel="stylesheet" type="text/css" href="./public/css/style.min.css">
|
12 |
+
<script defer src="./public/js/script.js"></script>
|
13 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
14 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
15 |
+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet">
|
16 |
+
"""
|
17 |
+
|
18 |
+
# Header banner shown at the top of the leaderboard page.
# Fix: removed a stray unmatched `</p>` closing tag (there was no opening <p>).
new_header_html = """
<center>
<br><br><br>
<img src="https://africa.dlnlp.ai/sahara/img/sahara_web_main.jpg" alt="Sahara logo" width="60%">
</center>
<br style="height:1px;">
"""
|
26 |
+
|
27 |
+
google_style_css = """
|
28 |
+
div.gradio-container-5-34-1{
|
29 |
+
background:#FFFBF5 !important;
|
30 |
+
}
|
31 |
+
|
32 |
+
div.svelte-1nguped {
|
33 |
+
background: white !important;
|
34 |
+
}
|
35 |
+
/* Main Content Area */
|
36 |
+
.content-section {
|
37 |
+
padding: 60px 0;
|
38 |
+
}
|
39 |
+
.content-card {
|
40 |
+
background-color: #fff;
|
41 |
+
border-radius: 12px;
|
42 |
+
box-shadow: 0 10px 15px -3px rgba(0,0,0,0.1), 0 4px 6px -2px rgba(0,0,0,0.05);
|
43 |
+
padding: 40px;
|
44 |
+
margin-bottom: 40px;
|
45 |
+
}
|
46 |
+
.btn-cite {
|
47 |
+
color: #7d3561;
|
48 |
+
font-size: 16px;
|
49 |
+
margin: 0 3px; /* Add spacing between multiple icons */
|
50 |
+
}
|
51 |
+
.content-card h4 {
|
52 |
+
font-family: "Rubik", sans-serif;
|
53 |
+
color: #7d3561;
|
54 |
+
}
|
55 |
+
.content-card h2 {
|
56 |
+
font-family: "Rubik", sans-serif;
|
57 |
+
font-size: 30px;
|
58 |
+
font-weight: 600;
|
59 |
+
line-height: 1.25;
|
60 |
+
letter-spacing: -1px;
|
61 |
+
color: #2f3b7d;
|
62 |
+
text-transform:none;
|
63 |
+
|
64 |
+
/* font-size: 30px;
|
65 |
+
font-weight: bold;
|
66 |
+
color: #D97706; /* Brand Orange */
|
67 |
+
margin-top: 0;
|
68 |
+
margin-bottom: 20px; */
|
69 |
+
}
|
70 |
+
.content-card p {
|
71 |
+
/* font-size: 18px; */
|
72 |
+
/* line-height: 1.7; */
|
73 |
+
}
|
74 |
+
|
75 |
+
div.svelte-wv8on1{
|
76 |
+
# border: 2px solid #074e4a !important;
|
77 |
+
border-top: 0 !important;
|
78 |
+
/* background-color: #fff2eb !important; */
|
79 |
+
padding: 10px !important;
|
80 |
+
}
|
81 |
+
.padding.svelte-phx28p {
|
82 |
+
padding:0 !important;
|
83 |
+
}
|
84 |
+
|
85 |
+
.tab-wrapper.svelte-1tcem6n.svelte-1tcem6n {
|
86 |
+
display: flex;
|
87 |
+
align-items: center;
|
88 |
+
justify-content: space-between;
|
89 |
+
position: relative;
|
90 |
+
height: 0 !important;
|
91 |
+
padding-bottom: 0 !important;
|
92 |
+
}
|
93 |
+
|
94 |
+
|
95 |
+
.selected.svelte-1tcem6n.svelte-1tcem6n {
|
96 |
+
background-color: #7d3561 !important;
|
97 |
+
color: #fff !important;
|
98 |
+
}
|
99 |
+
.tabs.svelte-1tcem6n.svelte-1tcem6n {
|
100 |
+
/* border: 1px solid #dca02a !important; */
|
101 |
+
border-top: 0 !important;
|
102 |
+
/* background-color: #dca02a !important; */
|
103 |
+
}
|
104 |
+
button.svelte-1tcem6n.svelte-1tcem6n {
|
105 |
+
color: #7d3561 !important;
|
106 |
+
/* border: 1px solid #dca02a !important; */
|
107 |
+
font-weight: bold;
|
108 |
+
/* font-size: 16px; */
|
109 |
+
padding: 8px 5px;
|
110 |
+
}
|
111 |
+
.tab-container.svelte-1tcem6n.svelte-1tcem6n:after {
|
112 |
+
content: "";
|
113 |
+
position: absolute;
|
114 |
+
bottom: 0;
|
115 |
+
left: 0;
|
116 |
+
right: 0;
|
117 |
+
height: 2px;
|
118 |
+
background-color: #7d3561 !important;
|
119 |
+
}
|
120 |
+
|
121 |
+
.gradio-container-5-34-1 .prose table,
|
122 |
+
.gradio-container-5-34-1 .prose tr,
|
123 |
+
.gradio-container-5-34-1 .prose td,
|
124 |
+
.gradio-container-5-34-1 .prose th {
|
125 |
+
border: 0 !important;
|
126 |
+
border-top: 2px solid #dca02a;
|
127 |
+
border-bottom: 2px solid #dca02a;
|
128 |
+
}
|
129 |
+
|
130 |
+
|
131 |
+
.gradio-container-5-34-1 .prose table {
|
132 |
+
border-top: 2px solid #dca02a !important;
|
133 |
+
border-bottom: 2px solid #dca02a !important;
|
134 |
+
margin-bottom:20px;
|
135 |
+
margin-left: auto;
|
136 |
+
margin-right: auto;
|
137 |
+
width: 100%;
|
138 |
+
border-collapse: collapse;
|
139 |
+
table-layout: fixed;
|
140 |
+
}
|
141 |
+
.gradio-container-5-34-1 .prose thead tr {
|
142 |
+
border-bottom: 2px solid #dca02a !important;
|
143 |
+
}
|
144 |
+
.gradio-container-5-34-1 .prose th {
|
145 |
+
color: #7d3561;
|
146 |
+
font-weight: bold;
|
147 |
+
/* font-size: 20px; */
|
148 |
+
padding: 8px 5px;
|
149 |
+
vertical-align: middle;
|
150 |
+
border: 0 !important;
|
151 |
+
}
|
152 |
+
.gradio-container-5-34-1 .prose td {
|
153 |
+
/* font-size: 18px; */
|
154 |
+
padding: 8px 5px;
|
155 |
+
border: 0 !important;
|
156 |
+
vertical-align: middle;
|
157 |
+
}
|
158 |
+
.gradio-container-5-34-1 .prose th:first-child,
|
159 |
+
.gradio-container-5-34-1 .prose td:first-child {
|
160 |
+
min-width: 400px !important;
|
161 |
+
max-width: 400px !important;
|
162 |
+
width:400px !important;
|
163 |
+
text-align: left !important;
|
164 |
+
}
|
165 |
+
.gradio-container-5-34-1 .prose th:not(:first-child),
|
166 |
+
.gradio-container-5-34-1 .prose td:not(:first-child) {
|
167 |
+
min-width: 90px;
|
168 |
+
max-width: 140px;
|
169 |
+
width: auto;
|
170 |
+
text-align: center;
|
171 |
+
}
|
172 |
+
"""
|
173 |
+
|
174 |
+
introduction_text = """
|
175 |
+
|
176 |
+
"""
|
177 |
+
# Page layout: header banner, then the three leaderboard views as tabs inside
# one styled card. Fixes: user-facing typos ("Select Task-CLuster",
# "No cluser found"), misspelled local helper (`get_claster_table`), and a
# linear scan over `cluster_tabs` replaced by a direct dict lookup.
with gr.Blocks(css=google_style_css) as demo:
    gr.HTML(new_header_html)
    with gr.Group(elem_classes="content-card"):
        gr.Markdown("<br>")
        with gr.Tabs():
            # Overall (cross-task) leaderboard.
            with gr.Tab("Main Leaderboard"):
                gr.HTML("<br><br><center><h2>Main Leaderboard</h2></center><br>")
                gr.HTML(df_to_html(main_overall_tab))

            # One leaderboard per task cluster, selected via dropdown.
            with gr.Tab("Task-Clusters Leaderboards"):
                gr.HTML("<br><br><center><h2>Task-Clusters Leaderboards</h2></center><br>")
                cluster_names = list(cluster_tabs.keys())
                clusters_dropdown = gr.Dropdown(
                    choices=cluster_names, label="Select Task-Cluster", interactive=True
                )

                def get_cluster_table(cluster_name):
                    # `cluster_tabs` maps cluster name -> DataFrame.
                    return cluster_tabs.get(cluster_name)

                cluster_table_component = gr.HTML(
                    df_to_html(get_cluster_table(cluster_names[0]))
                    if cluster_names
                    else "<b>No cluster found</b>"
                )

                def update_cluster_table(cluster_name):
                    return df_to_html(get_cluster_table(cluster_name))

                clusters_dropdown.change(
                    update_cluster_table, clusters_dropdown, cluster_table_component
                )

            # One leaderboard per African language, selected via dropdown.
            with gr.Tab("Language-Specific Leaderboards"):
                gr.HTML("<br><br><center><h2>Language-Specific Leaderboards</h2></center><br>")
                lang_dropdown = gr.Dropdown(
                    choices=LANG_NAME_LIST, label="Select Language", interactive=True
                )
                lang_table_component = gr.HTML(
                    df_to_html(get_lang_table(LANG_NAME_LIST[0]))
                    if LANG_NAME_LIST
                    else "<b>No languages found</b>"
                )

                def update_lang_table(lang_name):
                    return df_to_html(get_lang_table(lang_name))

                lang_dropdown.change(update_lang_table, lang_dropdown, lang_table_component)

if __name__ == "__main__":
    demo.launch(share=True)
|
envs.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Environment-driven configuration for the Sahara leaderboard Space."""

import os

from huggingface_hub import HfApi

# Hugging Face access token (configured as a Space secret).
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset repository id holding the leaderboard data.
SAHARA_DATA = os.environ.get("SAHARA_DATA")
# Data file (within that repository) containing the results.
SAHARA_RESULTS = os.environ.get("SAHARA_RESULTS")

# Authenticated Hub client used by the rest of the app.
API = HfApi(token=HF_TOKEN)
|
helper.py
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from statistics import mean
|
3 |
+
import pandas as pd
|
4 |
+
import json
|
5 |
+
import numpy as np
|
6 |
+
from statistics import mean
|
7 |
+
import re
|
8 |
+
from datasets import load_dataset
|
9 |
+
import os
|
10 |
+
from collections import defaultdict
|
11 |
+
from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
|
12 |
+
TASKS_LIST={
|
13 |
+
'xlni':'Cross-Lingual Natural Language Inference',
|
14 |
+
'lid':'Language Identification',
|
15 |
+
'news': 'News Classification',
|
16 |
+
'sentiment':'Sentiment Analysis',
|
17 |
+
'topic':'Topic Classification',
|
18 |
+
'mt_eng2xx':'Machine Translation - English to African',
|
19 |
+
'mt_fra2xx':'Machine Translation - French to African',
|
20 |
+
'mt_xx2xx':'Machine Translation - African to African',
|
21 |
+
'paraphrase':'Paraphrase',
|
22 |
+
'summary':'Summarization',
|
23 |
+
'title':'Title Generation',
|
24 |
+
'mmlu':'General Knowledge',
|
25 |
+
'mgsm':'Mathematical Word Problems',
|
26 |
+
'belebele':'Reading Comprehension',
|
27 |
+
'squad_qa':'Context-based Question Answering',
|
28 |
+
'ner':'Named Entity Recognition',
|
29 |
+
'phrase':'Phrase Chunking',
|
30 |
+
'pos':'Part-of-Speech Tagging',
|
31 |
+
}
|
32 |
+
CLUSTERS = {
|
33 |
+
"Text Classification": [
|
34 |
+
'xlni', 'lid', 'news', 'sentiment', 'topic',
|
35 |
+
],
|
36 |
+
"Text Generation": [
|
37 |
+
'mt_eng2xx', 'mt_fra2xx', 'mt_xx2xx', 'paraphrase', 'summary', 'title',
|
38 |
+
],
|
39 |
+
"MCCR": [
|
40 |
+
'mmlu', 'mgsm', 'belebele', 'squad_qa',
|
41 |
+
],
|
42 |
+
"Tokens": [
|
43 |
+
'ner', 'phrase', 'pos',
|
44 |
+
],
|
45 |
+
}
|
46 |
+
ALL_TASKS = [t for cluster in CLUSTERS.values() for t in cluster]
|
47 |
+
|
48 |
+
# ===== Authenticate and Load Data From Private HF Repo =====
|
49 |
+
|
50 |
+
def load_private_leaderboard_df():
    """Fetch the Sahara results file from the HF Hub and return it as a DataFrame.

    `force_redownload` makes the leaderboard reflect the latest uploaded
    results instead of a locally cached copy.
    """
    dataset = load_dataset(
        path=SAHARA_DATA,
        name=None,
        data_files=SAHARA_RESULTS,
        split="train",
        download_mode="force_redownload",
    )
    return dataset.to_pandas()
|
59 |
+
metrics_list={
|
60 |
+
'bleu_1k':'spBleu<sup>1K</sup>',
|
61 |
+
'accuracy':'Accuracy',
|
62 |
+
'f1':'Macro-F1',
|
63 |
+
'exact_match':'Exact Match',
|
64 |
+
'rougeL':'RougeL',
|
65 |
+
}
|
66 |
+
LANG_ISO2NAME = {
|
67 |
+
'eng': 'English',
|
68 |
+
'fra': 'French',
|
69 |
+
# 'ara': 'Arabic',
|
70 |
+
'amh': 'Amharic',
|
71 |
+
'ewe': 'Ewe',
|
72 |
+
'hau': 'Hausa',
|
73 |
+
'ibo': 'Igbo',
|
74 |
+
'kin': 'Kinyarwanda',
|
75 |
+
'lin': 'Lingala',
|
76 |
+
'lug': 'Ganda',
|
77 |
+
'orm': 'Oromo',
|
78 |
+
'sna': 'Shona',
|
79 |
+
'sot': 'Southern Sotho',
|
80 |
+
'swa': 'Swahili', 'swh': 'Swahili',
|
81 |
+
'twi': 'Twi',
|
82 |
+
'wol': 'Wolof',
|
83 |
+
'xho': 'Xhosa',
|
84 |
+
'yor': 'Yoruba',
|
85 |
+
'zul': 'Zulu',
|
86 |
+
'afr': 'Afrikaans',
|
87 |
+
'run': 'Rundi',
|
88 |
+
'tir': 'Tigrinya',
|
89 |
+
'som': 'Somali',
|
90 |
+
'pcm': 'Nigerian Pidgin',
|
91 |
+
'teo': 'Teso',
|
92 |
+
'nyn': 'Nyankore/Nyankole',
|
93 |
+
'lgg': 'Lugbara',
|
94 |
+
'bem': 'Bemba/Chibemba',
|
95 |
+
'tsn': 'Tswana',
|
96 |
+
'bbj': 'GhomΓ‘lΓ‘',
|
97 |
+
'mos': 'Moore',
|
98 |
+
'bam': 'Bambara',
|
99 |
+
'fon': 'Fon',
|
100 |
+
'ach': 'Acholi',
|
101 |
+
'nso': 'Sepedi',
|
102 |
+
'tso': 'Tsonga',
|
103 |
+
'fuv': 'Fulfude Nigeria',
|
104 |
+
'gaz': 'Oromo, West Central',
|
105 |
+
'kea': 'Kabuverdianu',
|
106 |
+
'nya': 'Nyanja',
|
107 |
+
'ssw': 'Swati',
|
108 |
+
'luo': 'Dholuo/Luo',
|
109 |
+
'ven': 'Venda',
|
110 |
+
'kir':"Kirundi",
|
111 |
+
}
|
112 |
+
|
113 |
+
# ===== Build Language NameβISOs map =====
|
114 |
+
def build_langname_to_isos(iso2name):
    """Invert an ISO-code -> language-name map into name -> set of ISO codes.

    Several ISO codes can share one display name (e.g. 'swa' and 'swh' are
    both 'Swahili'), so each name maps to the set of all its codes.
    """
    inverted = defaultdict(set)
    for code, language in iso2name.items():
        inverted[language].add(code)
    return inverted
|
119 |
+
|
120 |
+
LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
|
121 |
+
#show only African langs
|
122 |
+
LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
|
123 |
+
|
124 |
+
def get_task_metric_map(df):
    """Map each task name to its metric from the results dataframe.

    When a task appears in several rows, the last row wins — the same
    overwrite semantics as iterating the frame row by row.
    """
    return dict(zip(df["task"], df["metric"]))
|
129 |
+
|
130 |
+
def cluster_average(row, tasks):
    """Average the scores in `row` over `tasks`, skipping anything non-numeric.

    Entries that are missing or cannot be parsed as float (e.g. the "---"
    placeholder) are ignored; returns NaN when no task score is usable.
    """
    scores = []
    for task in tasks:
        try:
            scores.append(float(row[task]))
        except Exception:
            pass
    return np.mean(scores) if scores else np.nan
|
139 |
+
|
140 |
+
def add_medals_to_models(df, score_col="overall score"):
    """Sort `df` by `score_col` descending and medal the top-3 distinct scores.

    `score_col` holds pre-formatted strings ("12.34") or the "---" placeholder.
    Ties share a medal (ranking is by distinct score value). The input frame
    is left untouched (the original leaked a `__score_float` column onto the
    caller's frame); a sorted copy is returned.

    NOTE(review): the source's medal literals were mojibake; restored as
    trophy/silver/bronze emoji — confirm against the deployed UI.
    """
    df = df.copy()  # fix: don't mutate the caller's frame
    helper_col = "__score_float"
    df[helper_col] = df[score_col].apply(lambda x: float(x) if x != "---" else np.nan)
    # Stable sort keeps the incoming order for equal scores; NaNs sort last.
    df = df.sort_values(by=helper_col, ascending=False, kind="mergesort").reset_index(drop=True)

    def get_rank_symbols(scores):
        unique_scores = sorted({s for s in scores if not pd.isna(s)}, reverse=True)
        symbols = ["🏆", "🥈", "🥉"]
        score_to_symbol = {s: symbols[i] for i, s in enumerate(unique_scores[:3])}
        return [score_to_symbol.get(s, "") for s in scores]

    df["rank_symbol"] = get_rank_symbols(df[helper_col].tolist())
    df["model"] = df["rank_symbol"] + " " + df["model"]
    return df.drop(columns=["rank_symbol", helper_col])
|
153 |
+
|
154 |
+
def format_cluster_table(df, cluster_tasks, metric_map):
    """Build a per-cluster leaderboard: model, cluster average, per-task scores.

    Missing tasks are filled with "---"; numeric scores are formatted to two
    decimals; headers become "<task name><br>Metric: <metric name>".
    Fix: operates on a copy — the original added "---" columns to the caller's
    shared pivot table as a side effect.
    """
    df = df.copy()  # fix: don't mutate the caller's shared pivot table
    for task in cluster_tasks:
        if task not in df.columns:
            df[task] = '---'
    df = df[["model"] + cluster_tasks]
    for task in cluster_tasks:
        df[task] = df[task].apply(
            lambda x: f"{x:.2f}" if isinstance(x, (int, float, np.integer, np.floating)) else x
        )
    # Cluster score: mean of the task scores that parse as numbers.
    df["Cluster Score"] = df[cluster_tasks].apply(
        lambda row: cluster_average(row, cluster_tasks), axis=1
    )
    df["Cluster Score"] = df["Cluster Score"].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
    df = df[["model", "Cluster Score"] + cluster_tasks]
    rename = {
        t: f"{TASKS_LIST[t]}<br>Metric: {metrics_list[metric_map.get(t, '')]}"
        for t in cluster_tasks
    }
    df = df.rename(columns=rename)
    return add_medals_to_models(df, score_col="Cluster Score")
|
172 |
+
|
173 |
+
def format_main_overall_table(df, metric_map):
    """Build the main leaderboard: per-cluster averages plus a "Sahara Score".

    The Sahara Score is the mean of the available cluster averages.
    `metric_map` is accepted for interface parity with format_cluster_table
    but is not used here.
    Fix: guard the all-NaN row case explicitly instead of feeding an empty
    list to np.nanmean (which emits a RuntimeWarning and returns NaN anyway).
    """
    main = df.copy()
    for cname, tasks in CLUSTERS.items():
        main[cname] = main[tasks].apply(lambda row, ts=tasks: cluster_average(row, ts), axis=1)
    cluster_cols = list(CLUSTERS.keys())

    def overall(row):
        vals = [x for x in row if pd.notna(x)]
        return np.mean(vals) if vals else np.nan

    main["Overall Score"] = main[cluster_cols].apply(overall, axis=1)
    for c in cluster_cols + ["Overall Score"]:
        main[c] = main[c].apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---")
    main = main[["model", "Overall Score"] + cluster_cols]
    main = add_medals_to_models(main, score_col="Overall Score")
    main.rename(columns={'Overall Score': 'Sahara Score'}, inplace=True)
    return main
|
187 |
+
|
188 |
+
def load_leaderboards():
    """Load the results dataset and build every leaderboard table.

    Returns (cluster_tabs, main_overall_tab, df, metric_map).
    Fix: the empty-data path previously returned a 6-tuple
    (cluster_tabs, main_overall_tab, [], {}, df, metric_map) while the normal
    path and the module-level caller unpack 4 values — an empty dataset would
    crash with ValueError at import time. Both paths now return the same
    4-tuple; the unused `all_langs` computation was removed.
    """
    df = load_private_leaderboard_df()
    metric_map = get_task_metric_map(df)
    main_df = df[df['leaderboard'] == 'main'].copy()
    if main_df.empty:
        # Placeholder tables so the UI still renders with an empty dataset.
        cluster_tabs = {c: pd.DataFrame([{"Info": "No data"}]) for c in CLUSTERS}
        main_overall_tab = pd.DataFrame([{"Info": "No data"}])
        return cluster_tabs, main_overall_tab, df, metric_map
    main_tasks_df = main_df.pivot_table(index='model', columns='task', values='score').reset_index()
    cluster_tabs = {
        cname: format_cluster_table(main_tasks_df, tasks, metric_map)
        for cname, tasks in CLUSTERS.items()
    }
    # Make sure every known task column exists before building the main table.
    for t in ALL_TASKS:
        if t not in main_tasks_df.columns:
            main_tasks_df[t] = np.nan
    main_overall_tab = format_main_overall_table(main_tasks_df, metric_map)
    return cluster_tabs, main_overall_tab, df, metric_map
|
206 |
+
|
207 |
+
def df_to_html(df, col_minwidth=90, col_maxwidth=140, model_col_width=400):
    """Render a leaderboard dataframe as an HTML table.

    Columns whose (string) name contains "task" are dropped — they are
    internal helper columns. Table styling comes from the page-level CSS, so
    the width parameters are kept only for backward compatibility.
    Fixes: removed ~45 lines of dead commented-out inline CSS; the "task"
    check now skips non-string column names instead of raising TypeError.
    """
    drop_cols = [col for col in df.columns if isinstance(col, str) and "task" in col]
    df = df.drop(columns=drop_cols, errors="ignore")
    df.columns.name = None  # suppress the pivot's columns-axis name in the header
    # escape=False keeps the <br> markup in column headers renderable.
    return df.to_html(index=False, escape=False)
|
260 |
+
|
261 |
+
|
262 |
+
|
263 |
+
cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
|
264 |
+
|
265 |
+
def get_lang_table(lang_name):
    """Build the per-language leaderboard for `lang_name`.

    Gathers every leaderboard whose id contains one of the language's ISO
    codes (standalone like "swa" or as one side of a translation pair like
    "eng-swa"), pivots to model x task, and prepends an averaged
    "Language Score" column with medal ranking.
    Fixes: the ranking/medal logic duplicated add_medals_to_models verbatim —
    it now calls that helper; removed the unused `pair` local.
    """
    iso_codes = LANGNAME2ISOS.get(lang_name, [])
    if not iso_codes:
        return pd.DataFrame([{"Info": "No data for this language"}])
    # Match an ISO code either standalone or on either side of "src-tgt".
    pattern = re.compile(r"(^|-)(" + "|".join(re.escape(iso) for iso in iso_codes) + r")(-|$)")
    matched_langs = [
        lb for lb in all_df['leaderboard'].unique() if lb not in ['main'] and pattern.search(lb)
    ]
    lang_df = all_df[all_df['leaderboard'].isin(matched_langs)].copy()
    if lang_df.empty:
        return pd.DataFrame([{"Info": "No data for this language"}])

    def make_task_col(row):
        # Human-readable column header: task, optional language pair, metric.
        lb, task, metric = row['leaderboard'], row['task'], row['metric']
        if '-' in lb:
            pair_lang = lb.split('-')
            return (
                f"{TASKS_LIST[task]} <br> "
                f"{LANG_ISO2NAME[pair_lang[0]]} to {LANG_ISO2NAME[pair_lang[1]]} <br> "
                f"Metric: {metrics_list[metric]}"
            )
        return f"{TASKS_LIST[task]} <br> Metric: {metrics_list[metric]}"

    lang_df['task_col'] = lang_df.apply(make_task_col, axis=1)
    table = lang_df.pivot_table(index='model', columns='task_col', values='score').reset_index()
    score_cols = [col for col in table.columns if col != 'model']
    for col in score_cols:
        table[col] = table[col].apply(
            lambda x: f"{x:.2f}" if isinstance(x, (int, float, np.integer, np.floating)) else x
        )

    def avg_score(row):
        vals = []
        for col in score_cols:
            try:
                vals.append(float(row[col]))
            except Exception:
                continue
        return np.mean(vals) if vals else np.nan

    table.insert(
        1,
        'Language Score',
        table.apply(avg_score, axis=1).apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---"),
    )
    # Same sort + medal logic as every other leaderboard table.
    return add_medals_to_models(table, score_col='Language Score')
|
312 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
APScheduler
|
2 |
+
black
|
3 |
+
datasets
|
4 |
+
gradio
|
5 |
+
gradio[oauth]
|
6 |
+
gradio_leaderboard==0.0.13
|
7 |
+
gradio_client
|
8 |
+
huggingface-hub>=0.18.0
|
9 |
+
matplotlib
|
10 |
+
numpy
|
11 |
+
pandas
|
12 |
+
python-dateutil
|
13 |
+
tqdm
|
14 |
+
transformers
|
15 |
+
tokenizers>=0.15.0
|
16 |
+
sentencepiece
|
src/envs.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Environment-driven configuration for the Sahara leaderboard Space."""

import os

from huggingface_hub import HfApi

# Hugging Face access token (configured as a Space secret).
HF_TOKEN = os.environ.get("HF_TOKEN")
# Dataset repository id holding the leaderboard data.
SAHARA_DATA = os.environ.get("SAHARA_DATA")
# Data file (within that repository) containing the results.
SAHARA_RESULTS = os.environ.get("SAHARA_RESULTS")

# Authenticated Hub client used by the rest of the app.
API = HfApi(token=HF_TOKEN)
|
src/helper.py
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
from statistics import mean
|
3 |
+
import pandas as pd
|
4 |
+
import json
|
5 |
+
import numpy as np
|
6 |
+
from statistics import mean
|
7 |
+
import re
|
8 |
+
from datasets import load_dataset
|
9 |
+
import os
|
10 |
+
from collections import defaultdict
|
11 |
+
from src.envs import API, SAHARA_DATA, SAHARA_RESULTS
|
12 |
+
TASKS_LIST={
|
13 |
+
'xlni':'Cross-Lingual Natural Language Inference',
|
14 |
+
'lid':'Language Identification',
|
15 |
+
'news': 'News Classification',
|
16 |
+
'sentiment':'Sentiment Analysis',
|
17 |
+
'topic':'Topic Classification',
|
18 |
+
'mt_eng2xx':'Machine Translation - English to African',
|
19 |
+
'mt_fra2xx':'Machine Translation - French to African',
|
20 |
+
'mt_xx2xx':'Machine Translation - African to African',
|
21 |
+
'paraphrase':'Paraphrase',
|
22 |
+
'summary':'Summarization',
|
23 |
+
'title':'Title Generation',
|
24 |
+
'mmlu':'General Knowledge',
|
25 |
+
'mgsm':'Mathematical Word Problems',
|
26 |
+
'belebele':'Reading Comprehension',
|
27 |
+
'squad_qa':'Context-based Question Answering',
|
28 |
+
'ner':'Named Entity Recognition',
|
29 |
+
'phrase':'Phrase Chunking',
|
30 |
+
'pos':'Part-of-Speech Tagging',
|
31 |
+
}
|
32 |
+
CLUSTERS = {
|
33 |
+
"Text Classification": [
|
34 |
+
'xlni', 'lid', 'news', 'sentiment', 'topic',
|
35 |
+
],
|
36 |
+
"Text Generation": [
|
37 |
+
'mt_eng2xx', 'mt_fra2xx', 'mt_xx2xx', 'paraphrase', 'summary', 'title',
|
38 |
+
],
|
39 |
+
"MCCR": [
|
40 |
+
'mmlu', 'mgsm', 'belebele', 'squad_qa',
|
41 |
+
],
|
42 |
+
"Tokens": [
|
43 |
+
'ner', 'phrase', 'pos',
|
44 |
+
],
|
45 |
+
}
|
46 |
+
ALL_TASKS = [t for cluster in CLUSTERS.values() for t in cluster]
|
47 |
+
|
48 |
+
# ===== Authenticate and Load Data From Private HF Repo =====
|
49 |
+
|
50 |
+
def load_private_leaderboard_df():
|
51 |
+
ds = load_dataset(
|
52 |
+
path=SAHARA_DATA,
|
53 |
+
name=None,
|
54 |
+
data_files=SAHARA_RESULTS,
|
55 |
+
split="train",
|
56 |
+
download_mode="force_redownload"
|
57 |
+
)
|
58 |
+
return ds.to_pandas()
|
59 |
+
metrics_list={
|
60 |
+
'bleu_1k':'spBleu<sup>1K</sup>',
|
61 |
+
'accuracy':'Accuracy',
|
62 |
+
'f1':'Macro-F1',
|
63 |
+
'exact_match':'Exact Match',
|
64 |
+
'rougeL':'RougeL',
|
65 |
+
}
|
66 |
+
LANG_ISO2NAME = {
|
67 |
+
'eng': 'English',
|
68 |
+
'fra': 'French',
|
69 |
+
# 'ara': 'Arabic',
|
70 |
+
'amh': 'Amharic',
|
71 |
+
'ewe': 'Ewe',
|
72 |
+
'hau': 'Hausa',
|
73 |
+
'ibo': 'Igbo',
|
74 |
+
'kin': 'Kinyarwanda',
|
75 |
+
'lin': 'Lingala',
|
76 |
+
'lug': 'Ganda',
|
77 |
+
'orm': 'Oromo',
|
78 |
+
'sna': 'Shona',
|
79 |
+
'sot': 'Southern Sotho',
|
80 |
+
'swa': 'Swahili', 'swh': 'Swahili',
|
81 |
+
'twi': 'Twi',
|
82 |
+
'wol': 'Wolof',
|
83 |
+
'xho': 'Xhosa',
|
84 |
+
'yor': 'Yoruba',
|
85 |
+
'zul': 'Zulu',
|
86 |
+
'afr': 'Afrikaans',
|
87 |
+
'run': 'Rundi',
|
88 |
+
'tir': 'Tigrinya',
|
89 |
+
'som': 'Somali',
|
90 |
+
'pcm': 'Nigerian Pidgin',
|
91 |
+
'teo': 'Teso',
|
92 |
+
'nyn': 'Nyankore/Nyankole',
|
93 |
+
'lgg': 'Lugbara',
|
94 |
+
'bem': 'Bemba/Chibemba',
|
95 |
+
'tsn': 'Tswana',
|
96 |
+
'bbj': 'GhomΓ‘lΓ‘',
|
97 |
+
'mos': 'Moore',
|
98 |
+
'bam': 'Bambara',
|
99 |
+
'fon': 'Fon',
|
100 |
+
'ach': 'Acholi',
|
101 |
+
'nso': 'Sepedi',
|
102 |
+
'tso': 'Tsonga',
|
103 |
+
'fuv': 'Fulfude Nigeria',
|
104 |
+
'gaz': 'Oromo, West Central',
|
105 |
+
'kea': 'Kabuverdianu',
|
106 |
+
'nya': 'Nyanja',
|
107 |
+
'ssw': 'Swati',
|
108 |
+
'luo': 'Dholuo/Luo',
|
109 |
+
'ven': 'Venda',
|
110 |
+
'kir':"Kirundi",
|
111 |
+
}
|
112 |
+
|
113 |
+
# ===== Build Language NameβISOs map =====
|
114 |
+
def build_langname_to_isos(iso2name):
|
115 |
+
name2isos = defaultdict(set)
|
116 |
+
for iso, name in iso2name.items():
|
117 |
+
name2isos[name].add(iso)
|
118 |
+
return name2isos
|
119 |
+
|
120 |
+
LANGNAME2ISOS = build_langname_to_isos(LANG_ISO2NAME)
|
121 |
+
#show only African langs
|
122 |
+
LANG_NAME_LIST = sorted([lang for lang in LANGNAME2ISOS.keys() if lang not in ['eng', 'fra', 'English', 'French']])
|
123 |
+
|
124 |
+
def get_task_metric_map(df):
|
125 |
+
mapping = {}
|
126 |
+
for _, row in df.iterrows():
|
127 |
+
mapping[row["task"]] = row["metric"]
|
128 |
+
return mapping
|
129 |
+
|
130 |
+
def cluster_average(row, tasks):
|
131 |
+
vals = []
|
132 |
+
for t in tasks:
|
133 |
+
try:
|
134 |
+
v = float(row[t])
|
135 |
+
vals.append(v)
|
136 |
+
except Exception:
|
137 |
+
continue
|
138 |
+
return np.mean(vals) if vals else np.nan
|
139 |
+
|
140 |
+
def add_medals_to_models(df, score_col="overall score"):
    """Sort `df` by `score_col` descending and medal the top-3 distinct scores.

    `score_col` holds pre-formatted strings ("12.34") or the "---" placeholder.
    Ties share a medal (ranking is by distinct score value). The input frame
    is left untouched (the original leaked a `__score_float` column onto the
    caller's frame); a sorted copy is returned.

    NOTE(review): the source's medal literals were mojibake; restored as
    trophy/silver/bronze emoji — confirm against the deployed UI.
    """
    df = df.copy()  # fix: don't mutate the caller's frame
    helper_col = "__score_float"
    df[helper_col] = df[score_col].apply(lambda x: float(x) if x != "---" else np.nan)
    # Stable sort keeps the incoming order for equal scores; NaNs sort last.
    df = df.sort_values(by=helper_col, ascending=False, kind="mergesort").reset_index(drop=True)

    def get_rank_symbols(scores):
        unique_scores = sorted({s for s in scores if not pd.isna(s)}, reverse=True)
        symbols = ["🏆", "🥈", "🥉"]
        score_to_symbol = {s: symbols[i] for i, s in enumerate(unique_scores[:3])}
        return [score_to_symbol.get(s, "") for s in scores]

    df["rank_symbol"] = get_rank_symbols(df[helper_col].tolist())
    df["model"] = df["rank_symbol"] + " " + df["model"]
    return df.drop(columns=["rank_symbol", helper_col])
|
153 |
+
|
154 |
+
def format_cluster_table(df, cluster_tasks, metric_map):
    """Build one cluster's leaderboard table.

    *df* is a model x task pivot of scores, *cluster_tasks* the task columns
    belonging to this cluster, *metric_map* maps task -> metric key. Returns
    a frame with the model, a 'Cluster Score' average, and one display-named
    column per task (headers carry HTML rendered later by df_to_html).

    Fix: works on a copy — the previous version added '---' placeholder
    columns to the caller's frame in place and assigned through a slice
    (SettingWithCopy-prone).
    """
    df = df.copy()
    for t in cluster_tasks:
        if t not in df.columns:
            df[t] = '---'  # placeholder for tasks with no scores at all
    df = df[["model"] + cluster_tasks]
    for t in cluster_tasks:
        df[t] = df[t].apply(
            lambda x: f"{x:.2f}" if isinstance(x, (int, float, np.integer, np.floating)) else x
        )
    # NOTE(review): a pivot NaN is formatted as the string "nan", which
    # float() parses, so one missing task turns the whole cluster average
    # into NaN/'---'. Behavior kept as-is — confirm this is intended.
    df["Cluster Score"] = df[cluster_tasks].apply(
        lambda row: cluster_average(row, cluster_tasks), axis=1
    )
    df["Cluster Score"] = df["Cluster Score"].apply(
        lambda x: f"{x:.2f}" if pd.notna(x) else "---"
    )
    df = df[["model", "Cluster Score"] + cluster_tasks]
    rename = {t: f"{TASKS_LIST[t]}<br>Metric: {metrics_list[metric_map.get(t, '')]}"
              for t in cluster_tasks}
    df = df.rename(columns=rename)
    return add_medals_to_models(df, score_col="Cluster Score")
|
172 |
+
|
173 |
+
def format_main_overall_table(df, metric_map):
    """Build the main leaderboard: one averaged column per cluster plus an
    overall 'Sahara Score'.

    *metric_map* is accepted for signature parity with the other table
    builders but is not used here. Scores are formatted to two decimals;
    missing values render as '---'.
    """
    table = df.copy()
    cluster_names = list(CLUSTERS.keys())
    for name in cluster_names:
        tasks = CLUSTERS[name]
        table[name] = table[tasks].apply(
            lambda row, t=tasks: cluster_average(row, t), axis=1
        )
    table["Overall Score"] = table[cluster_names].apply(
        lambda row: np.nanmean([v for v in row if pd.notna(v)]), axis=1
    )
    for col in cluster_names + ["Overall Score"]:
        table[col] = table[col].apply(lambda v: f"{v:.2f}" if pd.notna(v) else "---")
    table = table[["model", "Overall Score"] + cluster_names]
    table = add_medals_to_models(table, score_col="Overall Score")
    return table.rename(columns={'Overall Score': 'Sahara Score'})
|
187 |
+
|
188 |
+
def load_leaderboards():
    """Load the private leaderboard frame and build all display tables.

    Returns a 4-tuple ``(cluster_tabs, main_overall_tab, df, metric_map)``:
    per-cluster tables keyed by cluster name, the main overall table, the
    raw long-format frame, and the task -> metric map.

    Fix: the empty-data branch previously returned SIX values
    (``cluster_tabs, main_overall_tab, [], {}, df, metric_map``) while the
    normal path and the module-level caller unpack FOUR — unpacking would
    crash whenever the 'main' leaderboard was empty. Both paths now return
    the same 4-tuple. Also removed the unused ``all_langs`` local.
    """
    df = load_private_leaderboard_df()
    metric_map = get_task_metric_map(df)
    main_df = df[df['leaderboard'] == 'main'].copy()
    if main_df.empty:
        cluster_tabs = {c: pd.DataFrame([{"Info": "No data"}]) for c in CLUSTERS}
        main_overall_tab = pd.DataFrame([{"Info": "No data"}])
        return cluster_tabs, main_overall_tab, df, metric_map
    main_tasks_df = main_df.pivot_table(index='model', columns='task',
                                        values='score').reset_index()
    cluster_tabs = {
        cname: format_cluster_table(main_tasks_df, tasks, metric_map)
        for cname, tasks in CLUSTERS.items()
    }
    # Ensure every known task column exists before computing overall averages.
    for t in ALL_TASKS:
        if t not in main_tasks_df.columns:
            main_tasks_df[t] = np.nan
    main_overall_tab = format_main_overall_table(main_tasks_df, metric_map)
    return cluster_tabs, main_overall_tab, df, metric_map
|
206 |
+
|
207 |
+
def df_to_html(df, col_minwidth=90, col_maxwidth=140, model_col_width=400):
    """Render *df* as an HTML table string for the Gradio UI.

    Columns whose name contains "task" are dropped (internal helper
    columns). Cells are not escaped, so columns may carry raw HTML (e.g.
    the "<br>"-formatted task headers). The width parameters are kept for
    backward compatibility with existing callers; styling now lives in the
    page CSS (the previously commented-out inline <style> block has been
    removed as dead code).
    """
    # str(col) guards against non-string column labels raising TypeError
    # on the `in` test (a pivot can yield non-string column names).
    drop_cols = [col for col in df.columns if "task" in str(col)]
    df = df.drop(columns=drop_cols, errors="ignore")
    df.columns.name = None  # suppress pandas' columns-axis label row
    return df.to_html(index=False, escape=False)
|
260 |
+
|
261 |
+
|
262 |
+
|
263 |
+
# Build every leaderboard view once at import time; `all_df` (the full
# long-format results frame) is read later by get_lang_table.
cluster_tabs, main_overall_tab, all_df, metric_map = load_leaderboards()
|
264 |
+
|
265 |
+
def get_lang_table(lang_name):
    """Build the per-language leaderboard table for *lang_name*.

    Resolves the display name to its ISO code(s), collects every
    leaderboard whose id contains one of those codes as a complete
    '-'-delimited segment, pivots model x task scores, inserts an averaged
    'Language Score' column, and medals/sorts the models.

    Consistency fix: the inline averaging and medal/sort logic duplicated
    ``cluster_average`` and ``add_medals_to_models``; both are now reused.
    Also removed the unused ``pair`` local.
    """
    iso_codes = LANGNAME2ISOS.get(lang_name, [])
    if not iso_codes:
        return pd.DataFrame([{"Info": "No data for this language"}])
    # Match an ISO code only as a full dash-delimited segment of the
    # leaderboard id (so 'swa' matches 'eng-swa' but not 'swahili-x').
    pattern = re.compile(r"(^|-)(" + "|".join(re.escape(iso) for iso in iso_codes) + r")(-|$)")
    matched_langs = [lb for lb in all_df['leaderboard'].unique()
                     if lb not in ['main'] and pattern.search(lb)]
    lang_df = all_df[all_df['leaderboard'].isin(matched_langs)].copy()
    if lang_df.empty:
        return pd.DataFrame([{"Info": "No data for this language"}])

    def make_task_col(row):
        # Human-readable column header: task display name, the translation
        # pair when the leaderboard id encodes one ('src-tgt'), and metric.
        lb = row['leaderboard']
        task = row['task']
        metric = row['metric']
        if '-' in lb:
            src_iso, tgt_iso = lb.split('-')[:2]
            return (f"{TASKS_LIST[task]} <br> {LANG_ISO2NAME[src_iso]} to "
                    f"{LANG_ISO2NAME[tgt_iso]} <br> Metric: {metrics_list[metric]}")
        return f"{TASKS_LIST[task]} <br> Metric: {metrics_list[metric]}"

    lang_df['task_col'] = lang_df.apply(make_task_col, axis=1)
    table = lang_df.pivot_table(index='model', columns='task_col',
                                values='score').reset_index()
    score_cols = [col for col in table.columns if col != 'model']
    for col in score_cols:
        table[col] = table[col].apply(
            lambda x: f"{x:.2f}" if isinstance(x, (int, float, np.integer, np.floating)) else x
        )
    # Per-model average over every task column that parses as a number.
    language_scores = table.apply(lambda row: cluster_average(row, score_cols), axis=1)
    table.insert(1, 'Language Score',
                 language_scores.apply(lambda x: f"{x:.2f}" if pd.notna(x) else "---"))
    return add_medals_to_models(table, score_col='Language Score')
|
312 |
+
|