# NOTE: "Spaces: Sleeping" is a Hugging Face Spaces page banner captured with this file; it is not part of the program.
# import gradio as gr | |
# import pandas as pd | |
# from datasets import load_dataset | |
# from jiwer import wer, cer | |
# import os | |
# from datetime import datetime | |
# import re | |
# from huggingface_hub import login | |
# # Login to Hugging Face Hub (if token is available) | |
# token = os.environ.get("HG_TOKEN") | |
# if token: | |
# login(token) | |
# try: | |
# dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"] | |
# references = {row["id"]: row["text"] for row in dataset} | |
# print(f"Loaded {len(references)} reference transcriptions") | |
# except Exception as e: | |
# print(f"Error loading dataset: {str(e)}") | |
# references = {} | |
# leaderboard_file = "leaderboard.csv" | |
# if not os.path.exists(leaderboard_file): | |
# sample_data = [ | |
# ["test_1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"], | |
# ["test_2", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45"], | |
# ] | |
# pd.DataFrame(sample_data, | |
# columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False) | |
# print(f"Created new leaderboard file with sample data") | |
# else: | |
# leaderboard_df = pd.read_csv(leaderboard_file) | |
# if "Combined_Score" not in leaderboard_df.columns: | |
# leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3 | |
# leaderboard_df.to_csv(leaderboard_file, index=False) | |
# print(f"Added Combined_Score column to existing leaderboard") | |
# print(f"Loaded leaderboard with {len(leaderboard_df)} entries") | |
# def normalize_text(text): | |
# """Normalize text for WER/CER calculation""" | |
# if not isinstance(text, str): | |
# text = str(text) | |
# text = text.lower() | |
# text = re.sub(r'[^\w\s]', '', text) | |
# text = re.sub(r'\s+', ' ', text).strip() | |
# return text | |
# def calculate_metrics(predictions_df): | |
# """Calculate WER and CER for predictions.""" | |
# results = [] | |
# total_ref_words = 0 | |
# total_ref_chars = 0 | |
# for _, row in predictions_df.iterrows(): | |
# id_val = row["id"] | |
# if id_val not in references: | |
# continue | |
# reference = normalize_text(references[id_val]) | |
# hypothesis = normalize_text(row["text"]) | |
# if not reference or not hypothesis: | |
# continue | |
# reference_words = reference.split() | |
# hypothesis_words = hypothesis.split() | |
# reference_chars = list(reference) | |
# try: | |
# sample_wer = wer(reference, hypothesis) | |
# sample_cer = cer(reference, hypothesis) | |
# sample_wer = min(sample_wer, 2.0) | |
# sample_cer = min(sample_cer, 2.0) | |
# total_ref_words += len(reference_words) | |
# total_ref_chars += len(reference_chars) | |
# results.append({ | |
# "id": id_val, | |
# "reference": reference, | |
# "hypothesis": hypothesis, | |
# "ref_word_count": len(reference_words), | |
# "ref_char_count": len(reference_chars), | |
# "wer": sample_wer, | |
# "cer": sample_cer | |
# }) | |
# except Exception as e: | |
# print(f"Error processing sample {id_val}: {str(e)}") | |
# pass | |
# if not results: | |
# raise ValueError("No valid samples for WER/CER calculation") | |
# avg_wer = sum(item["wer"] for item in results) / len(results) | |
# avg_cer = sum(item["cer"] for item in results) / len(results) | |
# weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words | |
# weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars | |
# return avg_wer, avg_cer, weighted_wer, weighted_cer, results | |
# def format_as_percentage(value): | |
# """Convert decimal to percentage with 2 decimal places""" | |
# return f"{value * 100:.2f}%" | |
# def prepare_leaderboard_for_display(df, sort_by="Combined_Score"): | |
# """Format leaderboard for display with ranking and percentages""" | |
# if df is None or len(df) == 0: | |
# return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"]) | |
# display_df = df.copy() | |
# display_df = display_df.sort_values(sort_by) | |
# display_df.insert(0, "Rank", range(1, len(display_df) + 1)) | |
# for col in ["WER", "CER", "Combined_Score"]: | |
# if col in display_df.columns: | |
# display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}") | |
# return display_df | |
# def update_ranking(method): | |
# """Update leaderboard ranking based on selected method""" | |
# try: | |
# current_lb = pd.read_csv(leaderboard_file) | |
# if "Combined_Score" not in current_lb.columns: | |
# current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3 | |
# sort_column = "Combined_Score" | |
# if method == "WER Only": | |
# sort_column = "WER" | |
# elif method == "CER Only": | |
# sort_column = "CER" | |
# return prepare_leaderboard_for_display(current_lb, sort_column) | |
# except Exception as e: | |
# print(f"Error updating ranking: {str(e)}") | |
# return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"]) | |
# def process_submission(model_name, csv_file): | |
# """Process a new model submission""" | |
# if not model_name or not model_name.strip(): | |
# return "Error: Please provide a model name.", None | |
# if not csv_file: | |
# return "Error: Please upload a CSV file.", None | |
# try: | |
# df = pd.read_csv(csv_file) | |
# if len(df) == 0: | |
# return "Error: Uploaded CSV is empty.", None | |
# if set(df.columns) != {"id", "text"}: | |
# return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None | |
# if df["id"].duplicated().any(): | |
# dup_ids = df[df["id"].duplicated()]["id"].unique() | |
# return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None | |
# missing_ids = set(references.keys()) - set(df["id"]) | |
# extra_ids = set(df["id"]) - set(references.keys()) | |
# if missing_ids: | |
# return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None | |
# if extra_ids: | |
# return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None | |
# try: | |
# avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df) | |
# # Check for suspiciously low values | |
# if avg_wer < 0.001: | |
# return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None | |
# except Exception as e: | |
# return f"Error calculating metrics: {str(e)}", None | |
# leaderboard = pd.read_csv(leaderboard_file) | |
# timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") | |
# combined_score = avg_wer * 0.7 + avg_cer * 0.3 | |
# if model_name in leaderboard["Model_Name"].values: | |
# idx = leaderboard[leaderboard["Model_Name"] == model_name].index | |
# leaderboard.loc[idx, "WER"] = avg_wer | |
# leaderboard.loc[idx, "CER"] = avg_cer | |
# leaderboard.loc[idx, "Combined_Score"] = combined_score | |
# leaderboard.loc[idx, "timestamp"] = timestamp | |
# updated_leaderboard = leaderboard | |
# else: | |
# new_entry = pd.DataFrame( | |
# [[model_name, avg_wer, avg_cer, combined_score, timestamp]], | |
# columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"] | |
# ) | |
# updated_leaderboard = pd.concat([leaderboard, new_entry]) | |
# updated_leaderboard = updated_leaderboard.sort_values("Combined_Score") | |
# updated_leaderboard.to_csv(leaderboard_file, index=False) | |
# display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard) | |
# return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard | |
# except Exception as e: | |
# return f"Error processing submission: {str(e)}", None | |
# def get_current_leaderboard(): | |
# """Get the current leaderboard data for display""" | |
# try: | |
# if os.path.exists(leaderboard_file): | |
# current_leaderboard = pd.read_csv(leaderboard_file) | |
# if "Combined_Score" not in current_leaderboard.columns: | |
# current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3 | |
# current_leaderboard.to_csv(leaderboard_file, index=False) | |
# return current_leaderboard | |
# else: | |
# return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]) | |
# except Exception as e: | |
# print(f"Error getting leaderboard: {str(e)}") | |
# return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]) | |
# def create_leaderboard_table(): | |
# """Create and format the leaderboard table for display""" | |
# leaderboard_data = get_current_leaderboard() | |
# return prepare_leaderboard_for_display(leaderboard_data) | |
# with gr.Blocks(title="Bambara ASR Leaderboard") as demo: | |
# gr.Markdown( | |
# """ | |
# # π²π± Bambara ASR Leaderboard | |
# This leaderboard tracks and evaluates speech recognition models for the Bambara language. | |
# Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score. | |
# ## Current Models Performance | |
# """ | |
# ) | |
# current_data = get_current_leaderboard() | |
# if len(current_data) > 0: | |
# best_model = current_data.sort_values("Combined_Score").iloc[0] | |
# gr.Markdown(f""" | |
# ### π Current Best Model: **{best_model['Model_Name']}** | |
# * WER: **{best_model['WER']*100:.2f}%** | |
# * CER: **{best_model['CER']*100:.2f}%** | |
# * Combined Score: **{best_model['Combined_Score']*100:.2f}%** | |
# """) | |
# with gr.Tabs() as tabs: | |
# with gr.TabItem("π Model Rankings"): | |
# initial_leaderboard = create_leaderboard_table() | |
# ranking_method = gr.Radio( | |
# ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"], | |
# label="Ranking Method", | |
# value="Combined Score (WER 70%, CER 30%)" | |
# ) | |
# leaderboard_view = gr.DataFrame( | |
# value=initial_leaderboard, | |
# interactive=False, | |
# label="Models are ranked by selected metric - lower is better" | |
# ) | |
# ranking_method.change( | |
# fn=update_ranking, | |
# inputs=[ranking_method], | |
# outputs=[leaderboard_view] | |
# ) | |
# with gr.Accordion("Metrics Explanation", open=False): | |
# gr.Markdown( | |
# """ | |
# ## Understanding ASR Metrics | |
# ### Word Error Rate (WER) | |
# WER measures how accurately the ASR system recognizes whole words: | |
# * Lower values indicate better performance | |
# * Calculated as: (Substitutions + Insertions + Deletions) / Total Words | |
# * A WER of 0% means perfect transcription | |
# * A WER of 20% means approximately 1 in 5 words contains an error | |
# ### Character Error Rate (CER) | |
# CER measures accuracy at the character level: | |
# * More fine-grained than WER | |
# * Better at capturing partial word matches | |
# * Particularly useful for agglutinative languages like Bambara | |
# ### Combined Score | |
# * Weighted average: 70% WER + 30% CER | |
# * Provides a balanced evaluation of model performance | |
# * Used as the primary ranking metric | |
# """ | |
# ) | |
# with gr.TabItem("π Submit New Results"): | |
# gr.Markdown( | |
# """ | |
# ### Submit a new model for evaluation | |
# Upload a CSV file with the following format: | |
# * Must contain exactly two columns: 'id' and 'text' | |
# * The 'id' column should match the reference dataset IDs | |
# * The 'text' column should contain your model's transcriptions | |
# """ | |
# ) | |
# with gr.Row(): | |
# model_name_input = gr.Textbox( | |
# label="Model Name", | |
# placeholder="e.g., MALIBA-AI/bambara-asr" | |
# ) | |
# gr.Markdown("*Use a descriptive name to identify your model*") | |
# with gr.Row(): | |
# csv_upload = gr.File( | |
# label="Upload CSV File", | |
# file_types=[".csv"] | |
# ) | |
# gr.Markdown("*CSV with columns: id, text*") | |
# submit_btn = gr.Button("Submit", variant="primary") | |
# output_msg = gr.Textbox(label="Status", interactive=False) | |
# leaderboard_display = gr.DataFrame( | |
# label="Updated Leaderboard", | |
# value=initial_leaderboard, | |
# interactive=False | |
# ) | |
# submit_btn.click( | |
# fn=process_submission, | |
# inputs=[model_name_input, csv_upload], | |
# outputs=[output_msg, leaderboard_display] | |
# ) | |
# with gr.TabItem("π Benchmark Dataset"): | |
# gr.Markdown( | |
# """ | |
# ## About the Benchmark Dataset | |
# This leaderboard uses the **[sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/MALIBA-AI/bambara-speech-recognition-leaderboard)** dataset: | |
# * Contains diverse Bambara speech samples | |
# * Includes various speakers, accents, and dialects | |
# * Covers different speech styles and recording conditions | |
# * Transcribed and validated | |
# ### How to Generate Predictions | |
# To submit results to this leaderboard: | |
# 1. Download the audio files from the benchmark dataset | |
# 2. Run your ASR model on the audio files | |
# 3. Generate a CSV file with 'id' and 'text' columns | |
# 4. Submit your results using the form in the "Submit New Results" tab | |
# ### Evaluation Guidelines | |
# * Text is normalized (lowercase, punctuation removed) before metrics calculation | |
# * Extreme outliers are capped to prevent skewing results | |
# * All submissions are validated for format and completeness | |
# NB: This work is a collaboration between MALIBA-AI, RobotsMali AI4D-LAB and Djelia | |
# """ | |
# ) | |
# gr.Markdown( | |
# """ | |
# --- | |
# ### About MALIBA-AI | |
# **MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation** | |
# *"No Malian Language Left Behind"* | |
# This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology. | |
# For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI). | |
# """ | |
# ) | |
# if __name__ == "__main__": | |
# demo.launch() | |
import html
import os
import re
from datetime import datetime

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from datasets import load_dataset
from huggingface_hub import login
from jiwer import cer, wer
# Extra <head> markup: preconnects to the Google Fonts hosts and loads the
# Inter and Rubik font families referenced by the page styling.
custom_head_html = """
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet">
"""
# Header with MALIBA-AI branding.
# NOTE(review): the flag, bullet and microphone glyphs were restored from
# mis-decoded UTF-8. The flag and bullet decode unambiguously; the microphone
# is a best-effort reconstruction -- confirm against the deployed page.
new_header_html = """
<center>
<br><br>
<div style="display: flex; align-items: center; justify-content: center; gap: 20px; margin-bottom: 20px;">
    <div style="font-size: 4em;">🇲🇱</div>
    <div>
        <h1 style="margin: 0; font-family: 'Rubik', sans-serif; color: #2f3b7d; font-size: 2.5em; font-weight: 700;">
            Bambara ASR Leaderboard
        </h1>
        <p style="margin: 5px 0 0 0; font-size: 1.2em; color: #7d3561; font-weight: 600;">
            Powered by MALIBA-AI • "No Malian Language Left Behind"
        </p>
    </div>
    <div style="font-size: 4em;">🎙️</div>
</div>
</center>
"""
# Stylesheet for the app (look inspired by the Sahara leaderboard).
# NOTE(review): several selectors target generated `svelte-*` class names;
# presumably these are tied to a specific Gradio build -- confirm after upgrades.
sahara_style_css = """
/* Global Styles */
div[class*="gradio-container"] {
    background: #FFFBF5 !important;
    color: #000 !important;
    font-family: 'Inter', sans-serif !important;
}
div.svelte-1nguped {
    background: white !important;
}
.fillable.svelte-15jxnnn.svelte-15jxnnn:not(.fill_width) {
    max-width: 1580px !important;
}
/* Navigation Buttons */
.nav-button {
    background-color: #117b75 !important;
    color: #fff !important;
    font-weight: bold !important;
    border-radius: 8px !important;
    border: none !important;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
    transition: all 0.3s ease !important;
}
.nav-button:hover {
    background-color: #0f6b66 !important;
    color: #e8850e !important;
    transform: translateY(-1px) !important;
    box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
}
/* Content Cards */
.content-section {
    padding: 40px 0;
}
.content-card {
    background-color: #fff !important;
    border-radius: 16px !important;
    box-shadow: 0 10px 25px -5px rgba(0,0,0,0.1), 0 8px 10px -6px rgba(0,0,0,0.1) !important;
    padding: 40px !important;
    margin-bottom: 30px !important;
    border: 1px solid rgba(0,0,0,0.05) !important;
}
/* Typography */
.content-card h2 {
    font-family: "Rubik", sans-serif !important;
    font-size: 32px !important;
    font-weight: 700 !important;
    line-height: 1.25 !important;
    letter-spacing: -1px !important;
    color: #2f3b7d !important;
    margin-bottom: 20px !important;
    text-align: center !important;
}
.content-card h3 {
    font-size: 22px !important;
    color: #2f3b7d !important;
    font-weight: 600 !important;
    margin-bottom: 15px !important;
}
.content-card h4 {
    font-family: "Rubik", sans-serif !important;
    color: #7d3561 !important;
    font-weight: 600 !important;
    margin-bottom: 10px !important;
}
.title {
    color: #7d3561 !important;
    font-weight: 600 !important;
}
/* Tab Styling */
.tab-wrapper.svelte-1tcem6n.svelte-1tcem6n {
    display: flex;
    align-items: center;
    justify-content: space-between;
    position: relative;
    height: auto !important;
    padding-bottom: 0 !important;
}
.selected.svelte-1tcem6n.svelte-1tcem6n {
    background-color: #7d3561 !important;
    color: #fff !important;
    border-radius: 8px 8px 0 0 !important;
}
button.svelte-1tcem6n.svelte-1tcem6n {
    color: #7d3561 !important;
    font-weight: 600 !important;
    font-size: 16px !important;
    padding: 12px 20px !important;
    background-color: #fff !important;
    border-radius: 8px 8px 0 0 !important;
    border: 2px solid #e9ecef !important;
    border-bottom: none !important;
    transition: all 0.3s ease !important;
}
button.svelte-1tcem6n.svelte-1tcem6n:hover {
    background-color: #f8f9fa !important;
    border-color: #7d3561 !important;
}
.tab-container.svelte-1tcem6n.svelte-1tcem6n:after {
    content: "";
    position: absolute;
    bottom: 0;
    left: 0;
    right: 0;
    height: 3px;
    background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
}
/* Table Styling */
div[class*="gradio-container"] .prose table {
    color: #000 !important;
    border: 2px solid #dca02a !important;
    border-radius: 12px !important;
    margin-bottom: 20px !important;
    margin-left: auto !important;
    margin-right: auto !important;
    width: 100% !important;
    border-collapse: separate !important;
    border-spacing: 0 !important;
    overflow: hidden !important;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
}
div[class*="gradio-container"] .prose thead tr {
    background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
}
div[class*="gradio-container"] .prose th {
    color: #fff !important;
    font-weight: 700 !important;
    font-size: 14px !important;
    padding: 15px 10px !important;
    text-align: center !important;
    border: none !important;
}
div[class*="gradio-container"] .prose td {
    font-size: 14px !important;
    padding: 12px 10px !important;
    border: none !important;
    text-align: center !important;
    color: #000 !important;
    border-bottom: 1px solid #f8f9fa !important;
}
div[class*="gradio-container"] .prose tbody tr:nth-child(even) {
    background-color: #f8f9fa !important;
}
div[class*="gradio-container"] .prose tbody tr:hover {
    background-color: #e3f2fd !important;
    transition: background-color 0.2s ease !important;
}
/* First column (model names) styling */
div[class*="gradio-container"] .prose th:first-child,
div[class*="gradio-container"] .prose td:first-child {
    text-align: left !important;
    min-width: 250px !important;
    font-weight: 600 !important;
}
/* Performance badges */
.performance-badge {
    display: inline-block;
    padding: 4px 8px;
    border-radius: 12px;
    font-size: 12px;
    font-weight: 600;
    margin-left: 8px;
}
.badge-excellent {
    background: #d4edda;
    color: #155724;
}
.badge-good {
    background: #fff3cd;
    color: #856404;
}
.badge-fair {
    background: #f8d7da;
    color: #721c24;
}
/* Stats cards */
.stats-grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
    gap: 20px;
    margin: 20px 0;
}
.stat-card {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 12px;
    text-align: center;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.stat-number {
    font-size: 2em;
    font-weight: 700;
    margin-bottom: 5px;
}
.stat-label {
    font-size: 0.9em;
    opacity: 0.9;
}
/* Form styling */
.form-section {
    background: #f8f9fa;
    border-radius: 12px;
    padding: 25px;
    margin: 20px 0;
    border-left: 4px solid #7d3561;
}
/* Citation block */
.citation-block {
    background-color: #FDF6E3 !important;
    border-radius: 12px !important;
    padding: 25px !important;
    border-left: 4px solid #D97706 !important;
    margin: 20px 0 !important;
}
/* Dropdown styling */
.gradio-dropdown {
    border-radius: 8px !important;
    border: 2px solid #e9ecef !important;
}
.gradio-dropdown:focus {
    border-color: #7d3561 !important;
    box-shadow: 0 0 0 3px rgba(125, 53, 97, 0.1) !important;
}
/* Button styling */
.gradio-button {
    border-radius: 8px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}
.gradio-button.primary {
    background: linear-gradient(135deg, #7d3561 0%, #2f3b7d 100%) !important;
    border: none !important;
    color: white !important;
}
.gradio-button.primary:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 4px 12px rgba(125, 53, 97, 0.3) !important;
}
/* Responsive design */
@media (max-width: 768px) {
    .content-card {
        padding: 20px !important;
        margin-bottom: 20px !important;
    }
    .content-card h2 {
        font-size: 24px !important;
    }
    .stats-grid {
        grid-template-columns: 1fr !important;
    }
}
"""
# Log in to the Hugging Face Hub when a token is supplied via the environment.
# NOTE(review): the variable is spelled "HG_TOKEN" (not "HF_TOKEN"); presumably
# this matches the secret configured for the Space -- confirm.
token = os.environ.get("HG_TOKEN")
if token:
    login(token)

# Load the reference transcriptions keyed by sample id. A failure is reported
# and leaves `references` empty so the module still imports.
try:
    dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
    references = {row["id"]: row["text"] for row in dataset}
    print(f"Loaded {len(references)} reference transcriptions")
except Exception as e:
    print(f"Error loading dataset: {str(e)}")
    references = {}

# Initialize the leaderboard CSV: seed it with sample rows when it does not
# exist yet, otherwise back-fill any columns added by newer versions.
leaderboard_file = "leaderboard.csv"
if not os.path.exists(leaderboard_file):
    sample_data = [
        ["MALIBA-AI/bambara-whisper-small", 0.2264, 0.1094, "2025-03-15 10:30:45", "Whisper-based", "Mali", "ASR"],
        ["OpenAI/whisper-base", 0.3264, 0.1094, "2025-03-15 10:30:45", "Foundation", "USA", "ASR"],
    ]
    sample_df = pd.DataFrame(
        sample_data,
        columns=["Model_Name", "WER", "CER", "timestamp", "Type", "Origin", "Task"],
    )
    # Derive the combined score with the same 70/30 weighting used for the
    # back-fill below, so the seed rows cannot disagree with the formula.
    sample_df.insert(
        3,
        "Combined_Score",
        (sample_df["WER"] * 0.7 + sample_df["CER"] * 0.3).round(4),
    )
    sample_df.to_csv(leaderboard_file, index=False)
    print("Created new leaderboard file with sample data")
else:
    leaderboard_df = pd.read_csv(leaderboard_file)
    # Add new columns if they don't exist
    required_columns = ["Combined_Score", "Type", "Origin", "Task"]
    missing_columns = [col for col in required_columns if col not in leaderboard_df.columns]
    for col in missing_columns:
        if col == "Combined_Score":
            leaderboard_df[col] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
        else:
            default_val = "Unknown" if col != "Task" else "ASR"
            leaderboard_df[col] = default_val
    # Only touch the file when something was actually added.
    if missing_columns:
        leaderboard_df.to_csv(leaderboard_file, index=False)
    print(f"Loaded leaderboard with {len(leaderboard_df)} entries")
def normalize_text(text):
    """Return *text* in the canonical form used for WER/CER scoring.

    Non-string input is converted with ``str()``. The result is lowercased,
    every character that is neither a word character nor whitespace is
    removed (underscores count as word characters and are kept), runs of
    whitespace collapse to a single space, and the ends are trimmed.
    """
    as_string = text if isinstance(text, str) else str(text)
    without_punctuation = re.sub(r'[^\w\s]', '', as_string.lower())
    return re.sub(r'\s+', ' ', without_punctuation).strip()
def calculate_metrics(predictions_df):
    """Score a predictions table against the module-level ``references``.

    Args:
        predictions_df: DataFrame with an ``id`` column and a ``text`` column.

    Returns:
        A tuple ``(avg_wer, avg_cer, weighted_wer, weighted_cer, results)``.
        The ``avg_*`` values are plain means over the scored samples, the
        ``weighted_*`` values weight each sample by its reference word /
        character count, and ``results`` is a list with one dict per scored
        sample.

    Raises:
        ValueError: if no sample could be scored.

    Rows whose id is not in ``references`` and rows whose normalized
    reference is empty are skipped. A missing or empty prediction is *not*
    skipped: it is scored as a total miss so that leaving hard utterances
    blank cannot improve a model's average.
    """
    results = []
    total_ref_words = 0
    total_ref_chars = 0

    for id_val, raw_text in zip(predictions_df["id"], predictions_df["text"]):
        if id_val not in references:
            continue

        reference = normalize_text(references[id_val])
        if not reference:
            # Error rates are undefined without reference tokens.
            continue

        # An empty CSV cell arrives as NaN; treat it as "no output" instead
        # of letting it be stringified to the word "nan".
        hypothesis = "" if pd.isna(raw_text) else normalize_text(raw_text)

        ref_word_count = len(reference.split())
        ref_char_count = len(reference)

        try:
            if hypothesis:
                # Cap extreme outliers so one runaway sample cannot dominate.
                sample_wer = min(wer(reference, hypothesis), 2.0)
                sample_cer = min(cer(reference, hypothesis), 2.0)
            else:
                # Every reference token counts as a deletion: error rate 1.0.
                sample_wer = 1.0
                sample_cer = 1.0
        except Exception as e:
            # Best effort: report the sample and keep scoring the rest.
            print(f"Error processing sample {id_val}: {str(e)}")
            continue

        total_ref_words += ref_word_count
        total_ref_chars += ref_char_count
        results.append({
            "id": id_val,
            "reference": reference,
            "hypothesis": hypothesis,
            "ref_word_count": ref_word_count,
            "ref_char_count": ref_char_count,
            "wer": sample_wer,
            "cer": sample_cer,
        })

    if not results:
        raise ValueError("No valid samples for WER/CER calculation")

    avg_wer = sum(item["wer"] for item in results) / len(results)
    avg_cer = sum(item["cer"] for item in results) / len(results)
    weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
    weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars
    return avg_wer, avg_cer, weighted_wer, weighted_cer, results
def format_as_percentage(value):
    """Render a fraction (e.g. ``0.1234``) as a percent string with two decimals."""
    percent = value * 100
    return "{:.2f}%".format(percent)
def get_performance_badge(score):
    """Map an error-rate score to a badge label (lower scores are better).

    Scores below 0.15 are "Excellent", scores below 0.30 are "Good", and
    everything else (including NaN, which fails both comparisons) is "Fair".
    """
    # NOTE(review): the leading glyphs look like mis-decoded emoji; they are
    # kept verbatim here -- confirm the intended characters.
    tiers = (
        (0.15, "π Excellent"),
        (0.30, "π₯ Good"),
    )
    for upper_bound, label in tiers:
        if score < upper_bound:
            return label
    return "π Fair"
def add_medals_to_models(df, score_col="Combined_Score"):
    """Return a copy of *df* sorted by *score_col* with medal-prefixed names.

    The frame is returned unchanged when it is empty or lacks *score_col*.
    Otherwise rows are ordered ascending by the numeric score (lower is
    better, unparsable scores last) and re-indexed. The three smallest
    distinct scores each get a medal prefix on ``Model_Name``; rows that tie
    on a score share the same medal.
    """
    if df.empty or score_col not in df.columns:
        return df

    numeric_col = f"{score_col}_float"
    ranked = df.copy()
    # Coerce to numbers so string scores sort numerically; bad values become NaN.
    ranked[numeric_col] = pd.to_numeric(ranked[score_col], errors='coerce')
    ranked = ranked.sort_values(by=numeric_col, ascending=True, na_position='last').reset_index(drop=True)

    # NOTE(review): these glyphs look like mis-decoded emoji (the last two are
    # now indistinguishable); kept verbatim -- confirm the intended characters.
    medals = ["π", "π₯", "π₯"]

    # Pair the lowest distinct scores with the medals, best first.
    distinct_scores = np.sort(ranked[numeric_col].dropna().unique())
    prefix_for_score = {
        score: medal + " " for score, medal in zip(distinct_scores, medals)
    }

    # NaN never matches a key, so unscored rows get an empty prefix.
    ranked["Medal"] = [prefix_for_score.get(score, "") for score in ranked[numeric_col]]
    ranked["Model_Name"] = ranked["Medal"] + ranked["Model_Name"].astype(str)

    # Drop the helper columns again.
    return ranked.drop(columns=[numeric_col, "Medal"])
def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
    """Build the table shown in the UI from the raw leaderboard frame.

    Args:
        df: raw leaderboard DataFrame (or ``None``).
        sort_by: name of the score column used for ordering and medals.

    Returns:
        A DataFrame restricted to the display columns that could be built, in
        this order: Rank, Model, WER (%), CER (%), Combined Score (%),
        Performance, Type, Date. An empty frame with all of those columns is
        returned when *df* is ``None`` or has no rows.
    """
    display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
    if df is None or len(df) == 0:
        return pd.DataFrame(columns=display_columns)

    display_df = df.copy()

    # Shorten "org/model" to "model" BEFORE medals are attached: the medal is
    # prefixed to the name, so splitting on "/" afterwards would discard it.
    display_df["Model_Name"] = display_df["Model_Name"].apply(lambda x: str(x).split("/")[-1])
    display_df = add_medals_to_models(display_df, sort_by)

    # Order by the requested score (lower is better, unparsable scores last).
    sort_key = f"{sort_by}_float"
    display_df[sort_key] = pd.to_numeric(display_df[sort_by], errors='coerce')
    display_df = display_df.sort_values(sort_key, ascending=True, na_position='last', kind="stable")
    display_df = display_df.drop(columns=[sort_key])

    display_df.insert(0, "Rank", range(1, len(display_df) + 1))

    # Percent columns use exactly the headers listed in display_columns; the
    # combined score in particular is "Combined Score (%)", not
    # "Combined_Score (%)", otherwise the column is filtered out below.
    percent_headers = {
        "WER": "WER (%)",
        "CER": "CER (%)",
        "Combined_Score": "Combined Score (%)",
    }
    for source_col, header in percent_headers.items():
        if source_col in display_df.columns:
            display_df[header] = display_df[source_col].apply(
                lambda x: f"{x * 100:.2f}" if pd.notna(x) else "---"
            )

    display_df["Performance"] = display_df["Combined_Score"].apply(
        lambda x: get_performance_badge(x) if pd.notna(x) else "---"
    )
    display_df["Model"] = display_df["Model_Name"]

    if "timestamp" in display_df.columns:
        dates = pd.to_datetime(display_df["timestamp"], errors='coerce').dt.strftime("%Y-%m-%d")
        # Unparsable timestamps get the same placeholder as other missing cells.
        display_df["Date"] = dates.fillna("---")
    else:
        display_df["Date"] = "---"

    available_columns = [col for col in display_columns if col in display_df.columns]
    return display_df[available_columns]
def create_performance_chart():
    """Build the model-comparison figure from the leaderboard CSV.

    Reads ``leaderboard_file``, orders models by ``Combined_Score`` and draws
    WER and CER as bars plus the combined score as a line, all in percent.

    Returns:
        The Plotly figure, or ``None`` when the leaderboard has no rows or
        anything fails (the error is printed).
    """
    def _short_name(name):
        # "org/model" -> "model"; names without a slash are left alone.
        return name.split("/")[-1] if "/" in name else name

    try:
        board = pd.read_csv(leaderboard_file)
        if len(board) == 0:
            return None

        board = board.sort_values("Combined_Score")
        # All three traces share the same x-axis labels.
        labels = board["Model_Name"].apply(_short_name)

        traces = [
            go.Bar(
                name="WER",
                x=labels,
                y=board["WER"] * 100,
                marker_color='#ff7f0e',
                hovertemplate='<b>%{x}</b><br>WER: %{y:.2f}%<extra></extra>'
            ),
            go.Bar(
                name="CER",
                x=labels,
                y=board["CER"] * 100,
                marker_color='#2ca02c',
                hovertemplate='<b>%{x}</b><br>CER: %{y:.2f}%<extra></extra>'
            ),
            go.Scatter(
                name="Combined Score",
                x=labels,
                y=board["Combined_Score"] * 100,
                mode='lines+markers',
                line=dict(color='#d62728', width=3),
                marker=dict(size=8),
                hovertemplate='<b>%{x}</b><br>Combined Score: %{y:.2f}%<extra></extra>'
            ),
        ]

        fig = go.Figure()
        for trace in traces:
            fig.add_trace(trace)

        # NOTE(review): the glyph at the start of the title looks like a
        # mis-decoded emoji; kept verbatim -- confirm the intended character.
        chart_title = {
            'text': "π Model Performance Comparison",
            'x': 0.5,
            'font': {'size': 18, 'family': 'Rubik'}
        }
        legend_above_plot = dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1
        )
        fig.update_layout(
            title=chart_title,
            xaxis_title="Model",
            yaxis_title="Error Rate (%)",
            hovermode='x unified',
            height=500,
            showlegend=True,
            plot_bgcolor='rgba(0,0,0,0)',
            paper_bgcolor='rgba(0,0,0,0)',
            font=dict(family="Inter", size=12),
            legend=legend_above_plot
        )
        return fig
    except Exception as e:
        print(f"Error creating chart: {str(e)}")
        return None
def get_leaderboard_stats():
    """Return an HTML snippet with summary cards for the leaderboard.

    Reads ``leaderboard_file`` and reports the number of models, the best
    (lowest) combined score, the mean WER and CER, and the name of the best
    model. With no rows, a single "0 models" card is returned. Any failure
    yields a short error paragraph instead of raising.

    Model names come from user submissions, so both the name and any error
    text are HTML-escaped before being interpolated into the markup.
    """
    try:
        df = pd.read_csv(leaderboard_file)
        if len(df) == 0:
            return """
            <div class="stats-grid">
                <div class="stat-card">
                    <div class="stat-number">0</div>
                    <div class="stat-label">Models Submitted</div>
                </div>
            </div>
            """

        best_model = df.loc[df["Combined_Score"].idxmin()]
        total_models = len(df)
        avg_wer = df["WER"].mean()
        avg_cer = df["CER"].mean()
        champion_name = html.escape(str(best_model['Model_Name']))

        # NOTE(review): the glyph before "Current Champion" looks like a
        # mis-decoded emoji; kept verbatim -- confirm the intended character.
        return f"""
        <div class="stats-grid">
            <div class="stat-card">
                <div class="stat-number">{total_models}</div>
                <div class="stat-label">Models Evaluated</div>
            </div>
            <div class="stat-card">
                <div class="stat-number">{format_as_percentage(best_model['Combined_Score'])}</div>
                <div class="stat-label">Best Combined Score</div>
            </div>
            <div class="stat-card">
                <div class="stat-number">{format_as_percentage(avg_wer)}</div>
                <div class="stat-label">Average WER</div>
            </div>
            <div class="stat-card">
                <div class="stat-number">{format_as_percentage(avg_cer)}</div>
                <div class="stat-label">Average CER</div>
            </div>
        </div>
        <div style="text-align: center; margin-top: 20px;">
            <h4>π Current Champion: {champion_name}</h4>
        </div>
        """
    except Exception as e:
        return f"<p>Error loading stats: {html.escape(str(e))}</p>"
def update_ranking(method):
    """Re-rank the stored leaderboard for the option selected in the UI.

    Args:
        method: Label of the chosen ranking option. "WER Only" ranks by the
            "WER" column, "CER Only" by "CER"; every other value ranks by
            "Combined_Score".

    Returns:
        Whatever ``prepare_leaderboard_for_display`` produces for the chosen
        column, or an empty frame with the display columns if anything fails.
    """
    display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
    try:
        board = pd.read_csv(leaderboard_file)
        # Backfill the derived score for files written before it existed.
        if "Combined_Score" not in board.columns:
            board["Combined_Score"] = board["WER"] * 0.7 + board["CER"] * 0.3
        chosen_column = (
            "WER" if method == "WER Only"
            else "CER" if method == "CER Only"
            else "Combined_Score"
        )
        return prepare_leaderboard_for_display(board, chosen_column)
    except Exception as e:
        print(f"Error updating ranking: {str(e)}")
        return pd.DataFrame(columns=display_columns)
def compare_models(model_1_name, model_2_name):
    """Compare two leaderboard entries metric by metric.

    Args:
        model_1_name: ``Model_Name`` of the first entry.
        model_2_name: ``Model_Name`` of the second entry.

    Returns:
        pandas.DataFrame: One row per metric (WER, CER, Combined Score) with a
        column per model and a "Difference" column holding model 1 minus
        model 2 in percentage points. Because these are error rates, a
        negative difference means model 1 is the better one. A single-column
        "Info" frame is returned when the selection is invalid, and a
        single-column "Error" frame when reading or formatting fails.
    """
    try:
        df = pd.read_csv(leaderboard_file)
        if model_1_name == model_2_name:
            return pd.DataFrame([{"Info": "Please select two different models to compare."}])
        # Older leaderboard files may lack the derived column.
        if "Combined_Score" not in df.columns:
            df["Combined_Score"] = df["WER"] * 0.7 + df["CER"] * 0.3
        model_1 = df[df["Model_Name"] == model_1_name]
        model_2 = df[df["Model_Name"] == model_2_name]
        if model_1.empty or model_2.empty:
            return pd.DataFrame([{"Info": "One or both models not found in leaderboard."}])
        m1 = model_1.iloc[0]
        m2 = model_2.iloc[0]

        # Prefer the short name after the organisation prefix, but fall back
        # to the full names when the short names would clash with each other
        # or with the fixed columns: duplicate dict keys would silently drop
        # one model's column from the table.
        label_1 = model_1_name.split("/")[-1]
        label_2 = model_2_name.split("/")[-1]
        reserved = {"Metric", "Difference"}
        if label_1 == label_2 or label_1 in reserved or label_2 in reserved:
            label_1, label_2 = model_1_name, model_2_name

        metric_columns = ["WER", "CER", "Combined_Score"]
        comparison_data = {
            "Metric": ["WER", "CER", "Combined Score"],
            label_1: [f"{m1[col] * 100:.2f}%" for col in metric_columns],
            label_2: [f"{m2[col] * 100:.2f}%" for col in metric_columns],
            "Difference": [f"{(m1[col] - m2[col]) * 100:+.2f}%" for col in metric_columns],
        }
        return pd.DataFrame(comparison_data)
    except Exception as e:
        return pd.DataFrame([{"Error": f"Error comparing models: {str(e)}"}])
def _validate_submission_frame(df):
    """Return an error message if the predictions frame is unusable, else None."""
    if df.empty:
        return "β **Error:** Uploaded CSV is empty."
    if set(df.columns) != {"id", "text"}:
        return f"β **Error:** CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}"
    if df["id"].duplicated().any():
        dup_ids = df[df["id"].duplicated()]["id"].unique()
        return f"β **Error:** Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}"
    reference_ids = set(references.keys())
    submitted_ids = set(df["id"])
    missing_ids = reference_ids - submitted_ids
    extra_ids = submitted_ids - reference_ids
    # Sets have no stable order; sort the sample so the message is reproducible.
    if missing_ids:
        sample = sorted(missing_ids, key=str)[:5]
        return f"β **Error:** Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, sample))}"
    if extra_ids:
        sample = sorted(extra_ids, key=str)[:5]
        return f"β **Error:** Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, sample))}"
    return None


def process_submission(model_name, csv_file, model_type, origin_country):
    """Validate an uploaded predictions CSV, score it and update the leaderboard.

    Args:
        model_name: Name under which the result is stored; surrounding
            whitespace is ignored.
        csv_file: Path or file-like object accepted by ``pandas.read_csv``;
            must hold exactly the columns ``id`` and ``text``.
        model_type: Value stored in the leaderboard's "Type" column.
        origin_country: Value stored in the leaderboard's "Origin" column.

    Returns:
        tuple: ``(message, leaderboard, chart)``. ``message`` is Markdown.
        On any validation or processing failure the last two items are
        ``None``; on success they are the display-ready leaderboard and the
        refreshed performance chart.
    """
    # Normalise once so the emptiness check and the stored key agree;
    # otherwise " foo " and "foo" would become two separate leaderboard rows.
    model_name = (model_name or "").strip()
    if not model_name:
        return "β **Error:** Please provide a model name.", None, None
    if not csv_file:
        return "β **Error:** Please upload a CSV file.", None, None
    try:
        df = pd.read_csv(csv_file)
        problem = _validate_submission_frame(df)
        if problem:
            return problem, None, None
        try:
            avg_wer, avg_cer, _weighted_wer, _weighted_cer, _detailed_results = calculate_metrics(df)
            # A near-zero WER is treated as suspicious and rejected.
            if avg_wer < 0.001:
                return "β **Error:** WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None, None
        except Exception as e:
            return f"β **Error calculating metrics:** {str(e)}", None, None
        # Update leaderboard
        leaderboard = pd.read_csv(leaderboard_file)
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        combined_score = avg_wer * 0.7 + avg_cer * 0.3
        if model_name in leaderboard["Model_Name"].values:
            # Re-submission: overwrite the existing row(s) for this model.
            idx = leaderboard[leaderboard["Model_Name"] == model_name].index
            leaderboard.loc[idx, "WER"] = avg_wer
            leaderboard.loc[idx, "CER"] = avg_cer
            leaderboard.loc[idx, "Combined_Score"] = combined_score
            leaderboard.loc[idx, "timestamp"] = timestamp
            leaderboard.loc[idx, "Type"] = model_type
            leaderboard.loc[idx, "Origin"] = origin_country
            updated_leaderboard = leaderboard
        else:
            new_entry = pd.DataFrame(
                [[model_name, avg_wer, avg_cer, combined_score, timestamp, model_type, origin_country, "ASR"]],
                columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]
            )
            # ignore_index avoids duplicate index labels in the combined frame.
            updated_leaderboard = pd.concat([leaderboard, new_entry], ignore_index=True)
        updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
        updated_leaderboard.to_csv(leaderboard_file, index=False)
        display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
        chart = create_performance_chart()
        badge = get_performance_badge(combined_score)
        success_msg = f"""
β **Submission processed successfully!**
**{model_name}** ({model_type} from {origin_country})
- **WER:** {format_as_percentage(avg_wer)}
- **CER:** {format_as_percentage(avg_cer)}
- **Combined Score:** {format_as_percentage(combined_score)}
- **Performance:** {badge}
"""
        return success_msg, display_leaderboard, chart
    except Exception as e:
        return f"β **Error processing submission:** {str(e)}", None, None
def get_current_leaderboard():
    """Load the leaderboard CSV, backfilling columns older files may lack.

    Missing columns are filled as follows: "Combined_Score" is computed as
    ``0.7 * WER + 0.3 * CER``, "Type" and "Origin" default to "Unknown" and
    "Task" defaults to "ASR". The file is rewritten only when at least one
    column was actually added, so a plain read has no side effect on disk.

    Returns:
        pandas.DataFrame: The stored leaderboard, or an empty frame with the
        full column set when the file does not exist or cannot be read.
    """
    all_columns = ["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]
    try:
        if not os.path.exists(leaderboard_file):
            return pd.DataFrame(columns=all_columns)
        current_leaderboard = pd.read_csv(leaderboard_file)
        backfilled = False
        # Ensure all required columns exist
        if "Combined_Score" not in current_leaderboard.columns:
            current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
            backfilled = True
        for col, default in (("Type", "Unknown"), ("Origin", "Unknown"), ("Task", "ASR")):
            if col not in current_leaderboard.columns:
                current_leaderboard[col] = default
                backfilled = True
        # Persist the migration once; skip the write when nothing changed.
        if backfilled:
            current_leaderboard.to_csv(leaderboard_file, index=False)
        return current_leaderboard
    except Exception as e:
        print(f"Error getting leaderboard: {str(e)}")
        return pd.DataFrame(columns=all_columns)
def create_leaderboard_table():
    """Return the stored leaderboard formatted for the display table.

    Loads the data with ``get_current_leaderboard`` and passes it through
    ``prepare_leaderboard_for_display`` using that helper's default ordering.
    """
    return prepare_leaderboard_for_display(get_current_leaderboard())
def df_to_html(df):
    """Render a DataFrame as a full-width, centred HTML table.

    Args:
        df: Frame to render; the index is not included in the output.

    Returns:
        str: ``<table>`` markup carrying the ``leaderboard-table`` class and
        an inline ``width``/``margin`` style, or a short "No data available"
        paragraph when the frame is empty.
    """
    if df.empty:
        return "<p style='text-align: center; color: #666;'>No data available</p>"
    # NOTE(review): escape=False emits cell text as raw HTML. Confirm every
    # value reaching this function is trusted or already escaped.
    markup = df.to_html(index=False, escape=False, classes="leaderboard-table")
    # pandas writes `<table border="1" class="dataframe leaderboard-table">`,
    # so matching on an exact class attribute never succeeds. Inject the
    # inline style into the opening tag itself (first occurrence only).
    return markup.replace("<table ", '<table style="width: 100%; margin: 0 auto;" ', 1)
# Main Gradio Interface
# Builds the whole page: header, navigation links, summary stats, four tabs
# (leaderboard, submission, comparison, methodology) and the citation/footer.
# NOTE(review): several labels contain garbled glyphs that look like
# mis-decoded emoji; they are left untouched because the intended characters
# cannot be recovered with certainty from this file.
# NOTE(review): container nesting follows the order in which components are
# created; confirm the intended layout before rearranging the `with` blocks.
with gr.Blocks(
    title="π²π± Bambara ASR Leaderboard | MALIBA-AI",
    css=sahara_style_css,
    head=custom_head_html,
    theme=gr.themes.Soft()
) as demo:
    # Header Section
    gr.HTML(new_header_html)
    # Navigation Buttons
    with gr.Row():
        gr.Button("π MALIBA-AI Website", link="https://maliba-ai.org/", elem_classes=['nav-button'])
        gr.Button("π HF Dataset Repo", link="https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark", elem_classes=['nav-button'])
        gr.Button("π€ MALIBA-AI Hub", link="https://huggingface.co/MALIBA-AI", elem_classes=['nav-button'])
        gr.Button("π Documentation", link="https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard", elem_classes=['nav-button'])
    with gr.Group(elem_classes="content-card"):
        # Stats display
        stats_html = gr.HTML(get_leaderboard_stats())
        with gr.Tabs() as tabs:
            with gr.TabItem("π Main Leaderboard", id="main"):
                gr.HTML("<h2>Main Leaderboard</h2>")
                # Computed once at build time; also seeds the table on the submit tab.
                initial_leaderboard = create_leaderboard_table()
                with gr.Row():
                    ranking_method = gr.Radio(
                        ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
                        label="π Ranking Method",
                        value="Combined Score (WER 70%, CER 30%)",
                        info="Choose how to rank the models"
                    )
                leaderboard_view = gr.DataFrame(
                    value=initial_leaderboard,
                    interactive=False,
                    label="π Leaderboard Rankings - Lower scores indicate better performance",
                    wrap=True,
                    height=400
                )
                # Performance chart
                gr.Markdown("### π Visual Performance Comparison")
                performance_chart = gr.Plot(
                    value=create_performance_chart(),
                    label="Model Performance Visualization"
                )
                # Re-read and re-sort the stored leaderboard when the option changes.
                ranking_method.change(
                    fn=update_ranking,
                    inputs=[ranking_method],
                    outputs=[leaderboard_view]
                )
                with gr.Accordion("π Understanding ASR Metrics", open=False):
                    gr.Markdown("""
## π― Automatic Speech Recognition Evaluation Metrics
### Word Error Rate (WER)
**WER** measures transcription accuracy at the word level:
- **Formula:** `(Substitutions + Insertions + Deletions) / Total Reference Words`
- **Range:** 0% (perfect) to 100%+ (very poor)
- **Interpretation:**
- 0-5%: π Excellent performance
- 5-15%: π₯ Good performance
- 15-30%: π Fair performance
- 30%+: Poor performance
### Character Error Rate (CER)
**CER** measures transcription accuracy at the character level:
- **Advantage:** More granular than WER, captures partial matches
- **Benefit for Bambara:** Particularly valuable for agglutinative languages
- **Typical Range:** Usually lower than WER values
### Combined Score (Primary Ranking Metric)
**Formula:** `Combined Score = 0.7 × WER + 0.3 × CER`
- **Rationale:** Balanced evaluation emphasizing word-level accuracy
- **Usage:** Primary metric for model ranking
### π― Performance Categories
- π **Excellent**: < 15% Combined Score
- π₯ **Good**: 15-30% Combined Score
- π **Fair**: > 30% Combined Score
""")
            with gr.TabItem("π€ Submit New Model", id="submit"):
                gr.HTML("<h2>Submit Your Bambara ASR Model</h2>")
                gr.Markdown("""
### π Ready to benchmark your model? Submit your results and join the leaderboard!
Follow these steps to submit your Bambara ASR model for evaluation.
""")
                with gr.Group(elem_classes="form-section"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            model_name_input = gr.Textbox(
                                label="π€ Model Name",
                                placeholder="e.g., MALIBA-AI/bambara-whisper-large",
                                info="Use a descriptive name (organization/model format preferred)"
                            )
                            model_type = gr.Dropdown(
                                label="π·οΈ Model Type",
                                choices=["Whisper-based", "Wav2Vec2", "Foundation", "Custom", "Fine-tuned", "Multilingual", "Other"],
                                value="Custom",
                                info="Select the type/architecture of your model"
                            )
                            origin_country = gr.Dropdown(
                                label="π Origin/Institution",
                                choices=["Mali", "Senegal", "Burkina Faso", "Niger", "Guinea", "Ivory Coast", "USA", "France", "Canada", "UK", "Other"],
                                value="Mali",
                                info="Country or region of the developing institution"
                            )
                        with gr.Column(scale=1):
                            gr.Markdown("""
#### π Submission Requirements
**CSV Format:**
- Columns: `id`, `text`
- Match all reference dataset IDs
- No duplicate IDs
- Text transcriptions in Bambara
**Data Quality:**
- Clean, normalized text
- Consistent formatting
- Complete coverage of test set
""")
                    # NOTE(review): some Gradio releases do not accept `info` on
                    # gr.File; confirm against the pinned Gradio version.
                    csv_upload = gr.File(
                        label="π Upload Predictions CSV",
                        file_types=[".csv"],
                        info="Upload your model's transcriptions in the required CSV format"
                    )
                    submit_btn = gr.Button("π Submit Model", variant="primary", size="lg", elem_classes=['gradio-button', 'primary'])
                output_msg = gr.Markdown(label="π’ Submission Status")
                with gr.Row():
                    leaderboard_display = gr.DataFrame(
                        label="π Updated Leaderboard",
                        value=initial_leaderboard,
                        interactive=False,
                        wrap=True,
                        height=400
                    )
                    updated_chart = gr.Plot(
                        label="π Updated Performance Chart"
                    )
                # Keep the event handle so dependent views can be refreshed once
                # the submission has been processed (chained further below).
                submit_event = submit_btn.click(
                    fn=process_submission,
                    inputs=[model_name_input, csv_upload, model_type, origin_country],
                    outputs=[output_msg, leaderboard_display, updated_chart]
                )
            with gr.TabItem("π Compare Models", id="compare"):
                gr.HTML("<h2>Compare Two Models</h2>")
                gr.Markdown("### Select two models to compare their performance side-by-side")
                with gr.Row():
                    current_data = get_current_leaderboard()
                    model_names = current_data["Model_Name"].tolist() if not current_data.empty else []
                    model_1_dropdown = gr.Dropdown(
                        choices=model_names,
                        label="π€ Model 1",
                        info="Select the first model for comparison"
                    )
                    model_2_dropdown = gr.Dropdown(
                        choices=model_names,
                        label="π€ Model 2",
                        info="Select the second model for comparison"
                    )
                compare_btn = gr.Button("β‘ Compare Models", variant="primary", elem_classes=['gradio-button', 'primary'])
                # Difference is Model 1 minus Model 2 on error rates, so a
                # negative value is the one that favours Model 1.
                comparison_note = gr.Markdown("""
**Note on Comparison Results:**
- Difference is Model 1 minus Model 2
- Negative difference values (π’) indicate Model 1 performed better
- Positive difference values (π΄) indicate Model 2 performed better
- Lower error rates indicate better performance
""", visible=False)
                comparison_output = gr.DataFrame(
                    label="π Model Comparison Results",
                    value=pd.DataFrame([{"Info": "Select two models and click Compare to see the results."}]),
                    interactive=False
                )

                def update_comparison_table(m1, m2):
                    """Validate the selection, then show the note and the comparison."""
                    if not m1 or not m2:
                        return gr.update(visible=False), pd.DataFrame([{"Info": "Please select both models before clicking Compare."}])
                    if m1 == m2:
                        return gr.update(visible=False), pd.DataFrame([{"Info": "Please select two different models to compare."}])
                    df = compare_models(m1, m2)
                    return gr.update(visible=True), df

                compare_btn.click(
                    fn=update_comparison_table,
                    inputs=[model_1_dropdown, model_2_dropdown],
                    outputs=[comparison_note, comparison_output]
                )

                def refresh_after_submission(method):
                    """Reload every view that depends on the stored leaderboard."""
                    latest = get_current_leaderboard()
                    names = latest["Model_Name"].tolist() if not latest.empty else []
                    return (
                        get_leaderboard_stats(),
                        update_ranking(method),
                        create_performance_chart(),
                        gr.update(choices=names),
                        gr.update(choices=names),
                    )

                # Without this step the stats cards, the main table and chart,
                # and both dropdowns keep the values computed at start-up, so a
                # newly submitted model could not be seen or compared.
                submit_event.then(
                    fn=refresh_after_submission,
                    inputs=[ranking_method],
                    outputs=[stats_html, leaderboard_view, performance_chart, model_1_dropdown, model_2_dropdown]
                )
            with gr.TabItem("π Dataset & Methodology", id="dataset"):
                gr.HTML("<h2>Dataset & Methodology</h2>")
                gr.Markdown("""
## π― About the Bambara Speech Recognition Benchmark
### π Dataset Overview
Our benchmark is built on the **`sudoping01/bambara-speech-recognition-benchmark`** dataset, featuring:
- **ποΈ Diverse Audio Samples:** Various speakers, dialects, and recording conditions
- **π£οΈ Speaker Variety:** Multiple native Bambara speakers from different regions
- **π΅ Acoustic Diversity:** Different recording environments and quality levels
- **β Quality Assurance:** Manually validated transcriptions
- **π Content Variety:** Multiple domains and speaking styles
### π¬ Evaluation Methodology
#### Text Normalization Process
1. **Lowercase conversion** for consistency
2. **Punctuation removal** to focus on linguistic content
3. **Whitespace normalization** for standardized formatting
4. **Unicode normalization** for proper character handling
#### Quality Controls
- **Outlier Detection:** Extreme error rates are capped to prevent skewing
- **Data Validation:** Comprehensive format and completeness checks
- **Duplicate Prevention:** Automatic detection of duplicate submissions
- **Missing Data Handling:** Identification of incomplete submissions
### π How to Participate
#### Step 1: Access the Dataset
```python
from datasets import load_dataset
dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark")
```
#### Step 2: Generate Predictions
- Process the audio files with your ASR model
- Generate transcriptions for each audio sample
- Ensure your model outputs text in Bambara language
#### Step 3: Format Results
Create a CSV file with exactly these columns:
- **`id`**: Sample identifier (must match dataset IDs)
- **`text`**: Your model's transcription
#### Step 4: Submit & Evaluate
- Upload your CSV using the submission form
- Your model will be automatically evaluated
- Results appear on the leaderboard immediately
### π Recognition & Impact
**Top-performing models will be:**
- Featured prominently on our leaderboard
- Highlighted in MALIBA-AI communications
- Considered for inclusion in production systems
- Invited to present at community events
### π€ Community Guidelines
- **Reproducibility:** Please provide model details and methodology
- **Fair Play:** No data leakage or unfair advantages
- **Collaboration:** Share insights and learnings with the community
- **Attribution:** Properly cite the benchmark in publications
### π Technical Specifications
| Aspect | Details |
|--------|---------|
| **Audio Format** | WAV, various sample rates |
| **Language** | Bambara (bam) |
| **Evaluation Metrics** | WER, CER, Combined Score |
| **Text Encoding** | UTF-8 |
| **Submission Format** | CSV with id, text columns |
""")
    # Citation and Footer
    with gr.Group(elem_classes="content-card"):
        gr.HTML("""
<div class="citation-block">
<h2>π Citation</h2>
<p>If you use the Bambara ASR Leaderboard for your scientific publication, or if you find the resources useful, please cite our work:</p>
<pre>
@misc{bambara_asr_leaderboard_2025,
title={Bambara Speech Recognition Leaderboard},
author={MALIBA-AI Team},
year={2025},
url={https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard},
note={A community initiative for advancing Bambara speech recognition technology}
}
</pre>
</div>
""")
        gr.HTML("""
<div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;">
<h3 style="color: #7d3561; margin-bottom: 15px;">About MALIBA-AI</h3>
<p style="font-size: 16px; line-height: 1.6; max-width: 800px; margin: 0 auto;">
<strong>MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation</strong><br>
<em>"No Malian Language Left Behind"</em>
</p>
<p style="margin-top: 15px;">
This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
For more information, visit <a href="https://maliba-ai.org/" style="color: #7d3561; font-weight: 600;">MALIBA-AI</a> or
<a href="https://huggingface.co/MALIBA-AI" style="color: #7d3561; font-weight: 600;">our Hugging Face page</a>.
</p>
<div style="margin-top: 20px;">
<span style="font-size: 2em;">π²π±</span>
<span style="margin: 0 20px; color: #7d3561; font-weight: 600;">β’</span>
<span style="font-size: 2em;">π€</span>
<span style="margin: 0 20px; color: #7d3561; font-weight: 600;">β’</span>
<span style="font-size: 2em;">π</span>
</div>
</div>
""")

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()