Spaces:

MALIBA-AI
/

bambara-asr-leaderboard

Running

App Files Files Community

bambara-asr-leaderboard / app.py

sudoping01

Update app.py

5c340a5 verified about 2 months ago

raw

history blame contribute delete

61.7 kB

	# import gradio as gr
	# import pandas as pd
	# from datasets import load_dataset
	# from jiwer import wer, cer
	# import os
	# from datetime import datetime
	# import re

	# from huggingface_hub import login

	# # Login to Hugging Face Hub (if token is available)
	# token = os.environ.get("HG_TOKEN")
	# if token:
	# login(token)


	# try:
	# dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
	# references = {row["id"]: row["text"] for row in dataset}
	# print(f"Loaded {len(references)} reference transcriptions")
	# except Exception as e:
	# print(f"Error loading dataset: {str(e)}")
	# references = {}


	# leaderboard_file = "leaderboard.csv"
	# if not os.path.exists(leaderboard_file):

	# sample_data = [
	# ["test_1", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
	# ["test_2", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45"],
	# ]
	# pd.DataFrame(sample_data,
	# columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]).to_csv(leaderboard_file, index=False)
	# print(f"Created new leaderboard file with sample data")
	# else:
	# leaderboard_df = pd.read_csv(leaderboard_file)


	# if "Combined_Score" not in leaderboard_df.columns:
	# leaderboard_df["Combined_Score"] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
	# leaderboard_df.to_csv(leaderboard_file, index=False)
	# print(f"Added Combined_Score column to existing leaderboard")
	# print(f"Loaded leaderboard with {len(leaderboard_df)} entries")

	# def normalize_text(text):
	# """Normalize text for WER/CER calculation"""
	# if not isinstance(text, str):
	# text = str(text)

	# text = text.lower()
	# text = re.sub(r'[^\w\s]', '', text)
	# text = re.sub(r'\s+', ' ', text).strip()
	# return text

	# def calculate_metrics(predictions_df):
	# """Calculate WER and CER for predictions."""
	# results = []
	# total_ref_words = 0
	# total_ref_chars = 0

	# for _, row in predictions_df.iterrows():
	# id_val = row["id"]
	# if id_val not in references:
	# continue

	# reference = normalize_text(references[id_val])
	# hypothesis = normalize_text(row["text"])

	# if not reference or not hypothesis:
	# continue

	# reference_words = reference.split()
	# hypothesis_words = hypothesis.split()
	# reference_chars = list(reference)

	# try:
	# sample_wer = wer(reference, hypothesis)
	# sample_cer = cer(reference, hypothesis)

	# sample_wer = min(sample_wer, 2.0)
	# sample_cer = min(sample_cer, 2.0)

	# total_ref_words += len(reference_words)
	# total_ref_chars += len(reference_chars)

	# results.append({
	# "id": id_val,
	# "reference": reference,
	# "hypothesis": hypothesis,
	# "ref_word_count": len(reference_words),
	# "ref_char_count": len(reference_chars),
	# "wer": sample_wer,
	# "cer": sample_cer
	# })
	# except Exception as e:
	# print(f"Error processing sample {id_val}: {str(e)}")
	# pass

	# if not results:
	# raise ValueError("No valid samples for WER/CER calculation")

	# avg_wer = sum(item["wer"] for item in results) / len(results)
	# avg_cer = sum(item["cer"] for item in results) / len(results)


	# weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
	# weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars

	# return avg_wer, avg_cer, weighted_wer, weighted_cer, results

	# def format_as_percentage(value):
	# """Convert decimal to percentage with 2 decimal places"""
	# return f"{value * 100:.2f}%"

	# def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
	# """Format leaderboard for display with ranking and percentages"""
	# if df is None or len(df) == 0:
	# return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])


	# display_df = df.copy()


	# display_df = display_df.sort_values(sort_by)

	# display_df.insert(0, "Rank", range(1, len(display_df) + 1))

	# for col in ["WER", "CER", "Combined_Score"]:
	# if col in display_df.columns:
	# display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}")



	# return display_df

	# def update_ranking(method):
	# """Update leaderboard ranking based on selected method"""
	# try:
	# current_lb = pd.read_csv(leaderboard_file)

	# if "Combined_Score" not in current_lb.columns:
	# current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3

	# sort_column = "Combined_Score"
	# if method == "WER Only":
	# sort_column = "WER"
	# elif method == "CER Only":
	# sort_column = "CER"

	# return prepare_leaderboard_for_display(current_lb, sort_column)

	# except Exception as e:
	# print(f"Error updating ranking: {str(e)}")
	# return pd.DataFrame(columns=["Rank", "Model_Name", "WER (%)", "CER (%)", "Combined_Score (%)", "timestamp"])

	# def process_submission(model_name, csv_file):
	# """Process a new model submission"""
	# if not model_name or not model_name.strip():
	# return "Error: Please provide a model name.", None

	# if not csv_file:
	# return "Error: Please upload a CSV file.", None

	# try:
	# df = pd.read_csv(csv_file)

	# if len(df) == 0:
	# return "Error: Uploaded CSV is empty.", None

	# if set(df.columns) != {"id", "text"}:
	# return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None

	# if df["id"].duplicated().any():
	# dup_ids = df[df["id"].duplicated()]["id"].unique()
	# return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None

	# missing_ids = set(references.keys()) - set(df["id"])
	# extra_ids = set(df["id"]) - set(references.keys())

	# if missing_ids:
	# return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None

	# if extra_ids:
	# return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None

	# try:
	# avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)

	# # Check for suspiciously low values
	# if avg_wer < 0.001:
	# return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None

	# except Exception as e:
	# return f"Error calculating metrics: {str(e)}", None


	# leaderboard = pd.read_csv(leaderboard_file)
	# timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

	# combined_score = avg_wer * 0.7 + avg_cer * 0.3

	# if model_name in leaderboard["Model_Name"].values:
	# idx = leaderboard[leaderboard["Model_Name"] == model_name].index
	# leaderboard.loc[idx, "WER"] = avg_wer
	# leaderboard.loc[idx, "CER"] = avg_cer
	# leaderboard.loc[idx, "Combined_Score"] = combined_score
	# leaderboard.loc[idx, "timestamp"] = timestamp
	# updated_leaderboard = leaderboard
	# else:
	# new_entry = pd.DataFrame(
	# [[model_name, avg_wer, avg_cer, combined_score, timestamp]],
	# columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"]
	# )
	# updated_leaderboard = pd.concat([leaderboard, new_entry])

	# updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
	# updated_leaderboard.to_csv(leaderboard_file, index=False)

	# display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)

	# return f"Submission processed successfully! WER: {format_as_percentage(avg_wer)}, CER: {format_as_percentage(avg_cer)}, Combined Score: {format_as_percentage(combined_score)}", display_leaderboard

	# except Exception as e:
	# return f"Error processing submission: {str(e)}", None

	# def get_current_leaderboard():
	# """Get the current leaderboard data for display"""
	# try:
	# if os.path.exists(leaderboard_file):
	# current_leaderboard = pd.read_csv(leaderboard_file)

	# if "Combined_Score" not in current_leaderboard.columns:
	# current_leaderboard["Combined_Score"] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
	# current_leaderboard.to_csv(leaderboard_file, index=False)

	# return current_leaderboard
	# else:
	# return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])
	# except Exception as e:
	# print(f"Error getting leaderboard: {str(e)}")
	# return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp"])

	# def create_leaderboard_table():
	# """Create and format the leaderboard table for display"""
	# leaderboard_data = get_current_leaderboard()
	# return prepare_leaderboard_for_display(leaderboard_data)

	# with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
	# gr.Markdown(
	# """
	# # 🇲🇱 Bambara ASR Leaderboard

	# This leaderboard tracks and evaluates speech recognition models for the Bambara language.
	# Models are ranked based on Word Error Rate (WER), Character Error Rate (CER), and a combined score.

	# ## Current Models Performance
	# """
	# )

	# current_data = get_current_leaderboard()


	# if len(current_data) > 0:
	# best_model = current_data.sort_values("Combined_Score").iloc[0]
	# gr.Markdown(f"""
	# ### 🏆 Current Best Model: {best_model['Model_Name']}
	# * WER: *{best_model['WER']100:.2f}%**
	# * CER: *{best_model['CER']100:.2f}%**
	# * Combined Score: *{best_model['Combined_Score']100:.2f}%**
	# """)

	# with gr.Tabs() as tabs:
	# with gr.TabItem("🏅 Model Rankings"):

	# initial_leaderboard = create_leaderboard_table()

	# ranking_method = gr.Radio(
	# ["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
	# label="Ranking Method",
	# value="Combined Score (WER 70%, CER 30%)"
	# )

	# leaderboard_view = gr.DataFrame(
	# value=initial_leaderboard,
	# interactive=False,
	# label="Models are ranked by selected metric - lower is better"
	# )

	# ranking_method.change(
	# fn=update_ranking,
	# inputs=[ranking_method],
	# outputs=[leaderboard_view]
	# )

	# with gr.Accordion("Metrics Explanation", open=False):
	# gr.Markdown(
	# """
	# ## Understanding ASR Metrics

	# ### Word Error Rate (WER)
	# WER measures how accurately the ASR system recognizes whole words:
	# * Lower values indicate better performance
	# * Calculated as: (Substitutions + Insertions + Deletions) / Total Words
	# * A WER of 0% means perfect transcription
	# * A WER of 20% means approximately 1 in 5 words contains an error

	# ### Character Error Rate (CER)
	# CER measures accuracy at the character level:
	# * More fine-grained than WER
	# * Better at capturing partial word matches
	# * Particularly useful for agglutinative languages like Bambara

	# ### Combined Score
	# * Weighted average: 70% WER + 30% CER
	# * Provides a balanced evaluation of model performance
	# * Used as the primary ranking metric
	# """
	# )

	# with gr.TabItem("📊 Submit New Results"):
	# gr.Markdown(
	# """
	# ### Submit a new model for evaluation

	# Upload a CSV file with the following format:
	# * Must contain exactly two columns: 'id' and 'text'
	# * The 'id' column should match the reference dataset IDs
	# * The 'text' column should contain your model's transcriptions
	# """
	# )

	# with gr.Row():
	# model_name_input = gr.Textbox(
	# label="Model Name",
	# placeholder="e.g., MALIBA-AI/bambara-asr"
	# )
	# gr.Markdown("Use a descriptive name to identify your model")

	# with gr.Row():
	# csv_upload = gr.File(
	# label="Upload CSV File",
	# file_types=[".csv"]
	# )
	# gr.Markdown("CSV with columns: id, text")

	# submit_btn = gr.Button("Submit", variant="primary")
	# output_msg = gr.Textbox(label="Status", interactive=False)
	# leaderboard_display = gr.DataFrame(
	# label="Updated Leaderboard",
	# value=initial_leaderboard,
	# interactive=False
	# )

	# submit_btn.click(
	# fn=process_submission,
	# inputs=[model_name_input, csv_upload],
	# outputs=[output_msg, leaderboard_display]
	# )

	# with gr.TabItem("📝 Benchmark Dataset"):
	# gr.Markdown(
	# """
	# ## About the Benchmark Dataset

	# This leaderboard uses the [sudoping01/bambara-speech-recognition-benchmark](https://huggingface.co/datasets/MALIBA-AI/bambara-speech-recognition-leaderboard) dataset:

	# * Contains diverse Bambara speech samples
	# * Includes various speakers, accents, and dialects
	# * Covers different speech styles and recording conditions
	# * Transcribed and validated

	# ### How to Generate Predictions

	# To submit results to this leaderboard:

	# 1. Download the audio files from the benchmark dataset
	# 2. Run your ASR model on the audio files
	# 3. Generate a CSV file with 'id' and 'text' columns
	# 4. Submit your results using the form in the "Submit New Results" tab

	# ### Evaluation Guidelines

	# * Text is normalized (lowercase, punctuation removed) before metrics calculation
	# * Extreme outliers are capped to prevent skewing results
	# * All submissions are validated for format and completeness

	# NB: This work is a collaboration between MALIBA-AI, RobotsMali AI4D-LAB and Djelia
	# """
	# )

	# gr.Markdown(
	# """
	# ---
	# ### About MALIBA-AI

	# MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation

	# "No Malian Language Left Behind"

	# This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
	# For more information, visit [MALIBA-AI on Hugging Face](https://huggingface.co/MALIBA-AI).
	# """
	# )

	# if __name__ == "__main__":
	# demo.launch()
	import gradio as gr
	import pandas as pd
	from datasets import load_dataset
	from jiwer import wer, cer
	import os
	from datetime import datetime
	import re
	import plotly.express as px
	import plotly.graph_objects as go
	from huggingface_hub import login
	import numpy as np

	# Custom CSS inspired by Sahara leaderboard
	custom_head_html = """
	<link rel="preconnect" href="https://fonts.googleapis.com">
	<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
	<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Rubik:wght@400;600&display=swap" rel="stylesheet">
	"""

	# Header with MALIBA-AI branding
	new_header_html = """
	<center>
	<br><br>
	<div style="display: flex; align-items: center; justify-content: center; gap: 20px; margin-bottom: 20px;">
	<div style="font-size: 4em;">🇲🇱</div>
	<div>
	<h1 style="margin: 0; font-family: 'Rubik', sans-serif; color: #2f3b7d; font-size: 2.5em; font-weight: 700;">
	Bambara ASR Leaderboard
	</h1>
	<p style="margin: 5px 0 0 0; font-size: 1.2em; color: #7d3561; font-weight: 600;">
	Powered by MALIBA-AI • "No Malian Language Left Behind"
	</p>
	</div>
	<div style="font-size: 4em;">🎙️</div>
	</div>
	</center>
	"""

	# Advanced CSS styling inspired by Sahara
	sahara_style_css = """
	/* Global Styles */
	div[class*="gradio-container"] {
	background: #FFFBF5 !important;
	color: #000 !important;
	font-family: 'Inter', sans-serif !important;
	}

	div.svelte-1nguped {
	background: white !important;
	}

	.fillable.svelte-15jxnnn.svelte-15jxnnn:not(.fill_width) {
	max-width: 1580px !important;
	}

	/* Navigation Buttons */
	.nav-button {
	background-color: #117b75 !important;
	color: #fff !important;
	font-weight: bold !important;
	border-radius: 8px !important;
	border: none !important;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important;
	transition: all 0.3s ease !important;
	}

	.nav-button:hover {
	background-color: #0f6b66 !important;
	color: #e8850e !important;
	transform: translateY(-1px) !important;
	box-shadow: 0 4px 8px rgba(0,0,0,0.2) !important;
	}

	/* Content Cards */
	.content-section {
	padding: 40px 0;
	}

	.content-card {
	background-color: #fff !important;
	border-radius: 16px !important;
	box-shadow: 0 10px 25px -5px rgba(0,0,0,0.1), 0 8px 10px -6px rgba(0,0,0,0.1) !important;
	padding: 40px !important;
	margin-bottom: 30px !important;
	border: 1px solid rgba(0,0,0,0.05) !important;
	}

	/* Typography */
	.content-card h2 {
	font-family: "Rubik", sans-serif !important;
	font-size: 32px !important;
	font-weight: 700 !important;
	line-height: 1.25 !important;
	letter-spacing: -1px !important;
	color: #2f3b7d !important;
	margin-bottom: 20px !important;
	text-align: center !important;
	}

	.content-card h3 {
	font-size: 22px !important;
	color: #2f3b7d !important;
	font-weight: 600 !important;
	margin-bottom: 15px !important;
	}

	.content-card h4 {
	font-family: "Rubik", sans-serif !important;
	color: #7d3561 !important;
	font-weight: 600 !important;
	margin-bottom: 10px !important;
	}

	.title {
	color: #7d3561 !important;
	font-weight: 600 !important;
	}

	/* Tab Styling */
	.tab-wrapper.svelte-1tcem6n.svelte-1tcem6n {
	display: flex;
	align-items: center;
	justify-content: space-between;
	position: relative;
	height: auto !important;
	padding-bottom: 0 !important;
	}

	.selected.svelte-1tcem6n.svelte-1tcem6n {
	background-color: #7d3561 !important;
	color: #fff !important;
	border-radius: 8px 8px 0 0 !important;
	}

	button.svelte-1tcem6n.svelte-1tcem6n {
	color: #7d3561 !important;
	font-weight: 600 !important;
	font-size: 16px !important;
	padding: 12px 20px !important;
	background-color: #fff !important;
	border-radius: 8px 8px 0 0 !important;
	border: 2px solid #e9ecef !important;
	border-bottom: none !important;
	transition: all 0.3s ease !important;
	}

	button.svelte-1tcem6n.svelte-1tcem6n:hover {
	background-color: #f8f9fa !important;
	border-color: #7d3561 !important;
	}

	.tab-container.svelte-1tcem6n.svelte-1tcem6n:after {
	content: "";
	position: absolute;
	bottom: 0;
	left: 0;
	right: 0;
	height: 3px;
	background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
	}

	/* Table Styling */
	div[class*="gradio-container"] .prose table {
	color: #000 !important;
	border: 2px solid #dca02a !important;
	border-radius: 12px !important;
	margin-bottom: 20px !important;
	margin-left: auto !important;
	margin-right: auto !important;
	width: 100% !important;
	border-collapse: separate !important;
	border-spacing: 0 !important;
	overflow: hidden !important;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1) !important;
	}

	div[class*="gradio-container"] .prose thead tr {
	background: linear-gradient(90deg, #7d3561 0%, #2f3b7d 100%) !important;
	}

	div[class*="gradio-container"] .prose th {
	color: #fff !important;
	font-weight: 700 !important;
	font-size: 14px !important;
	padding: 15px 10px !important;
	text-align: center !important;
	border: none !important;
	}

	div[class*="gradio-container"] .prose td {
	font-size: 14px !important;
	padding: 12px 10px !important;
	border: none !important;
	text-align: center !important;
	color: #000 !important;
	border-bottom: 1px solid #f8f9fa !important;
	}

	div[class*="gradio-container"] .prose tbody tr:nth-child(even) {
	background-color: #f8f9fa !important;
	}

	div[class*="gradio-container"] .prose tbody tr:hover {
	background-color: #e3f2fd !important;
	transition: background-color 0.2s ease !important;
	}

	/* First column (model names) styling */
	div[class*="gradio-container"] .prose th:first-child,
	div[class*="gradio-container"] .prose td:first-child {
	text-align: left !important;
	min-width: 250px !important;
	font-weight: 600 !important;
	}

	/* Performance badges */
	.performance-badge {
	display: inline-block;
	padding: 4px 8px;
	border-radius: 12px;
	font-size: 12px;
	font-weight: 600;
	margin-left: 8px;
	}

	.badge-excellent {
	background: #d4edda;
	color: #155724;
	}

	.badge-good {
	background: #fff3cd;
	color: #856404;
	}

	.badge-fair {
	background: #f8d7da;
	color: #721c24;
	}

	/* Stats cards */
	.stats-grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
	gap: 20px;
	margin: 20px 0;
	}

	.stat-card {
	background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white;
	padding: 20px;
	border-radius: 12px;
	text-align: center;
	box-shadow: 0 4px 6px rgba(0,0,0,0.1);
	}

	.stat-number {
	font-size: 2em;
	font-weight: 700;
	margin-bottom: 5px;
	}

	.stat-label {
	font-size: 0.9em;
	opacity: 0.9;
	}

	/* Form styling */
	.form-section {
	background: #f8f9fa;
	border-radius: 12px;
	padding: 25px;
	margin: 20px 0;
	border-left: 4px solid #7d3561;
	}

	/* Citation block */
	.citation-block {
	background-color: #FDF6E3 !important;
	border-radius: 12px !important;
	padding: 25px !important;
	border-left: 4px solid #D97706 !important;
	margin: 20px 0 !important;
	}

	/* Dropdown styling */
	.gradio-dropdown {
	border-radius: 8px !important;
	border: 2px solid #e9ecef !important;
	}

	.gradio-dropdown:focus {
	border-color: #7d3561 !important;
	box-shadow: 0 0 0 3px rgba(125, 53, 97, 0.1) !important;
	}

	/* Button styling */
	.gradio-button {
	border-radius: 8px !important;
	font-weight: 600 !important;
	transition: all 0.3s ease !important;
	}

	.gradio-button.primary {
	background: linear-gradient(135deg, #7d3561 0%, #2f3b7d 100%) !important;
	border: none !important;
	color: white !important;
	}

	.gradio-button.primary:hover {
	transform: translateY(-2px) !important;
	box-shadow: 0 4px 12px rgba(125, 53, 97, 0.3) !important;
	}

	/* Responsive design */
	@media (max-width: 768px) {
	.content-card {
	padding: 20px !important;
	margin-bottom: 20px !important;
	}

	.content-card h2 {
	font-size: 24px !important;
	}

	.stats-grid {
	grid-template-columns: 1fr !important;
	}
	}
	"""

	# Login to Hugging Face Hub (if token is available)
	token = os.environ.get("HG_TOKEN")
	if token:
	login(token)

	# Load dataset
	try:
	# Load dataset without audio decoding, just get the text references
	dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default", streaming=False)["eval"]
	references = {row["id"]: row["text"] for row in dataset}
	print(f"Loaded {len(references)} reference transcriptions")
	except Exception as e:
	print(f"Error loading dataset: {str(e)}")
	# Fallback with sample references for testing
	references = {
	"sample_001": "nin ye mun kε",
	"sample_002": "a bε baara kε",
	"sample_003": "an bε taa sugu",
	"sample_004": "duguba ka nyi",
	"sample_005": "muso min bε yan"
	}
	print(f"Using fallback references: {len(references)} samples")

	# Initialize leaderboard
	leaderboard_file = "leaderboard.csv"
	if not os.path.exists(leaderboard_file):
	sample_data = [
	["MALIBA-AI/bambara-whisper-small", 0.2264, 0.1094, 0.1922, "2025-03-15 10:30:45", "Whisper-based", "Mali", "ASR"],
	["OpenAI/whisper-base", 0.3264, 0.1094, 0.1922, "2025-03-15 10:30:45", "Foundation", "USA", "ASR"],
	]
	pd.DataFrame(sample_data,
	columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]).to_csv(leaderboard_file, index=False)
	print(f"Created new leaderboard file with sample data")
	else:
	leaderboard_df = pd.read_csv(leaderboard_file)

	# Add new columns if they don't exist
	required_columns = ["Combined_Score", "Type", "Origin", "Task"]
	for col in required_columns:
	if col not in leaderboard_df.columns:
	if col == "Combined_Score":
	leaderboard_df[col] = leaderboard_df["WER"] * 0.7 + leaderboard_df["CER"] * 0.3
	else:
	default_val = "Unknown" if col != "Task" else "ASR"
	leaderboard_df[col] = default_val

	leaderboard_df.to_csv(leaderboard_file, index=False)
	print(f"Loaded leaderboard with {len(leaderboard_df)} entries")

	def normalize_text(text):
	"""Normalize text for WER/CER calculation"""
	if not isinstance(text, str):
	text = str(text)

	text = text.lower()
	text = re.sub(r'[^\w\s]', '', text)
	text = re.sub(r'\s+', ' ', text).strip()
	return text

	def calculate_metrics(predictions_df):
	"""Calculate WER and CER for predictions."""
	results = []
	total_ref_words = 0
	total_ref_chars = 0

	for _, row in predictions_df.iterrows():
	id_val = row["id"]
	if id_val not in references:
	continue

	reference = normalize_text(references[id_val])
	hypothesis = normalize_text(row["text"])

	if not reference or not hypothesis:
	continue

	reference_words = reference.split()
	hypothesis_words = hypothesis.split()
	reference_chars = list(reference)

	try:
	sample_wer = wer(reference, hypothesis)
	sample_cer = cer(reference, hypothesis)

	sample_wer = min(sample_wer, 2.0)
	sample_cer = min(sample_cer, 2.0)

	total_ref_words += len(reference_words)
	total_ref_chars += len(reference_chars)

	results.append({
	"id": id_val,
	"reference": reference,
	"hypothesis": hypothesis,
	"ref_word_count": len(reference_words),
	"ref_char_count": len(reference_chars),
	"wer": sample_wer,
	"cer": sample_cer
	})
	except Exception as e:
	print(f"Error processing sample {id_val}: {str(e)}")
	pass

	if not results:
	raise ValueError("No valid samples for WER/CER calculation")

	avg_wer = sum(item["wer"] for item in results) / len(results)
	avg_cer = sum(item["cer"] for item in results) / len(results)

	weighted_wer = sum(item["wer"] * item["ref_word_count"] for item in results) / total_ref_words
	weighted_cer = sum(item["cer"] * item["ref_char_count"] for item in results) / total_ref_chars

	return avg_wer, avg_cer, weighted_wer, weighted_cer, results

	def format_as_percentage(value):
	"""Convert decimal to percentage with 2 decimal places"""
	return f"{value * 100:.2f}%"

	def get_performance_badge(score):
	"""Get performance badge based on score"""
	if score < 0.15:
	return "🏆 Excellent"
	elif score < 0.30:
	return "🥉 Good"
	else:
	return "📈 Fair"

	def add_medals_to_models(df, score_col="Combined_Score"):
	"""Add medals to top-performing models"""
	if df.empty or score_col not in df.columns:
	return df

	df_copy = df.copy()

	# Convert score to float for sorting
	df_copy[f"{score_col}_float"] = pd.to_numeric(df_copy[score_col], errors='coerce')

	# Sort by score (ascending - lower is better for error rates)
	df_copy = df_copy.sort_values(by=f"{score_col}_float", ascending=True, na_position='last').reset_index(drop=True)

	# Get unique scores for ranking
	valid_scores = df_copy[f"{score_col}_float"].dropna().unique()
	valid_scores.sort()

	# Assign medals
	medals = ["🏆", "🥈", "🥉"]

	def get_medal(score):
	if pd.isna(score):
	return ""
	rank = np.where(valid_scores == score)[0]
	if len(rank) > 0 and rank[0] < len(medals):
	return medals[rank[0]] + " "
	return ""

	df_copy["Medal"] = df_copy[f"{score_col}_float"].apply(get_medal)
	df_copy["Model_Name"] = df_copy["Medal"] + df_copy["Model_Name"].astype(str)

	# Clean up temporary columns
	df_copy = df_copy.drop(columns=[f"{score_col}_float", "Medal"])

	return df_copy

	def prepare_leaderboard_for_display(df, sort_by="Combined_Score"):
	"""Format leaderboard for display with ranking and percentages"""
	if df is None or len(df) == 0:
	return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"])

	display_df = df.copy()

	# Add medals first
	display_df = add_medals_to_models(display_df, sort_by)

	# Sort by the specified column
	display_df[f"{sort_by}_float"] = pd.to_numeric(display_df[sort_by], errors='coerce')
	display_df = display_df.sort_values(f"{sort_by}_float", ascending=True, na_position='last')

	# Add rank
	display_df.insert(0, "Rank", range(1, len(display_df) + 1))

	# Format percentages
	for col in ["WER", "CER", "Combined_Score"]:
	if col in display_df.columns:
	display_df[f"{col} (%)"] = display_df[col].apply(lambda x: f"{x * 100:.2f}" if pd.notna(x) else "---")

	# Add performance badges
	display_df["Performance"] = display_df["Combined_Score"].apply(lambda x: get_performance_badge(x) if pd.notna(x) else "---")

	# Shorten model names for display
	display_df["Model"] = display_df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in str(x) else str(x))

	# Format date
	if "timestamp" in display_df.columns:
	display_df["Date"] = pd.to_datetime(display_df["timestamp"], errors='coerce').dt.strftime("%Y-%m-%d")
	else:
	display_df["Date"] = "---"

	# Select and reorder columns
	display_columns = ["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"]
	available_columns = [col for col in display_columns if col in display_df.columns]

	# Clean up temporary columns
	temp_cols = [col for col in display_df.columns if col.endswith("_float")]
	display_df = display_df.drop(columns=temp_cols, errors='ignore')

	return display_df[available_columns]

	def create_performance_chart():
	"""Create performance visualization chart"""
	try:
	df = pd.read_csv(leaderboard_file)
	if len(df) == 0:
	return None

	# Sort by Combined_Score
	df = df.sort_values("Combined_Score")

	fig = go.Figure()

	# Add WER bars
	fig.add_trace(go.Bar(
	name="WER",
	x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
	y=df["WER"] * 100,
	marker_color='#ff7f0e',
	hovertemplate='<b>%{x}</b><br>WER: %{y:.2f}%<extra></extra>'
	))

	# Add CER bars
	fig.add_trace(go.Bar(
	name="CER",
	x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
	y=df["CER"] * 100,
	marker_color='#2ca02c',
	hovertemplate='<b>%{x}</b><br>CER: %{y:.2f}%<extra></extra>'
	))

	# Add Combined Score line
	fig.add_trace(go.Scatter(
	name="Combined Score",
	x=df["Model_Name"].apply(lambda x: x.split("/")[-1] if "/" in x else x),
	y=df["Combined_Score"] * 100,
	mode='lines+markers',
	line=dict(color='#d62728', width=3),
	marker=dict(size=8),
	hovertemplate='<b>%{x}</b><br>Combined Score: %{y:.2f}%<extra></extra>'
	))

	fig.update_layout(
	title={
	'text': "📊 Model Performance Comparison",
	'x': 0.5,
	'font': {'size': 18, 'family': 'Rubik'}
	},
	xaxis_title="Model",
	yaxis_title="Error Rate (%)",
	hovermode='x unified',
	height=500,
	showlegend=True,
	plot_bgcolor='rgba(0,0,0,0)',
	paper_bgcolor='rgba(0,0,0,0)',
	font=dict(family="Inter", size=12),
	legend=dict(
	orientation="h",
	yanchor="bottom",
	y=1.02,
	xanchor="right",
	x=1
	)
	)

	return fig
	except Exception as e:
	print(f"Error creating chart: {str(e)}")
	return None

	def get_leaderboard_stats():
	"""Get summary statistics for the leaderboard"""
	try:
	df = pd.read_csv(leaderboard_file)
	if len(df) == 0:
	return """
	<div class="stats-grid">
	<div class="stat-card">
	<div class="stat-number">0</div>
	<div class="stat-label">Models Submitted</div>
	</div>
	</div>
	"""

	best_model = df.loc[df["Combined_Score"].idxmin()]
	total_models = len(df)
	avg_wer = df["WER"].mean()
	avg_cer = df["CER"].mean()

	return f"""
	<div class="stats-grid">
	<div class="stat-card">
	<div class="stat-number">{total_models}</div>
	<div class="stat-label">Models Evaluated</div>
	</div>
	<div class="stat-card">
	<div class="stat-number">{format_as_percentage(best_model['Combined_Score'])}</div>
	<div class="stat-label">Best Combined Score</div>
	</div>
	<div class="stat-card">
	<div class="stat-number">{format_as_percentage(avg_wer)}</div>
	<div class="stat-label">Average WER</div>
	</div>
	<div class="stat-card">
	<div class="stat-number">{format_as_percentage(avg_cer)}</div>
	<div class="stat-label">Average CER</div>
	</div>
	</div>

	<div style="text-align: center; margin-top: 20px;">
	<h4>🏆 Current Champion: {best_model['Model_Name']}</h4>
	</div>
	"""
	except Exception as e:
	return f"<p>Error loading stats: {str(e)}</p>"

	def update_ranking(method):
	"""Update leaderboard ranking based on selected method"""
	try:
	current_lb = pd.read_csv(leaderboard_file)

	if "Combined_Score" not in current_lb.columns:
	current_lb["Combined_Score"] = current_lb["WER"] * 0.7 + current_lb["CER"] * 0.3

	sort_column = "Combined_Score"
	if method == "WER Only":
	sort_column = "WER"
	elif method == "CER Only":
	sort_column = "CER"

	return prepare_leaderboard_for_display(current_lb, sort_column)

	except Exception as e:
	print(f"Error updating ranking: {str(e)}")
	return pd.DataFrame(columns=["Rank", "Model", "WER (%)", "CER (%)", "Combined Score (%)", "Performance", "Type", "Date"])

	def compare_models(model_1_name, model_2_name):
	"""Compare two models performance"""
	try:
	df = pd.read_csv(leaderboard_file)

	if model_1_name == model_2_name:
	return pd.DataFrame([{"Info": "Please select two different models to compare."}])

	model_1 = df[df["Model_Name"] == model_1_name]
	model_2 = df[df["Model_Name"] == model_2_name]

	if model_1.empty or model_2.empty:
	return pd.DataFrame([{"Info": "One or both models not found in leaderboard."}])

	m1 = model_1.iloc[0]
	m2 = model_2.iloc[0]

	comparison_data = {
	"Metric": ["WER", "CER", "Combined Score"],
	model_1_name.split("/")[-1]: [
	f"{m1['WER']*100:.2f}%",
	f"{m1['CER']*100:.2f}%",
	f"{m1['Combined_Score']*100:.2f}%"
	],
	model_2_name.split("/")[-1]: [
	f"{m2['WER']*100:.2f}%",
	f"{m2['CER']*100:.2f}%",
	f"{m2['Combined_Score']*100:.2f}%"
	],
	"Difference": [
	f"{(m1['WER'] - m2['WER'])*100:+.2f}%",
	f"{(m1['CER'] - m2['CER'])*100:+.2f}%",
	f"{(m1['Combined_Score'] - m2['Combined_Score'])*100:+.2f}%"
	]
	}

	return pd.DataFrame(comparison_data)

	except Exception as e:
	return pd.DataFrame([{"Error": f"Error comparing models: {str(e)}"}])

	def process_submission(model_name, csv_file, model_type, origin_country):
	"""Process a new model submission with enhanced metadata"""
	if not model_name or not model_name.strip():
	return "❌ Error: Please provide a model name.", None, None

	if not csv_file:
	return "❌ Error: Please upload a CSV file.", None, None

	try:
	df = pd.read_csv(csv_file)

	if len(df) == 0:
	return "❌ Error: Uploaded CSV is empty.", None, None

	if set(df.columns) != {"id", "text"}:
	return f"❌ Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None, None

	if df["id"].duplicated().any():
	dup_ids = df[df["id"].duplicated()]["id"].unique()
	return f"❌ Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None, None

	missing_ids = set(references.keys()) - set(df["id"])
	extra_ids = set(df["id"]) - set(references.keys())

	if missing_ids:
	return f"❌ Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None, None

	if extra_ids:
	return f"❌ Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None, None

	try:
	avg_wer, avg_cer, weighted_wer, weighted_cer, detailed_results = calculate_metrics(df)

	if avg_wer < 0.001:
	return "❌ Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None, None

	except Exception as e:
	return f"❌ Error calculating metrics: {str(e)}", None, None

	# Update leaderboard
	leaderboard = pd.read_csv(leaderboard_file)
	timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
	combined_score = avg_wer * 0.7 + avg_cer * 0.3

	if model_name in leaderboard["Model_Name"].values:
	idx = leaderboard[leaderboard["Model_Name"] == model_name].index
	leaderboard.loc[idx, "WER"] = avg_wer
	leaderboard.loc[idx, "CER"] = avg_cer
	leaderboard.loc[idx, "Combined_Score"] = combined_score
	leaderboard.loc[idx, "timestamp"] = timestamp
	leaderboard.loc[idx, "Type"] = model_type
	leaderboard.loc[idx, "Origin"] = origin_country
	updated_leaderboard = leaderboard
	else:
	new_entry = pd.DataFrame(
	[[model_name, avg_wer, avg_cer, combined_score, timestamp, model_type, origin_country, "ASR"]],
	columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"]
	)
	updated_leaderboard = pd.concat([leaderboard, new_entry])

	updated_leaderboard = updated_leaderboard.sort_values("Combined_Score")
	updated_leaderboard.to_csv(leaderboard_file, index=False)

	display_leaderboard = prepare_leaderboard_for_display(updated_leaderboard)
	chart = create_performance_chart()

	badge = get_performance_badge(combined_score)

	success_msg = f"""
	✅ Submission processed successfully!

	{model_name} ({model_type} from {origin_country})
	- WER: {format_as_percentage(avg_wer)}
	- CER: {format_as_percentage(avg_cer)}
	- Combined Score: {format_as_percentage(combined_score)}
	- Performance: {badge}
	"""

	return success_msg, display_leaderboard, chart

	except Exception as e:
	return f"❌ Error processing submission: {str(e)}", None, None

	def get_current_leaderboard():
	"""Get the current leaderboard data for display"""
	try:
	if os.path.exists(leaderboard_file):
	current_leaderboard = pd.read_csv(leaderboard_file)

	# Ensure all required columns exist
	required_columns = ["Combined_Score", "Type", "Origin", "Task"]
	for col in required_columns:
	if col not in current_leaderboard.columns:
	if col == "Combined_Score":
	current_leaderboard[col] = current_leaderboard["WER"] * 0.7 + current_leaderboard["CER"] * 0.3
	else:
	current_leaderboard[col] = "Unknown" if col != "Task" else "ASR"

	current_leaderboard.to_csv(leaderboard_file, index=False)
	return current_leaderboard
	else:
	return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"])
	except Exception as e:
	print(f"Error getting leaderboard: {str(e)}")
	return pd.DataFrame(columns=["Model_Name", "WER", "CER", "Combined_Score", "timestamp", "Type", "Origin", "Task"])

	def create_leaderboard_table():
	"""Create and format the leaderboard table for display"""
	leaderboard_data = get_current_leaderboard()
	return prepare_leaderboard_for_display(leaderboard_data)

	def df_to_html(df):
	"""Convert DataFrame to HTML with custom styling"""
	if df.empty:
	return "<p style='text-align: center; color: #666;'>No data available</p>"

	# Convert DataFrame to HTML
	html = df.to_html(index=False, escape=False, classes="leaderboard-table")

	# Add custom styling
	html = html.replace('<table class="leaderboard-table"',
	'<table class="leaderboard-table" style="width: 100%; margin: 0 auto;"')

	return html

	# Main Gradio Interface
	with gr.Blocks(
	title="🇲🇱 Bambara ASR Leaderboard \| MALIBA-AI",
	css=sahara_style_css,
	head=custom_head_html,
	theme=gr.themes.Soft()
	) as demo:

	# Header Section
	gr.HTML(new_header_html)

	# Navigation Buttons
	with gr.Row():
	gr.Button("🌐 MALIBA-AI Website", link="https://maliba-ai.org/", elem_classes=['nav-button'])
	gr.Button("📊 HF Dataset Repo", link="https://huggingface.co/datasets/sudoping01/bambara-speech-recognition-benchmark", elem_classes=['nav-button'])
	gr.Button("🤗 MALIBA-AI Hub", link="https://huggingface.co/MALIBA-AI", elem_classes=['nav-button'])
	gr.Button("📚 Documentation", link="https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard", elem_classes=['nav-button'])

	with gr.Group(elem_classes="content-card"):
	# Stats display
	stats_html = gr.HTML(get_leaderboard_stats())

	with gr.Tabs() as tabs:
	with gr.TabItem("🏅 Main Leaderboard", id="main"):
	gr.HTML("<h2>Main Leaderboard</h2>")

	initial_leaderboard = create_leaderboard_table()

	with gr.Row():
	ranking_method = gr.Radio(
	["Combined Score (WER 70%, CER 30%)", "WER Only", "CER Only"],
	label="🔄 Ranking Method",
	value="Combined Score (WER 70%, CER 30%)"
	)
	gr.Markdown("Choose how to rank the models")

	leaderboard_view = gr.DataFrame(
	value=initial_leaderboard,
	interactive=False,
	label="📋 Leaderboard Rankings - Lower scores indicate better performance",
	wrap=True
	)

	# Performance chart
	gr.Markdown("### 📊 Visual Performance Comparison")
	performance_chart = gr.Plot(
	value=create_performance_chart(),
	label="Model Performance Visualization"
	)

	ranking_method.change(
	fn=update_ranking,
	inputs=[ranking_method],
	outputs=[leaderboard_view]
	)

	with gr.Accordion("📖 Understanding ASR Metrics", open=False):
	gr.Markdown("""
	## 🎯 Automatic Speech Recognition Evaluation Metrics

	### Word Error Rate (WER)
	WER measures transcription accuracy at the word level:
	- Formula: `(Substitutions + Insertions + Deletions) / Total Reference Words`
	- Range: 0% (perfect) to 100%+ (very poor)
	- Interpretation:
	- 0-5%: 🏆 Excellent performance
	- 5-15%: 🥉 Good performance
	- 15-30%: 📈 Fair performance
	- 30%+: Poor performance

	### Character Error Rate (CER)
	CER measures transcription accuracy at the character level:
	- Advantage: More granular than WER, captures partial matches
	- Benefit for Bambara: Particularly valuable for agglutinative languages
	- Typical Range: Usually lower than WER values

	### Combined Score (Primary Ranking Metric)
	Formula: `Combined Score = 0.7 × WER + 0.3 × CER`
	- Rationale: Balanced evaluation emphasizing word-level accuracy
	- Usage: Primary metric for model ranking

	### 🎯 Performance Categories
	- 🏆 Excellent: < 15% Combined Score
	- 🥉 Good: 15-30% Combined Score
	- 📈 Fair: > 30% Combined Score
	""")

	with gr.TabItem("📤 Submit New Model", id="submit"):
	gr.HTML("<h2>Submit Your Bambara ASR Model</h2>")

	gr.Markdown("""
	### 🚀 Ready to benchmark your model? Submit your results and join the leaderboard!

	Follow these steps to submit your Bambara ASR model for evaluation.
	""")

	with gr.Group(elem_classes="form-section"):
	with gr.Row():
	with gr.Column(scale=2):
	model_name_input = gr.Textbox(
	label="🤖 Model Name",
	placeholder="e.g., MALIBA-AI/bambara-whisper-large"
	)
	gr.Markdown("Use a descriptive name (organization/model format preferred)")

	model_type = gr.Dropdown(
	label="🏷️ Model Type",
	choices=["Whisper-based", "Wav2Vec2", "Foundation", "Custom", "Fine-tuned", "Multilingual", "Other"],
	value="Custom"
	)
	gr.Markdown("Select the type/architecture of your model")

	origin_country = gr.Dropdown(
	label="🌍 Origin/Institution",
	choices=["Mali", "Senegal", "Burkina Faso", "Niger", "Guinea", "Ivory Coast", "USA", "France", "Canada", "UK", "Other"],
	value="Mali"
	)
	gr.Markdown("Country or region of the developing institution")

	with gr.Column(scale=1):
	gr.Markdown("""
	#### 📋 Submission Requirements

	CSV Format:
	- Columns: `id`, `text`
	- Match all reference dataset IDs
	- No duplicate IDs
	- Text transcriptions in Bambara

	Data Quality:
	- Clean, normalized text
	- Consistent formatting
	- Complete coverage of test set
	""")

	csv_upload = gr.File(
	label="📁 Upload Predictions CSV",
	file_types=[".csv"]
	)
	gr.Markdown("Upload your model's transcriptions in the required CSV format")

	submit_btn = gr.Button("🚀 Submit Model", variant="primary", size="lg", elem_classes=['gradio-button', 'primary'])

	output_msg = gr.Markdown(label="📢 Submission Status")

	with gr.Row():
	leaderboard_display = gr.DataFrame(
	label="📊 Updated Leaderboard",
	value=initial_leaderboard,
	interactive=False,
	wrap=True
	)

	updated_chart = gr.Plot(
	label="📈 Updated Performance Chart"
	)

	submit_btn.click(
	fn=process_submission,
	inputs=[model_name_input, csv_upload, model_type, origin_country],
	outputs=[output_msg, leaderboard_display, updated_chart]
	)

	with gr.TabItem("🔍 Compare Models", id="compare"):
	gr.HTML("<h2>Compare Two Models</h2>")

	gr.Markdown("### Select two models to compare their performance side-by-side")

	with gr.Row():
	current_data = get_current_leaderboard()
	model_names = current_data["Model_Name"].tolist() if not current_data.empty else []

	model_1_dropdown = gr.Dropdown(
	choices=model_names,
	label="🤖 Model 1"
	)
	gr.Markdown("Select the first model for comparison")

	model_2_dropdown = gr.Dropdown(
	choices=model_names,
	label="🤖 Model 2"
	)
	gr.Markdown("Select the second model for comparison")

	compare_btn = gr.Button("⚡ Compare Models", variant="primary", elem_classes=['gradio-button', 'primary'])

	comparison_note = gr.Markdown("""
	Note on Comparison Results:
	- Positive difference values (🟢) indicate Model 1 performed better
	- Negative difference values (🔴) indicate Model 2 performed better
	- Lower error rates indicate better performance
	""", visible=False)

	comparison_output = gr.DataFrame(
	label="📊 Model Comparison Results",
	value=pd.DataFrame([{"Info": "Select two models and click Compare to see the results."}]),
	interactive=False
	)

	def update_comparison_table(m1, m2):
	if not m1 or not m2:
	return gr.update(visible=False), pd.DataFrame([{"Info": "Please select both models before clicking Compare."}])

	if m1 == m2:
	return gr.update(visible=False), pd.DataFrame([{"Info": "Please select two different models to compare."}])

	df = compare_models(m1, m2)
	return gr.update(visible=True), df

	compare_btn.click(
	fn=update_comparison_table,
	inputs=[model_1_dropdown, model_2_dropdown],
	outputs=[comparison_note, comparison_output]
	)

	with gr.TabItem("📊 Dataset & Methodology", id="dataset"):
	gr.HTML("<h2>Dataset & Methodology</h2>")

	gr.Markdown("""
	## 🎯 About the Bambara Speech Recognition Benchmark

	### 📈 Dataset Overview

	Our benchmark is built on the `sudoping01/bambara-speech-recognition-benchmark` dataset, featuring:

	- 🎙️ Diverse Audio Samples: Various speakers, dialects, and recording conditions
	- 🗣️ Speaker Variety: Multiple native Bambara speakers from different regions
	- 🎵 Acoustic Diversity: Different recording environments and quality levels
	- ✅ Quality Assurance: Manually validated transcriptions
	- 📚 Content Variety: Multiple domains and speaking styles

	### 🔬 Evaluation Methodology

	#### Text Normalization Process
	1. Lowercase conversion for consistency
	2. Punctuation removal to focus on linguistic content
	3. Whitespace normalization for standardized formatting
	4. Unicode normalization for proper character handling

	#### Quality Controls
	- Outlier Detection: Extreme error rates are capped to prevent skewing
	- Data Validation: Comprehensive format and completeness checks
	- Duplicate Prevention: Automatic detection of duplicate submissions
	- Missing Data Handling: Identification of incomplete submissions

	### 🚀 How to Participate

	#### Step 1: Access the Dataset
	```python
	from datasets import load_dataset
	dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark")
	```

	#### Step 2: Generate Predictions
	- Process the audio files with your ASR model
	- Generate transcriptions for each audio sample
	- Ensure your model outputs text in Bambara language

	#### Step 3: Format Results
	Create a CSV file with exactly these columns:
	- `id`: Sample identifier (must match dataset IDs)
	- `text`: Your model's transcription

	#### Step 4: Submit & Evaluate
	- Upload your CSV using the submission form
	- Your model will be automatically evaluated
	- Results appear on the leaderboard immediately

	### 🏆 Recognition & Impact

	Top-performing models will be:
	- Featured prominently on our leaderboard
	- Highlighted in MALIBA-AI communications
	- Considered for inclusion in production systems
	- Invited to present at community events

	### 🤝 Community Guidelines

	- Reproducibility: Please provide model details and methodology
	- Fair Play: No data leakage or unfair advantages
	- Collaboration: Share insights and learnings with the community
	- Attribution: Properly cite the benchmark in publications

	### 📚 Technical Specifications

	\| Aspect \| Details \|
	\|--------\|---------\|
	\| Audio Format \| WAV, various sample rates \|
	\| Language \| Bambara (bam) \|
	\| Evaluation Metrics \| WER, CER, Combined Score \|
	\| Text Encoding \| UTF-8 \|
	\| Submission Format \| CSV with id, text columns \|
	""")

	# Citation and Footer
	with gr.Group(elem_classes="content-card"):
	gr.HTML("""
	<div class="citation-block">
	<h2>📚 Citation</h2>
	<p>If you use the Bambara ASR Leaderboard for your scientific publication, or if you find the resources useful, please cite our work:</p>
	<pre>
	@misc{bambara_asr_leaderboard_2025,
	title={Bambara Speech Recognition Leaderboard},
	author={MALIBA-AI Team},
	year={2025},
	url={https://huggingface.co/spaces/MALIBA-AI/bambara-asr-leaderboard},
	note={A community initiative for advancing Bambara speech recognition technology}
	}
	</pre>
	</div>
	""")

	gr.HTML("""
	<div style="text-align: center; margin-top: 30px; padding-top: 20px; border-top: 2px solid #e9ecef;">
	<h3 style="color: #7d3561; margin-bottom: 15px;">About MALIBA-AI</h3>
	<p style="font-size: 16px; line-height: 1.6; max-width: 800px; margin: 0 auto;">
	<strong>MALIBA-AI: Empowering Mali's Future Through Community-Driven AI Innovation</strong><br>
	<em>"No Malian Language Left Behind"</em>
	</p>
	<p style="margin-top: 15px;">
	This leaderboard is maintained by the MALIBA-AI initiative to track progress in Bambara speech recognition technology.
	For more information, visit <a href="https://maliba-ai.org/" style="color: #7d3561; font-weight: 600;">MALIBA-AI</a> or
	<a href="https://huggingface.co/MALIBA-AI" style="color: #7d3561; font-weight: 600;">our Hugging Face page</a>.
	</p>
	<div style="margin-top: 20px;">
	<span style="font-size: 2em;">🇲🇱</span>
	<span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
	<span style="font-size: 2em;">🤝</span>
	<span style="margin: 0 20px; color: #7d3561; font-weight: 600;">•</span>
	<span style="font-size: 2em;">🚀</span>
	</div>
	</div>
	""")

	if __name__ == "__main__":
	demo.launch()