Spaces:

MALIBA-AI
/

bambara-asr-leaderboard

Running

App Files Files Community

sudoping01 committed on Mar 15

Commit

3769468

verified ·

1 Parent(s): 6960dc6

Update app.py

Browse files

Files changed (1) hide show

app.py +144 -27

app.py CHANGED Viewed

@@ -1,18 +1,13 @@
 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
-from jiwer import wer, cer, transforms
 import os
 from datetime import datetime
-# Define text normalization transform
-transform = transforms.Compose([
-    transforms.RemovePunctuation(),
-    transforms.ToLowerCase(),
-    transforms.RemoveWhiteSpace(replace_by_space=True),
-])
 # Load the Bambara ASR dataset
 dataset = load_dataset("sudoping01/bambara-asr-benchmark", name="default")["train"]
 references = {row["id"]: row["text"] for row in dataset}
@@ -20,29 +15,143 @@ references = {row["id"]: row["text"] for row in dataset}
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
     pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
 def process_submission(submitter_name, csv_file):
     try:
         # Read and validate the uploaded CSV
         df = pd.read_csv(csv_file)
         if set(df.columns) != {"id", "text"}:
-            return "Error: CSV must contain exactly 'id' and 'text' columns.", None
         if df["id"].duplicated().any():
-            return "Error: Duplicate 'id's found in the CSV.", None
-        if set(df["id"]) != set(references.keys()):
-            return "Error: CSV 'id's must match the dataset 'id's.", None
-        # Calculate WER and CER for each prediction
-        wers, cers = [], []
-        for _, row in df.iterrows():
-            ref = references[row["id"]]
-            pred = row["text"]
-            wers.append(wer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
-            cers.append(cer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
-        # Compute average WER and CER
-        avg_wer = sum(wers) / len(wers)
-        avg_cer = sum(cers) / len(cers)
         # Update the leaderboard
         leaderboard = pd.read_csv(leaderboard_file)
@@ -54,8 +163,10 @@ def process_submission(submitter_name, csv_file):
         leaderboard = pd.concat([leaderboard, new_entry]).sort_values("WER")
         leaderboard.to_csv(leaderboard_file, index=False)
-        return "Submission processed successfully!", leaderboard
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
 # Create the Gradio interface
@@ -63,17 +174,18 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     gr.Markdown(
         """
         # Bambara ASR Leaderboard
-        Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
-        The 'id's must match those in the dataset.
         [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
         - **WER**: Word Error Rate (lower is better).
         - **CER**: Character Error Rate (lower is better).
         """
     )
     with gr.Row():
         submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
         csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
     submit_btn = gr.Button("Submit")
     output_msg = gr.Textbox(label="Status", interactive=False)
     leaderboard_display = gr.DataFrame(
@@ -88,4 +200,9 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
         outputs=[output_msg, leaderboard_display]
     )
-demo.launch(share=True)

 import gradio as gr
 import pandas as pd
 from datasets import load_dataset
+from jiwer import wer, cer
 import os
 from datetime import datetime
+import re
 # Load the Bambara ASR dataset
+print("Loading dataset...")
 dataset = load_dataset("sudoping01/bambara-asr-benchmark", name="default")["train"]
 references = {row["id"]: row["text"] for row in dataset}
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
     pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
+else:
+    print(f"Loaded existing leaderboard with {len(pd.read_csv(leaderboard_file))} entries")
def normalize_text(text):
    """Normalize text for WER/CER calculation.

    Steps, applied in order:
    - Convert to lowercase
    - Remove punctuation (every character that is not a word character or
      whitespace, plus the underscore)
    - Collapse each run of whitespace into a single space
    - Strip leading/trailing spaces

    Args:
        text: The string to normalize. Non-string values are converted with
            ``str()`` first, so e.g. ``123`` becomes ``"123"`` and a float
            NaN becomes ``"nan"``.

    Returns:
        The normalized string; may be empty if nothing but punctuation and
        whitespace was present.
    """
    if not isinstance(text, str):
        text = str(text)
    lowered = text.lower()
    # `\w` includes the underscore, so `[^\w\s]` alone would leave it in
    # place; the extra `|_` alternative removes it like any other punctuation.
    without_punctuation = re.sub(r"[^\w\s]|_", "", lowered)
    # Removing punctuation can leave doubled or dangling spaces behind.
    return re.sub(r"\s+", " ", without_punctuation).strip()
def calculate_metrics(predictions_df):
    """Calculate WER and CER for predictions.

    Each prediction is normalized with ``normalize_text`` and compared with
    the normalized reference of the same ID from the module-level
    ``references`` mapping. Details of the first few scored samples are
    printed for debugging.

    Args:
        predictions_df: DataFrame with an ``id`` column and a ``text`` column
            holding the predicted transcription for that ID.

    Returns:
        A tuple ``(avg_wer, avg_cer, results)`` where the averages are the
        unweighted means of the per-sample scores and ``results`` is a list
        of dicts with keys ``id``, ``reference``, ``hypothesis``, ``wer`` and
        ``cer``.

    Raises:
        ValueError: If no sample could be scored.
    """
    results = []
    for id_val, raw_text in zip(predictions_df["id"], predictions_df["text"]):
        if id_val not in references:
            print(f"Warning: ID {id_val} not found in references")
            continue

        reference = normalize_text(references[id_val])
        # A blank CSV cell is read by pandas as NaN; treat it as "no
        # prediction" rather than letting it be stringified to "nan".
        hypothesis = "" if pd.isna(raw_text) else normalize_text(raw_text)

        # Print detailed info for first few entries
        show_details = len(results) < 5
        if show_details:
            print(f"ID: {id_val}")
            print(f"Reference: '{reference}'")
            print(f"Hypothesis: '{hypothesis}'")

        # The error rate is undefined for an empty reference, so skip it.
        if not reference:
            print(f"Warning: Empty reference for ID {id_val}")
            continue

        if not hypothesis:
            # A missing prediction deletes every reference token, which is an
            # error rate of exactly 1.0. Skipping the sample instead would
            # drop it from the average and reward empty predictions.
            print(f"Warning: Empty hypothesis for ID {id_val}; scoring as 1.0")
            sample_wer = 1.0
            sample_cer = 1.0
        else:
            # Identical strings are scored as-is and legitimately yield 0.0.
            try:
                sample_wer = wer(reference, hypothesis)
                sample_cer = cer(reference, hypothesis)
            except Exception as e:
                # Best effort: one bad sample must not abort the submission.
                print(f"Error calculating metrics for ID {id_val}: {str(e)}")
                continue

        if show_details:
            print(f"WER: {sample_wer}, CER: {sample_cer}")

        results.append({
            "id": id_val,
            "reference": reference,
            "hypothesis": hypothesis,
            "wer": sample_wer,
            "cer": sample_cer,
        })

    if not results:
        raise ValueError("No valid samples for WER/CER calculation")

    # Calculate average metrics
    avg_wer = sum(item["wer"] for item in results) / len(results)
    avg_cer = sum(item["cer"] for item in results) / len(results)
    return avg_wer, avg_cer, results
 def process_submission(submitter_name, csv_file):
     try:
         # Read and validate the uploaded CSV
         df = pd.read_csv(csv_file)
+        print(f"Processing submission from {submitter_name} with {len(df)} rows")
+        if len(df) == 0:
+            return "Error: Uploaded CSV is empty.", None
         if set(df.columns) != {"id", "text"}:
+            return f"Error: CSV must contain exactly 'id' and 'text' columns. Found: {', '.join(df.columns)}", None
         if df["id"].duplicated().any():
+            dup_ids = df[df["id"].duplicated()]["id"].unique()
+            return f"Error: Duplicate IDs found: {', '.join(map(str, dup_ids[:5]))}", None
+        # Check if IDs match the reference dataset
+        missing_ids = set(references.keys()) - set(df["id"])
+        extra_ids = set(df["id"]) - set(references.keys())
+        if missing_ids:
+            return f"Error: Missing {len(missing_ids)} IDs in submission. First few missing: {', '.join(map(str, list(missing_ids)[:5]))}", None
+        if extra_ids:
+            return f"Error: Found {len(extra_ids)} extra IDs not in reference dataset. First few extra: {', '.join(map(str, list(extra_ids)[:5]))}", None
+        # Calculate WER and CER
+        try:
+            avg_wer, avg_cer, detailed_results = calculate_metrics(df)
+            # Debug information
+            print(f"Calculated metrics - WER: {avg_wer:.4f}, CER: {avg_cer:.4f}")
+            print(f"Processed {len(detailed_results)} valid samples")
+            # Check for suspiciously low values
+            if avg_wer < 0.001:
+                print("WARNING: WER is extremely low - likely an error")
+                return "Error: WER calculation yielded suspicious results (near-zero). Please check your submission CSV.", None
+        except Exception as e:
+            print(f"Error in metrics calculation: {str(e)}")
+            return f"Error calculating metrics: {str(e)}", None
         # Update the leaderboard
         leaderboard = pd.read_csv(leaderboard_file)
         leaderboard = pd.concat([leaderboard, new_entry]).sort_values("WER")
         leaderboard.to_csv(leaderboard_file, index=False)
+        return f"Submission processed successfully! WER: {avg_wer:.4f}, CER: {avg_cer:.4f}", leaderboard
     except Exception as e:
+        print(f"Error processing submission: {str(e)}")
         return f"Error processing submission: {str(e)}", None
 # Create the Gradio interface
     gr.Markdown(
         """
         # Bambara ASR Leaderboard
+        Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
+        The 'id's must match those in the dataset.
         [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
         - **WER**: Word Error Rate (lower is better).
         - **CER**: Character Error Rate (lower is better).
         """
     )
     with gr.Row():
         submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
         csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
     submit_btn = gr.Button("Submit")
     output_msg = gr.Textbox(label="Status", interactive=False)
     leaderboard_display = gr.DataFrame(
         outputs=[output_msg, leaderboard_display]
     )
+# Print startup message
+print("Starting Bambara ASR Leaderboard app...")
+# Launch the app
+if __name__ == "__main__":
+    demo.launch(share=True)