Spaces:

MALIBA-AI
/

bambara-asr-leaderboard

Running

App Files Community

sudoping01 committed on Mar 15

Commit

3efa4cc

verified ·

1 Parent(s): 5d4699a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -9

app.py CHANGED Viewed

@@ -25,21 +25,35 @@ def process_submission(submitter_name, csv_file):
     try:
         # Read and validate the uploaded CSV
         df = pd.read_csv(csv_file)
         if set(df.columns) != {"id", "text"}:
             return "Error: CSV must contain exactly 'id' and 'text' columns.", None
         if df["id"].duplicated().any():
             return "Error: Duplicate 'id's found in the CSV.", None
         if set(df["id"]) != set(references.keys()):
             return "Error: CSV 'id's must match the dataset 'id's.", None
         # Calculate WER and CER for each prediction
         wers, cers = [], []
-        for _, row in df.iterrows():
-            ref = references[row["id"]]
-            pred = row["text"]
-            wers.append(wer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
-            cers.append(cer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
         # Compute average WER and CER
         avg_wer = sum(wers) / len(wers)
         avg_cer = sum(cers) / len(cers)
@@ -55,6 +69,7 @@ def process_submission(submitter_name, csv_file):
         leaderboard.to_csv(leaderboard_file, index=False)
         return "Submission processed successfully!", leaderboard
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
@@ -63,17 +78,18 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
     gr.Markdown(
         """
         # Bambara ASR Leaderboard
-        Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
-        The 'id's must match those in the dataset.
         [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
         - **WER**: Word Error Rate (lower is better).
         - **CER**: Character Error Rate (lower is better).
         """
     )
     with gr.Row():
         submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
         csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
     submit_btn = gr.Button("Submit")
     output_msg = gr.Textbox(label="Status", interactive=False)
     leaderboard_display = gr.DataFrame(

     try:
         # Read and validate the uploaded CSV
         df = pd.read_csv(csv_file)
         if set(df.columns) != {"id", "text"}:
             return "Error: CSV must contain exactly 'id' and 'text' columns.", None
         if df["id"].duplicated().any():
             return "Error: Duplicate 'id's found in the CSV.", None
         if set(df["id"]) != set(references.keys()):
             return "Error: CSV 'id's must match the dataset 'id's.", None
         # Calculate WER and CER for each prediction
         wers, cers = [], []
+        for _, row in df.iterrows():
+            ref = str(references[row["id"]])  # Ensure reference is a string
+            pred = str(row["text"])  # Ensure prediction is a string
+            # Apply transformation directly to the text strings before WER/CER calculation
+            ref_transformed = " ".join(transform(ref).split())
+            pred_transformed = " ".join(transform(pred).split())
+            # Check if transformation produced valid result
+            if not ref_transformed or not pred_transformed:
+                return f"Error: Empty string after transformation for id {row['id']}", None
+            # Calculate metrics without transform parameter (we pre-transformed)
+            wers.append(wer(ref_transformed, pred_transformed))
+            cers.append(cer(ref_transformed, pred_transformed))
         # Compute average WER and CER
         avg_wer = sum(wers) / len(wers)
         avg_cer = sum(cers) / len(cers)
         leaderboard.to_csv(leaderboard_file, index=False)
         return "Submission processed successfully!", leaderboard
     except Exception as e:
         return f"Error processing submission: {str(e)}", None
     gr.Markdown(
         """
         # Bambara ASR Leaderboard
+        Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
+        The 'id's must match those in the dataset.
         [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
         - **WER**: Word Error Rate (lower is better).
         - **CER**: Character Error Rate (lower is better).
         """
     )
     with gr.Row():
         submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
         csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
     submit_btn = gr.Button("Submit")
     output_msg = gr.Textbox(label="Status", interactive=False)
     leaderboard_display = gr.DataFrame(