sudoping01 committed on
Commit
3efa4cc
·
verified ·
1 Parent(s): 5d4699a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -9
app.py CHANGED
@@ -25,21 +25,35 @@ def process_submission(submitter_name, csv_file):
25
  try:
26
  # Read and validate the uploaded CSV
27
  df = pd.read_csv(csv_file)
 
28
  if set(df.columns) != {"id", "text"}:
29
  return "Error: CSV must contain exactly 'id' and 'text' columns.", None
 
30
  if df["id"].duplicated().any():
31
  return "Error: Duplicate 'id's found in the CSV.", None
 
32
  if set(df["id"]) != set(references.keys()):
33
  return "Error: CSV 'id's must match the dataset 'id's.", None
34
-
35
  # Calculate WER and CER for each prediction
36
  wers, cers = [], []
37
- for _, row in df.iterrows():
38
- ref = references[row["id"]]
39
- pred = row["text"]
40
- wers.append(wer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
41
- cers.append(cer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  # Compute average WER and CER
44
  avg_wer = sum(wers) / len(wers)
45
  avg_cer = sum(cers) / len(cers)
@@ -55,6 +69,7 @@ def process_submission(submitter_name, csv_file):
55
  leaderboard.to_csv(leaderboard_file, index=False)
56
 
57
  return "Submission processed successfully!", leaderboard
 
58
  except Exception as e:
59
  return f"Error processing submission: {str(e)}", None
60
 
@@ -63,17 +78,18 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
63
  gr.Markdown(
64
  """
65
  # Bambara ASR Leaderboard
66
- Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
67
- The 'id's must match those in the dataset.
68
  [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
69
-
70
  - **WER**: Word Error Rate (lower is better).
71
  - **CER**: Character Error Rate (lower is better).
72
  """
73
  )
 
74
  with gr.Row():
75
  submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
76
  csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
 
77
  submit_btn = gr.Button("Submit")
78
  output_msg = gr.Textbox(label="Status", interactive=False)
79
  leaderboard_display = gr.DataFrame(
 
25
  try:
26
  # Read and validate the uploaded CSV
27
  df = pd.read_csv(csv_file)
28
+
29
  if set(df.columns) != {"id", "text"}:
30
  return "Error: CSV must contain exactly 'id' and 'text' columns.", None
31
+
32
  if df["id"].duplicated().any():
33
  return "Error: Duplicate 'id's found in the CSV.", None
34
+
35
  if set(df["id"]) != set(references.keys()):
36
  return "Error: CSV 'id's must match the dataset 'id's.", None
37
+
38
  # Calculate WER and CER for each prediction
39
  wers, cers = [], []
 
 
 
 
 
40
 
41
+ for _, row in df.iterrows():
42
+ ref = str(references[row["id"]]) # Ensure reference is a string
43
+ pred = str(row["text"]) # Ensure prediction is a string
44
+
45
+ # Apply transformation directly to the text strings before WER/CER calculation
46
+ ref_transformed = " ".join(transform(ref).split())
47
+ pred_transformed = " ".join(transform(pred).split())
48
+
49
+ # Check if transformation produced valid result
50
+ if not ref_transformed or not pred_transformed:
51
+ return f"Error: Empty string after transformation for id {row['id']}", None
52
+
53
+ # Calculate metrics without transform parameter (we pre-transformed)
54
+ wers.append(wer(ref_transformed, pred_transformed))
55
+ cers.append(cer(ref_transformed, pred_transformed))
56
+
57
  # Compute average WER and CER
58
  avg_wer = sum(wers) / len(wers)
59
  avg_cer = sum(cers) / len(cers)
 
69
  leaderboard.to_csv(leaderboard_file, index=False)
70
 
71
  return "Submission processed successfully!", leaderboard
72
+
73
  except Exception as e:
74
  return f"Error processing submission: {str(e)}", None
75
 
 
78
  gr.Markdown(
79
  """
80
  # Bambara ASR Leaderboard
81
+ Upload a CSV file with 'id' and 'text' columns to evaluate your ASR predictions.
82
+ The 'id's must match those in the dataset.
83
  [View the dataset here](https://huggingface.co/datasets/MALIBA-AI/bambara_general_leaderboard_dataset).
 
84
  - **WER**: Word Error Rate (lower is better).
85
  - **CER**: Character Error Rate (lower is better).
86
  """
87
  )
88
+
89
  with gr.Row():
90
  submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
91
  csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
92
+
93
  submit_btn = gr.Button("Submit")
94
  output_msg = gr.Textbox(label="Status", interactive=False)
95
  leaderboard_display = gr.DataFrame(