sudoping01 committed on
Commit
3bdb09a
·
verified ·
1 Parent(s): d60aa58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -5,7 +5,7 @@ from jiwer import wer, cer, transforms
5
  import os
6
  from datetime import datetime
7
 
8
-
9
  transform = transforms.Compose([
10
  transforms.RemovePunctuation(),
11
  transforms.ToLowerCase(),
@@ -22,9 +22,8 @@ if not os.path.exists(leaderboard_file):
22
  pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
23
 
24
  def process_submission(submitter_name, csv_file):
25
-
26
  try:
27
- # Read and validate the uploaded CSV
28
  df = pd.read_csv(csv_file)
29
  if set(df.columns) != {"id", "text"}:
30
  return "Error: CSV must contain exactly 'id' and 'text' columns.", None
@@ -33,19 +32,19 @@ def process_submission(submitter_name, csv_file):
33
  if set(df["id"]) != set(references.keys()):
34
  return "Error: CSV 'id's must match the dataset 'id's.", None
35
 
36
- # Calculate WER and CER for each prediction
37
  wers, cers = [], []
38
  for _, row in df.iterrows():
39
  ref = references[row["id"]]
40
  pred = row["text"]
41
- wers.append(wer(ref, pred, standardize=transform))
42
- cers.append(cer(ref, pred, standardize=transform))
43
 
44
- # Compute average WER and CER
45
  avg_wer = sum(wers) / len(wers)
46
  avg_cer = sum(cers) / len(cers)
47
 
48
- # Update the leaderboard
49
  leaderboard = pd.read_csv(leaderboard_file)
50
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
51
  new_entry = pd.DataFrame(
@@ -59,7 +58,7 @@ def process_submission(submitter_name, csv_file):
59
  except Exception as e:
60
  return f"Error processing submission: {str(e)}", None
61
 
62
- # Create the Gradio interface
63
  with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
64
  gr.Markdown(
65
  """
@@ -73,7 +72,7 @@ with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
73
  """
74
  )
75
  with gr.Row():
76
- submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
77
  csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
78
  submit_btn = gr.Button("Submit")
79
  output_msg = gr.Textbox(label="Status", interactive=False)
 
5
  import os
6
  from datetime import datetime
7
 
8
+ # Define text normalization transform
9
  transform = transforms.Compose([
10
  transforms.RemovePunctuation(),
11
  transforms.ToLowerCase(),
 
22
  pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
23
 
24
  def process_submission(submitter_name, csv_file):
 
25
  try:
26
+
27
  df = pd.read_csv(csv_file)
28
  if set(df.columns) != {"id", "text"}:
29
  return "Error: CSV must contain exactly 'id' and 'text' columns.", None
 
32
  if set(df["id"]) != set(references.keys()):
33
  return "Error: CSV 'id's must match the dataset 'id's.", None
34
 
35
+
36
  wers, cers = [], []
37
  for _, row in df.iterrows():
38
  ref = references[row["id"]]
39
  pred = row["text"]
40
+ wers.append(wer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
41
+ cers.append(cer(ref, pred, truth_transform=transform, hypothesis_transform=transform))
42
 
43
+
44
  avg_wer = sum(wers) / len(wers)
45
  avg_cer = sum(cers) / len(cers)
46
 
47
+
48
  leaderboard = pd.read_csv(leaderboard_file)
49
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
50
  new_entry = pd.DataFrame(
 
58
  except Exception as e:
59
  return f"Error processing submission: {str(e)}", None
60
 
61
+
62
  with gr.Blocks(title="Bambara ASR Leaderboard") as demo:
63
  gr.Markdown(
64
  """
 
72
  """
73
  )
74
  with gr.Row():
75
+ submitter = gr.Textbox(label="Submitter Name or Model Name", placeholder="e.g., MALIBA-AI/asr")
76
  csv_upload = gr.File(label="Upload CSV File", file_types=[".csv"])
77
  submit_btn = gr.Button("Submit")
78
  output_msg = gr.Textbox(label="Status", interactive=False)