Spaces:

MALIBA-AI
/

bambara-asr-leaderboard

Running

sudoping01 committed on Mar 24

Commit

9174c46

verified ·

1 Parent(s): da12542

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -8,19 +8,14 @@ import re
 from huggingface_hub import login
 token = os.environ.get("HG_TOKEN")
-print(f"Token exists: {token is not None}")
 login(token)
-# Load the Bambara ASR dataset
 print("Loading dataset...")
 dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
 references = {row["id"]: row["text"] for row in dataset}
-# Load or initialize the leaderboard
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
     pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
@@ -41,11 +36,11 @@ def normalize_text(text):
     # Convert to lowercase
     text = text.lower()
-    # Remove punctuation, keeping spaces
-    text = re.sub(r'[^\w\s]', '', text)
-    # Normalize whitespace
-    text = re.sub(r'\s+', ' ', text).strip()
     return text

 from huggingface_hub import login
 token = os.environ.get("HG_TOKEN")
 login(token)
 print("Loading dataset...")
 dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
 references = {row["id"]: row["text"] for row in dataset}
 leaderboard_file = "leaderboard.csv"
 if not os.path.exists(leaderboard_file):
     pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
     # Convert to lowercase
     text = text.lower()
+    # # Remove punctuation, keeping spaces
+    # text = re.sub(r'[^\w\s]', '', text)
+    # # Normalize whitespace
+    # text = re.sub(r'\s+', ' ', text).strip()
     return text