sudoping01 committed on
Commit
9174c46
·
verified ·
1 Parent(s): da12542

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -9
app.py CHANGED
@@ -8,19 +8,14 @@ import re
8
 
9
  from huggingface_hub import login
10
 
11
-
12
  token = os.environ.get("HG_TOKEN")
13
 
14
- print(f"Token exists: {token is not None}")
15
-
16
  login(token)
17
 
18
- # Load the Bambara ASR dataset
19
  print("Loading dataset...")
20
  dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
21
  references = {row["id"]: row["text"] for row in dataset}
22
 
23
- # Load or initialize the leaderboard
24
  leaderboard_file = "leaderboard.csv"
25
  if not os.path.exists(leaderboard_file):
26
  pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
@@ -41,11 +36,11 @@ def normalize_text(text):
41
  # Convert to lowercase
42
  text = text.lower()
43
 
44
- # Remove punctuation, keeping spaces
45
- text = re.sub(r'[^\w\s]', '', text)
46
 
47
- # Normalize whitespace
48
- text = re.sub(r'\s+', ' ', text).strip()
49
 
50
  return text
51
 
 
8
 
9
  from huggingface_hub import login
10
 
 
11
  token = os.environ.get("HG_TOKEN")
12
 
 
 
13
  login(token)
14
 
 
15
  print("Loading dataset...")
16
  dataset = load_dataset("sudoping01/bambara-speech-recognition-benchmark", name="default")["eval"]
17
  references = {row["id"]: row["text"] for row in dataset}
18
 
 
19
  leaderboard_file = "leaderboard.csv"
20
  if not os.path.exists(leaderboard_file):
21
  pd.DataFrame(columns=["submitter", "WER", "CER", "timestamp"]).to_csv(leaderboard_file, index=False)
 
36
  # Convert to lowercase
37
  text = text.lower()
38
 
39
+ # # Remove punctuation, keeping spaces
40
+ # text = re.sub(r'[^\w\s]', '', text)
41
 
42
+ # # Normalize whitespace
43
+ # text = re.sub(r'\s+', ' ', text).strip()
44
 
45
  return text
46