Steveeeeeeen HF staff committed on
Commit
74703b5
·
verified ·
1 Parent(s): 2ba944a

Update constants.py

Browse files
Files changed (1) hide show
  1. constants.py +30 -8
constants.py CHANGED
@@ -12,14 +12,6 @@ banner_url = "https://huggingface.co/datasets/reach-vb/random-images/resolve/mai
12
  BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'
13
 
14
  EXPLANATION = """
15
- ## Why EdAcc Matters for ASR Evaluation
16
-
17
- The EdAcc dataset is specifically designed to evaluate the robustness of Automatic Speech Recognition (ASR) models across diverse accents and demographics. This leaderboard helps you:
18
-
19
- * **Assess Accent Fairness**: Compare model performance across 30+ different accents and speaker demographics
20
- * **Evaluate Real-World Robustness**: Understand how ASR models perform beyond standard benchmarks
21
- * **Make Informed Choices**: Select models that work well for your target demographics
22
-
23
  ### How to Read the Results
24
  * **Average WER ⬇️**: Lower Word Error Rate (WER) is better
25
  * **Average per Gender**: Average WER for each gender
@@ -29,6 +21,36 @@ EXPLANATION = """
29
  Use the column filter to focus on specific demographics or view all results together.
30
  """
31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🤗 Open Automatic Speech Recognition Leaderboard </b> </body> </html>"
33
 
34
  INTRODUCTION_TEXT = "📐 Results on [EdAcc Dataset](https://huggingface.co/datasets/edinburghcstr/edacc) split by accents and gender. \
 
12
  BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'
13
 
14
  EXPLANATION = """
 
 
 
 
 
 
 
 
15
  ### How to Read the Results
16
  * **Average WER ⬇️**: Lower Word Error Rate (WER) is better
17
  * **Average per Gender**: Average WER for each gender
 
21
  Use the column filter to focus on specific demographics or view all results together.
22
  """
23
 
24
+ EXPLANATION_EDACC = """
25
+ ## EdAcc: Evaluating ASR Models Across Global English Accents
26
+
27
+ The Edinburgh International Accents of English Corpus (EdAcc) features over 40 distinct English accents from both native (L1) and non-native (L2) speakers. This evaluation helps you:
28
+
29
+ * **Compare Gender Performance**: Analyze how models perform across male and female speakers
30
+ * **Evaluate Regional Robustness**: Test model accuracy across European, Asian, African, and American accents
31
+ * **Assess Real-World Applicability**: Understand performance in natural conversational settings
32
+
33
+ The results show that:
34
+ * Larger models consistently outperform their smaller counterparts
35
+ * Multilingual models often handle accent diversity better than English-only variants
36
+ * Distilled models maintain good performance but show slight degradation on challenging accents
37
+ """
38
+
39
+ EXPLANATION_AFRI = """
40
+ ## AfriSpeech: Testing ASR Robustness on African English Accents
41
+
42
+ The AfriSpeech Out-of-Distribution (OOD) test set features 20 distinct African English accents not present in common training data. This benchmark:
43
+
44
+ * **Challenges Model Generalization**: Tests performance on truly underrepresented accents
45
+ * **Reveals Robustness Gaps**: Highlights limitations in current ASR systems
46
+ * **Guides Improvement**: Identifies areas needing focused development
47
+
48
+ Key findings show:
49
+ * Full-sized models significantly outperform distilled versions
50
+ * Multilingual models demonstrate better generalization to African accents
51
+ * Even top performers show room for improvement on these challenging accents
52
+ """
53
+
54
  TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🤗 Open Automatic Speech Recognition Leaderboard </b> </body> </html>"
55
 
56
  INTRODUCTION_TEXT = "📐 Results on [EdAcc Dataset](https://huggingface.co/datasets/edinburghcstr/edacc) split by accents and gender. \