arunasrivastava commited on
Commit
b957022
·
1 Parent(s): 4b2532b

front end edits

Browse files
__pycache__/constants.cpython-310.pyc ADDED
Binary file (8.44 kB). View file
 
__pycache__/init.cpython-310.pyc ADDED
Binary file (3.11 kB). View file
 
__pycache__/utils_display.cpython-310.pyc ADDED
Binary file (2.16 kB). View file
 
app.py CHANGED
@@ -10,16 +10,8 @@ LAST_UPDATED = "Nov 22th 2024"
10
 
11
  column_names = {
12
  "MODEL": "Model",
13
- "Avg. WER": "Average WER ⬇️",
14
- "RTFx": "RTFx ⬆️️",
15
- "AMI WER": "AMI",
16
- "Earnings22 WER": "Earnings22",
17
- "Gigaspeech WER": "Gigaspeech",
18
- "LS Clean WER": "LS Clean",
19
- "LS Other WER": "LS Other",
20
- "SPGISpeech WER": "SPGISpeech",
21
- "Tedlium WER": "Tedlium",
22
- "Voxpopuli WER": "Voxpopuli",
23
  }
24
 
25
  eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
 
10
 
11
  column_names = {
12
  "MODEL": "Model",
13
+ "Avg. PER": "Average PER ⬇️",
14
+ "Avg. PWED": "Average PWED ⬇️",
 
 
 
 
 
 
 
 
15
  }
16
 
17
  eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
constants.py CHANGED
@@ -11,22 +11,14 @@ EVAL_REQUESTS_PATH = Path("eval_requests")
11
  banner_url = "https://huggingface.co/datasets/reach-vb/random-images/resolve/main/asr_leaderboard.png"
12
  BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'
13
 
14
- TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🤗 Open Automatic Speech Recognition Leaderboard </b> </body> </html>"
15
 
16
- INTRODUCTION_TEXT = "📐 The 🤗 Open ASR Leaderboard ranks and evaluates speech recognition models \
17
  on the Hugging Face Hub. \
18
- \nWe report the Average [WER](https://huggingface.co/spaces/evaluate-metric/wer) (⬇️ lower the better) and [RTFx](https://github.com/NVIDIA/DeepLearningExamples/blob/master/Kaldi/SpeechRecognition/README.md#metrics) (⬆️ higher the better). Models are ranked based on their Average WER, from lowest to highest. Check the 📈 Metrics tab to understand how the models are evaluated. \
19
  \nIf you want results for a model that is not listed here, you can submit a request for it to be included ✉️✨. \
20
  \nThe leaderboard currently focuses on English speech recognition, and will be expanded to multilingual evaluation in later versions."
21
 
22
- CITATION_TEXT = """@misc{open-asr-leaderboard,
23
- title = {Open Automatic Speech Recognition Leaderboard},
24
- author = {Srivastav, Vaibhav and Majumdar, Somshubra and Koluguri, Nithin and Moumen, Adel and Gandhi, Sanchit and others},
25
- year = 2023,
26
- publisher = {Hugging Face},
27
- howpublished = "\\url{https://huggingface.co/spaces/hf-audio/open_asr_leaderboard}"
28
- }
29
- """
30
 
31
  METRICS_TAB_TEXT = """
32
  Here you will find details about the speech recognition metrics and datasets reported in our leaderboard.
@@ -101,16 +93,11 @@ a model is likely to perform on downstream ASR compared to evaluating it on one
101
  The ESB score is calculated as a macro-average of the WER scores across the ESB datasets. The models in the leaderboard
102
  are ranked based on their average WER scores, from lowest to highest.
103
 
 
 
104
  | Dataset | Domain | Speaking Style | Train (h) | Dev (h) | Test (h) | Transcriptions | License |
105
  |-----------------------------------------------------------------------------------------|-----------------------------|-----------------------|-----------|---------|----------|--------------------|-----------------|
106
- | [LibriSpeech](https://huggingface.co/datasets/librispeech_asr) | Audiobook | Narrated | 960 | 11 | 11 | Normalised | CC-BY-4.0 |
107
- | [VoxPopuli](https://huggingface.co/datasets/facebook/voxpopuli) | European Parliament | Oratory | 523 | 5 | 5 | Punctuated | CC0 |
108
- | [TED-LIUM](https://huggingface.co/datasets/LIUM/tedlium) | TED talks | Oratory | 454 | 2 | 3 | Normalised | CC-BY-NC-ND 3.0 |
109
- | [GigaSpeech](https://huggingface.co/datasets/speechcolab/gigaspeech) | Audiobook, podcast, YouTube | Narrated, spontaneous | 2500 | 12 | 40 | Punctuated | apache-2.0 |
110
- | [SPGISpeech](https://huggingface.co/datasets/kensho/spgispeech) | Financial meetings | Oratory, spontaneous | 4900 | 100 | 100 | Punctuated & Cased | User Agreement |
111
- | [Earnings-22](https://huggingface.co/datasets/revdotcom/earnings22) | Financial meetings | Oratory, spontaneous | 105 | 5 | 5 | Punctuated & Cased | CC-BY-SA-4.0 |
112
- | [AMI](https://huggingface.co/datasets/edinburghcstr/ami) | Meetings | Spontaneous | 78 | 9 | 9 | Punctuated & Cased | CC-BY-4.0 |
113
-
114
  For more details on the individual datasets and how models are evaluated to give the ESB score, refer to the [ESB paper](https://arxiv.org/abs/2210.13352).
115
  """
116
 
 
11
  banner_url = "https://huggingface.co/datasets/reach-vb/random-images/resolve/main/asr_leaderboard.png"
12
  BANNER = f'<div style="display: flex; justify-content: space-around;"><img src="{banner_url}" alt="Banner" style="width: 40vw; min-width: 300px; max-width: 600px;"> </div>'
13
 
14
+ TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🤗 IPA Transcription Leaderboard </h1> </body> </html>"
15
 
16
+ INTRODUCTION_TEXT = "📐 The 🤗 IPA Transcription Leaderboard ranks and evaluates phoneme transcription models \
17
  on the Hugging Face Hub. \
18
+ \nWe report the Average [PER](https://huggingface.co/spaces/evaluate-metric/wer) (⬇️ lower the better) and the phoneme-weighted edit distance (PWED, ⬇️ lower the better). Models are ranked based on their Average PER, from lowest to highest. Check the 📈 Metrics tab to understand how the models are evaluated. \
19
  \nIf you want results for a model that is not listed here, you can submit a request for it to be included ✉️✨. \
20
  \nThe leaderboard currently focuses on English speech recognition, and will be expanded to multilingual evaluation in later versions."
21
 
 
 
 
 
 
 
 
 
22
 
23
  METRICS_TAB_TEXT = """
24
  Here you will find details about the speech recognition metrics and datasets reported in our leaderboard.
 
93
  The ESB score is calculated as a macro-average of the WER scores across the ESB datasets. The models in the leaderboard
94
  are ranked based on their average WER scores, from lowest to highest.
95
 
96
+ We are currently working to add and curate more datasets. Right now, models will be evaluated just on the TIMIT test dataset for phoneme transcription.
97
+
98
  | Dataset | Domain | Speaking Style | Train (h) | Dev (h) | Test (h) | Transcriptions | License |
99
  |-----------------------------------------------------------------------------------------|-----------------------------|-----------------------|-----------|---------|----------|--------------------|-----------------|
100
+ | [TIMIT Dataset](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech) | Read speech | Narrated | ~4 | — | ~1.4 | Time-aligned phonetic | LDC User Agreement |
 
 
 
 
 
 
 
101
  For more details on the individual datasets and how models are evaluated to give the ESB score, refer to the [ESB paper](https://arxiv.org/abs/2210.13352).
102
  """
103
 
utils_display.py CHANGED
@@ -14,15 +14,7 @@ def fields(raw_class):
14
  class AutoEvalColumn: # Auto evals column
15
  model = ColumnContent("Model", "markdown")
16
  avg_wer = ColumnContent("Average WER ⬇️", "number")
17
- rtf = ColumnContent("RTFx ⬆️️", "number")
18
- ami_wer = ColumnContent("AMI", "number")
19
- e22_wer = ColumnContent("Earnings22", "number")
20
- gs_wer = ColumnContent("Gigaspeech", "number")
21
- lsc_wer = ColumnContent("LS Clean", "number")
22
- lso_wer = ColumnContent("LS Other", "number")
23
- ss_wer = ColumnContent("SPGISpeech", "number")
24
- tl_wer = ColumnContent("Tedlium", "number")
25
- vp_wer = ColumnContent("Voxpopuli", "number")
26
 
27
 
28
  def make_clickable_model(model_name):
 
14
  class AutoEvalColumn: # Auto evals column
15
  model = ColumnContent("Model", "markdown")
16
  avg_wer = ColumnContent("Average WER ⬇️", "number")
17
+ avg_pwed = ColumnContent("Average PWED ⬇️", "number")
 
 
 
 
 
 
 
 
18
 
19
 
20
  def make_clickable_model(model_name):