import gradio as gr
from processing import run
import json # is only used if hf_login() is used
from huggingface_hub import login
import os
# LOG INTO HUGGING FACE
# The token is read from the HF_Token environment variable (e.g. a Space secret).
# NOTE(review): if HF_Token is unset, login(None) is called — confirm that falling
# back to cached credentials / an interactive prompt is intended here.
hf_token = os.getenv("HF_Token")
login(hf_token)
# I have used this function for logging into HF using a credentials file
# (kept for local development: it falls back to credentials.json when the
# HF_Token environment variable is not set)
# def hf_login():
#     hf_token = os.getenv("HF_Token")
#     if hf_token is None:
#         with open("credentials.json", "r") as f:
#             hf_token = json.load(f)["token"]
#     login(token=hf_token, add_to_git_credential=True)
# hf_login()
# GENERAL OPTIONS FOR MODELS AND DATASETS
# Model IDs are matched against the "ID: <model>" lines in cards.txt (see get_card).
MODEL_OPTIONS = ["openai/whisper-tiny.en", "facebook/s2t-medium-librispeech-asr", "facebook/wav2vec2-base-960h","openai/whisper-large-v2","facebook/hf-seamless-m4t-medium"]
# "OWN Recording/Sample" toggles the recording components (see is_own).
DATASET_OPTIONS = ["Common Voice", "Librispeech ASR clean", "Librispeech ASR other", "OWN Recording/Sample"]
# HELPER FUNCTIONS
def get_card(selected_model: str) -> str:
    """
    Return the markdown card for *selected_model* from ``cards.txt``.

    Cards live in a single file, separated by the ``@@`` marker; each card
    contains an ``ID: <model>`` line identifying the model it describes.

    Args:
        selected_model: Model identifier chosen in the dropdown. May be
            ``None`` or empty when the dropdown selection is cleared.

    Returns:
        The matching card's markdown text, or ``"## Unknown Model"`` when
        no model is selected or no card matches.
    """
    # Guard: a cleared dropdown passes None, which would crash the
    # string formatting / membership test below with a TypeError.
    if not selected_model:
        return "## Unknown Model"
    with open("cards.txt", "r") as f:
        cards = f.read().split("@@")
    for card in cards:
        if f"ID: {selected_model}" in card:
            return card
    return "## Unknown Model"
def is_own(selected_option):
    """
    Toggle visibility of the own-recording components.

    The audio recorder and transcription box are shown only when the user
    picks the "OWN Recording/Sample" dataset option; any other choice
    hides them again.
    """
    show = selected_option == "OWN Recording/Sample"
    return gr.update(visible=show), gr.update(visible=show)
def make_visible():
    """
    Reveal the result components (plot, dataframe and title) once the
    evaluation has been started.
    """
    # One visibility update per output component wired to this callback.
    return tuple(gr.update(visible=True) for _ in range(3))
# Introduction text rendered as markdown directly under the page title.
INTRODUCTION = """### Welcome to ASR Model Comparison Hub! 🎉
Hey there, and welcome to an app designed just for developers like you, who are passionate about pushing the boundaries of Automatic Speech Recognition (ASR) technology!
Here, you can easily compare different ASR models by selecting a dataset and choosing two models from the dropdown to see how they stack up against each other. If you're feeling creative, go ahead and select 'OWN' as your dataset option to upload your own audio file or record something new right in the app. Don’t forget to provide a transcription, and the app will handle the rest!
ASR Model Comparison Hub uses the Word Error Rate (WER) ⬇️ (the lower the better) metric to give you a clear picture of each model's performance. And hey, don't miss out on checking the **Amazing Leaderboard** where you can see how a wide range of models have been evaluated—[Check it out here](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard).
Happy experimenting and comparing! 🚀"""
# THE ACTUAL APP: page layout and event wiring for the Gradio Blocks UI
with gr.Blocks() as demo:
    gr.Markdown('# <p style="text-align: center;">ASR Model Comparison 💬</p>')
    gr.Markdown(INTRODUCTION)
    with gr.Row():
        # Empty spacer column — centers the dataset selector on the page
        with gr.Column(scale=1):
            pass
        with gr.Column(scale=5):
            # Select a dataset to evaluate the models on
            data_subset = gr.Radio(
                value="Common Voice",
                choices=DATASET_OPTIONS,
                label="Data subset / Own Sample",
            )
            # Components used to record an own sample; hidden until the
            # "OWN Recording/Sample" option is selected
            own_audio = gr.Audio(sources=['microphone'], visible=False, label=None)
            own_transcription = gr.TextArea(lines=2, visible=False, label=None)
            # Event listener toggling the two components above (see is_own)
            data_subset.change(is_own, inputs=[data_subset], outputs=[own_audio, own_transcription])
        # Empty spacer column (right-hand side)
        with gr.Column(scale=1):
            pass
    with gr.Row():
        # This column is for selecting the first model
        with gr.Column(scale=1):
            model_1 = gr.Dropdown(
                choices=MODEL_OPTIONS,
                label=None
            )
            # Markdown card describing the selected model (filled by get_card)
            model_1_card = gr.Markdown("")
        # This column is for selecting the second model
        with gr.Column(scale=1):
            model_2 = gr.Dropdown(
                choices=MODEL_OPTIONS,
                label=None
            )
            model_2_card = gr.Markdown("")
    # Event listeners: refresh the model card whenever a model is selected
    model_1.change(get_card, inputs=model_1, outputs=model_1_card)
    model_2.change(get_card, inputs=model_2, outputs=model_2_card)
    # Main action button to start the evaluation
    eval_btn = gr.Button(
        value="Evaluate",
        variant="primary",
        size="sm")
    # This section displays the evaluation results; all parts start hidden
    # and are revealed by make_visible when the button is clicked
    results_title = gr.Markdown(
        '## <p style="text-align: center;">Results</p>',
        visible=False
    )
    results_md = gr.Markdown("")
    results_plot = gr.Plot(show_label=False, visible=False)
    results_df = gr.DataFrame(
        visible=False,
        row_count=(5, "dynamic"),  # Allow dynamic rows
        interactive=False,  # Results table is read-only
        wrap=True,  # Ensure text wraps to multiple lines
    )
    # Event listeners fired when the main action button is triggered:
    # first reveal the result components, then run the actual evaluation
    eval_btn.click(make_visible, outputs=[results_plot, results_df, results_title])
    eval_btn.click(run, [data_subset, model_1, model_2, own_audio, own_transcription], [results_md, results_plot, results_df], show_progress=False)
demo.launch(debug=True)