File size: 4,464 Bytes
4c0593e
09b2769
d521dce
61ba593
 
 
d521dce
 
 
 
378c937
61ba593
 
8414736
 
 
 
 
 
1dc0a7f
d521dce
09b2769
1dc0a7f
752ce9b
 
 
d521dce
8cfce12
234fe59
1dc0a7f
d521dce
1dc0a7f
 
61ba593
 
4c0593e
61ba593
 
 
 
 
 
1dc0a7f
09b2769
 
 
 
 
1dc0a7f
d521dce
8414736
d521dce
 
 
 
 
 
bb8566e
1dc0a7f
752ce9b
4c0593e
8cfce12
 
 
4c0593e
 
 
 
 
 
 
 
 
 
 
 
 
1dc0a7f
 
 
 
752ce9b
1dc0a7f
09b2769
 
1dc0a7f
 
8414736
09b2769
 
1dc0a7f
 
 
 
 
 
 
 
09b2769
752ce9b
1dc0a7f
 
 
 
 
09b2769
752ce9b
1dc0a7f
 
 
 
 
 
4c0593e
1dc0a7f
 
 
752ce9b
4c0593e
752ce9b
8414736
4c0593e
d521dce
 
 
 
 
 
 
8414736
d521dce
1dc0a7f
bb8566e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import gradio as gr
from processing import run
import json
from huggingface_hub import login
import os




# LOG INTO HUGGING FACE
# The access token is read from the "HF_Token" environment variable.
hf_token = os.getenv("HF_Token")
if hf_token:
    login(token=hf_token)
else:
    # Calling login() without a token falls back to an interactive prompt,
    # which cannot be answered when the app runs as a server process, so the
    # login step is skipped instead.
    print("HF_Token is not set; skipping Hugging Face login.")

# def hf_login():
#     hf_token = os.getenv("HF_Token")
#     if hf_token is None:
#         with open("credentials.json", "r") as f:
#             hf_token = json.load(f)["token"]
#     login(token=hf_token, add_to_git_credential=True)

# hf_login()





# GENERAL OPTIONS FOR MODELS AND DATASETS
# Model identifiers offered by both model dropdowns.
MODEL_OPTIONS = [
    "openai/whisper-tiny.en",
    "facebook/s2t-medium-librispeech-asr",
    "facebook/wav2vec2-base-960h",
    "openai/whisper-large-v2",
]
# Choices of the dataset radio; the last entry is the literal that is_own()
# compares against to reveal the recording inputs.
DATASET_OPTIONS = [
    "Common Voice",
    "Librispeech ASR clean",
    "Librispeech ASR other",
    "OWN Recoding/Sample",
]

# HELPER FUNCTIONS
def get_card(selected_model: str) -> str:
    """Return the model card text for ``selected_model``.

    ``cards.txt`` (resolved against the current working directory) is read
    and split on the ``@@`` separator; the first section that contains
    ``"ID: " + selected_model`` is returned unchanged.

    Args:
        selected_model: Model identifier as listed in the dropdown. May be
            ``None`` or empty when nothing is selected.

    Returns:
        The matching card section, ``""`` when no model is selected, or
        ``"Unknown Model"`` when no section mentions the identifier.

    Raises:
        FileNotFoundError: If ``cards.txt`` does not exist.
    """
    # A cleared/empty selection would make the string concatenation below
    # fail (None) or match the first card that has any "ID: " line (""),
    # so show an empty card instead.
    if not selected_model:
        return ""

    with open("cards.txt", "r", encoding="utf-8") as f:
        sections = f.read().split("@@")

    marker = "ID: " + selected_model
    return next((section for section in sections if marker in section), "Unknown Model")

def is_own(selected_option):
    """Build visibility updates for the two own-sample input components.

    Returns a pair of ``gr.update`` objects whose ``visible`` flag is ``True``
    only when ``selected_option`` equals ``"OWN Recoding/Sample"``.
    """
    show = bool(selected_option == "OWN Recoding/Sample")
    return gr.update(visible=show), gr.update(visible=show)

def make_visible():
    """Return three ``gr.update(visible=True)`` objects, one per output component."""
    return tuple(gr.update(visible=True) for _ in range(3))





# THE ACTUAL APP
# Builds the Gradio UI: a title and welcome text, the dataset selector with
# optional own-sample inputs, two model dropdowns with their cards, and the
# evaluation button with its result components. The app is launched at the end.
with gr.Blocks() as demo:


    # Page title followed by an (effectively empty) Markdown block.
    gr.Markdown('# <p style="text-align: center;">ASR Model Comparison 💬</p>')
    gr.Markdown("""

""")


    # Welcome / usage text shown above the controls.
    gr.Markdown("""### Welcome to ASR Model Comparison Hub! 🎉

Hey there, and welcome to an app designed just for developers like you, who are passionate about pushing the boundaries of Automatic Speech Recognition (ASR) technology!

Here, you can easily compare different ASR models by selecting a dataset and choosing two models from the dropdown to see how they stack up against each other. If you're feeling creative, go ahead and select 'OWN' as your dataset option to upload your own audio file or record something new right in the app. Don’t forget to provide a transcription, and the app will handle the rest!

ASR Model Comparison Hub uses the Word Error Rate (WER) ⬇️ (the lower the better) metric to give you a clear picture of each model's performance. And hey, don't miss out on checking the **Amazing Leaderboard** where you can see how a wide range of models have been evaluated—[Check it out here](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard).

Happy experimenting and comparing! 🚀""")

    

    # Dataset selection row. The empty scale=1 columns on either side
    # presumably serve as horizontal padding around the scale=5 column.
    with gr.Row():
        with gr.Column(scale=1):
            pass
        with gr.Column(scale=5):
            data_subset = gr.Radio(
                value="Common Voice",
                choices=DATASET_OPTIONS,
                label="Data subset / Own Sample",
            )
            # Own-sample inputs start hidden; is_own() toggles their visibility
            # whenever the radio selection changes.
            # NOTE(review): only the microphone source is enabled, while the
            # welcome text also mentions uploading a file - confirm intent.
            own_audio = gr.Audio(sources=['microphone'], visible=False)
            own_transcription = gr.TextArea(lines=2, visible=False)
            data_subset.change(is_own, inputs=[data_subset], outputs=[own_audio, own_transcription])
        with gr.Column(scale=1):
            pass


    # Model selection row: two equally sized columns, each with a dropdown
    # and a Markdown area for the selected model's card.
    with gr.Row():

        with gr.Column(scale=1):
            model_1 = gr.Dropdown(
                choices=MODEL_OPTIONS,
                label="Select Model"
            )
            model_1_card = gr.Markdown("")

        with gr.Column(scale=1):
            model_2 = gr.Dropdown(
                choices=MODEL_OPTIONS,
                label="Select Model"
            )
            model_2_card = gr.Markdown("")


        # Refresh the card text whenever the corresponding dropdown changes.
        model_1.change(get_card, inputs=model_1, outputs=model_1_card)
        model_2.change(get_card, inputs=model_2, outputs=model_2_card)


    eval_btn = gr.Button(
        value="Evaluate",
        variant="primary",
        size="sm")
    
    # Result components; title, plot and table stay hidden until the
    # Evaluate button is clicked.
    results_title = gr.Markdown('## <p style="text-align: center;">Results</p>', visible=False)
    results_md = gr.Markdown("")
    results_plot = gr.Plot(show_label=False, visible=False)
    results_df = gr.DataFrame(
        visible=False,
        row_count=(5, "dynamic"),  # Allow dynamic rows
        interactive=False,  # Read-only: users cannot edit the DataFrame
        wrap=True,  # Ensure text wraps to multiple lines
    )
    # Two separate listeners are registered on the same click: the first
    # reveals the result components, the second calls run() (imported from
    # processing) with the current selections and writes its outputs to the
    # Markdown, plot and table.
    # NOTE(review): both listeners target results_plot and results_df; confirm
    # their updates cannot overwrite each other.
    eval_btn.click(make_visible, outputs=[results_plot, results_df, results_title])
    eval_btn.click(run, [data_subset, model_1, model_2, own_audio, own_transcription], [results_md, results_plot, results_df], show_progress=False)

# Start the app with debug mode enabled.
demo.launch(debug=True)