Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
add longform tab
Browse files
app.py
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import json
|
| 4 |
-
from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS, EU_LANGUAGES, MULTILINGUAL_TAB_TEXT
|
| 5 |
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
|
| 6 |
-
from utils_display import AutoEvalColumn, MultilingualColumn, fields, make_clickable_model, styled_error, styled_message
|
| 7 |
import numpy as np
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
|
|
@@ -27,7 +27,7 @@ column_names = {
|
|
| 27 |
"Voxpopuli WER": "Voxpopuli",
|
| 28 |
}
|
| 29 |
|
| 30 |
-
eval_queue_repo, requested_models, csv_results, multilingual_csv_path = load_all_info_from_dataset_hub()
|
| 31 |
|
| 32 |
if not csv_results.exists():
|
| 33 |
raise Exception(f"CSV file {csv_results} does not exist locally")
|
|
@@ -57,6 +57,10 @@ TYPES = [c.type for c in fields(AutoEvalColumn)]
|
|
| 57 |
# Multilingual columns (dynamic based on expansion state)
|
| 58 |
MULTILINGUAL_COLS = [c.name for c in fields(MultilingualColumn)]
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
def create_multilingual_dataframe():
|
| 61 |
"""Create multilingual dataframe with CoVoST, MLS, and FLEURS benchmark data"""
|
| 62 |
global benchmark_details, expanded_languages
|
|
@@ -225,6 +229,43 @@ def toggle_language_expansion(language_code):
|
|
| 225 |
# Initialize multilingual dataframe
|
| 226 |
multilingual_df = create_multilingual_dataframe()
|
| 227 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
def request_model(model_text, chbcoco2017):
|
| 230 |
|
|
@@ -363,10 +404,21 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
|
|
| 363 |
outputs=[multilingual_table]
|
| 364 |
)
|
| 365 |
|
| 366 |
-
with gr.TabItem("
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
|
| 368 |
|
| 369 |
-
with gr.TabItem("βοΈβ¨ Request a model here!", elem_id="od-benchmark-tab-table", id=
|
| 370 |
with gr.Column():
|
| 371 |
gr.Markdown("# βοΈβ¨ Request results for a new model here!", elem_classes="markdown-text")
|
| 372 |
with gr.Column():
|
|
@@ -381,7 +433,7 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
|
|
| 381 |
[model_name_textbox, chb_coco2017],
|
| 382 |
mdw_submission_result)
|
| 383 |
# add an about section
|
| 384 |
-
with gr.TabItem("π€ About", elem_id="od-benchmark-tab-table", id=
|
| 385 |
gr.Markdown("## About", elem_classes="markdown-text")
|
| 386 |
|
| 387 |
gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
import json
|
| 4 |
+
from constants import BANNER, INTRODUCTION_TEXT, CITATION_TEXT, METRICS_TAB_TEXT, DIR_OUTPUT_REQUESTS, LEADERBOARD_CSS, EU_LANGUAGES, MULTILINGUAL_TAB_TEXT, LONGFORM_TAB_TEXT
|
| 5 |
from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
|
| 6 |
+
from utils_display import AutoEvalColumn, MultilingualColumn, LongformColumn, fields, make_clickable_model, styled_error, styled_message
|
| 7 |
import numpy as np
|
| 8 |
from datetime import datetime, timezone
|
| 9 |
|
|
|
|
| 27 |
"Voxpopuli WER": "Voxpopuli",
|
| 28 |
}
|
| 29 |
|
| 30 |
+
eval_queue_repo, requested_models, csv_results, multilingual_csv_path, longform_csv_path = load_all_info_from_dataset_hub()
|
| 31 |
|
| 32 |
if not csv_results.exists():
|
| 33 |
raise Exception(f"CSV file {csv_results} does not exist locally")
|
|
|
|
| 57 |
# Multilingual columns (dynamic based on expansion state)
|
| 58 |
MULTILINGUAL_COLS = [c.name for c in fields(MultilingualColumn)]
|
| 59 |
|
| 60 |
+
# Longform columns: names and Dataframe datatypes taken from the LongformColumn fields
|
| 61 |
+
LONGFORM_COLS = [c.name for c in fields(LongformColumn)]
|
| 62 |
+
LONGFORM_TYPES = [c.type for c in fields(LongformColumn)]
|
| 63 |
+
|
| 64 |
def create_multilingual_dataframe():
|
| 65 |
"""Create multilingual dataframe with CoVoST, MLS, and FLEURS benchmark data"""
|
| 66 |
global benchmark_details, expanded_languages
|
|
|
|
| 229 |
# Initialize multilingual dataframe
|
| 230 |
multilingual_df = create_multilingual_dataframe()
|
| 231 |
|
| 232 |
+
def create_longform_dataframe():
    """Build the long-form leaderboard table from the long-form results CSV.

    Reads the CSV at the module-level ``longform_csv_path`` and emits one row
    per CSV row containing the clickable model name, the average WER, the
    RTFx value and the per-dataset WER for Earnings21, Earnings22 and Tedlium.

    A per-dataset value of ``-1`` (also the fallback when the column is
    absent from the CSV) is displayed as ``"NA"``; a non-positive RTFx is
    displayed as ``"NA"`` as well. The average WER is the mean of every
    strictly positive per-dataset WER, rounded to two decimals, or ``0.0``
    when no dataset has a usable score.

    Returns:
        pandas.DataFrame: rows sorted by ascending average WER. The frame is
        empty (but still carries the expected columns) when
        ``longform_csv_path`` is ``None``, the file does not exist, or the
        CSV has no rows.
    """
    avg_col = "Average WER β¬οΈ"
    rtfx_col = "RTFx β¬οΈοΈ"
    columns = ["Model", avg_col, rtfx_col, "Earnings21", "Earnings22", "Tedlium"]

    # Always hand the caller a DataFrame: previously a missing CSV made this
    # function fall off the end and return None.
    if longform_csv_path is None or not longform_csv_path.exists():
        return pd.DataFrame(columns=columns)

    longform_raw_df = pd.read_csv(longform_csv_path)

    def _wer_or_na(value):
        # -1 is the sentinel for "no score for this dataset".
        return value if value != -1 else "NA"

    longform_data = []
    for _, row_data in longform_raw_df.iterrows():
        earnings21_wer = row_data.get('earnings21', -1)
        earnings22_wer = row_data.get('earnings22', -1)
        tedlium_wer = row_data.get('tedlium', -1)
        rtfx_value = row_data.get('RTFx', 0)

        # Average over every dataset that has a usable score. Earnings22 was
        # previously read and displayed but left out of this list. The
        # `> 0` test also rejects the -1 sentinel and NaN cells.
        available_wers = [
            wer
            for wer in (earnings21_wer, earnings22_wer, tedlium_wer)
            if wer > 0
        ]
        avg_wer = round(np.mean(available_wers), 2) if available_wers else 0.0

        longform_data.append({
            "Model": make_clickable_model(row_data['model_id']),
            avg_col: avg_wer,
            rtfx_col: rtfx_value if rtfx_value > 0 else "NA",
            "Earnings21": _wer_or_na(earnings21_wer),
            "Earnings22": _wer_or_na(earnings22_wer),
            "Tedlium": _wer_or_na(tedlium_wer),
        })

    # Passing `columns` keeps the column set stable even when the CSV has no
    # rows, so the sort below cannot fail on a missing column.
    # NOTE(review): rows without any usable score get an average of 0.0 and
    # therefore sort to the top — confirm this is the intended ranking.
    longform_df = pd.DataFrame(longform_data, columns=columns)
    return longform_df.sort_values(by=avg_col)
|
| 265 |
+
|
| 266 |
+
# Initialize longform dataframe
|
| 267 |
+
longform_df = create_longform_dataframe()
|
| 268 |
+
|
| 269 |
|
| 270 |
def request_model(model_text, chbcoco2017):
|
| 271 |
|
|
|
|
| 404 |
outputs=[multilingual_table]
|
| 405 |
)
|
| 406 |
|
| 407 |
+
with gr.TabItem("π Long-form", elem_id="longform-benchmark-tab-table", id=2):
|
| 408 |
+
gr.Markdown(LONGFORM_TAB_TEXT, elem_classes="markdown-text")
|
| 409 |
+
|
| 410 |
+
longform_table = gr.components.Dataframe(
|
| 411 |
+
value=longform_df,
|
| 412 |
+
datatype=LONGFORM_TYPES,
|
| 413 |
+
elem_id="longform-table",
|
| 414 |
+
interactive=False,
|
| 415 |
+
visible=True,
|
| 416 |
+
)
|
| 417 |
+
|
| 418 |
+
with gr.TabItem("π Metrics", elem_id="od-benchmark-tab-table", id=4):
|
| 419 |
gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
|
| 420 |
|
| 421 |
+
with gr.TabItem("βοΈβ¨ Request a model here!", elem_id="od-benchmark-tab-table", id=5):
|
| 422 |
with gr.Column():
|
| 423 |
gr.Markdown("# βοΈβ¨ Request results for a new model here!", elem_classes="markdown-text")
|
| 424 |
with gr.Column():
|
|
|
|
| 433 |
[model_name_textbox, chb_coco2017],
|
| 434 |
mdw_submission_result)
|
| 435 |
# add an about section
|
| 436 |
+
with gr.TabItem("π€ About", elem_id="od-benchmark-tab-table", id=6):
|
| 437 |
gr.Markdown("## About", elem_classes="markdown-text")
|
| 438 |
|
| 439 |
gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")
|