rjzevallos committed (verified) · Commit 4868e2d · Parent(s): 798f155

Update app.py

Files changed (1):
  1. app.py (+90, −197)

app.py CHANGED
@@ -1,204 +1,97 @@
  import gradio as gr
- from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
- import pandas as pd
- from apscheduler.schedulers.background import BackgroundScheduler
- from huggingface_hub import snapshot_download
-
- from src.about import (
-     CITATION_BUTTON_LABEL,
-     CITATION_BUTTON_TEXT,
-     EVALUATION_QUEUE_TEXT,
-     INTRODUCTION_TEXT,
-     LLM_BENCHMARKS_TEXT,
-     TITLE,
- )
- from src.display.css_html_js import custom_css
- from src.display.utils import (
-     BENCHMARK_COLS,
-     COLS,
-     EVAL_COLS,
-     EVAL_TYPES,
-     AutoEvalColumn,
-     ModelType,
-     fields,
-     WeightType,
-     Precision
  )
- from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
- from src.populate import get_evaluation_queue_df, get_leaderboard_df
- from src.submission.submit import add_new_eval
-
-
- def restart_space():
-     API.restart_space(repo_id=REPO_ID)
-
- ### Space initialisation
- try:
-     print(EVAL_REQUESTS_PATH)
-     snapshot_download(
-         repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
- try:
-     print(EVAL_RESULTS_PATH)
-     snapshot_download(
-         repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
-     )
- except Exception:
-     restart_space()
-
-
- LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS)
-
- (
-     finished_eval_queue_df,
-     running_eval_queue_df,
-     pending_eval_queue_df,
- ) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS)
-
- def init_leaderboard(dataframe):
-     if dataframe is None or dataframe.empty:
-         raise ValueError("Leaderboard DataFrame is empty or None.")
-     return Leaderboard(
-         value=dataframe,
-         datatype=[c.type for c in fields(AutoEvalColumn)],
-         select_columns=SelectColumns(
-             default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-             cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-             label="Select Columns to Display:",
-         ),
-         search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
-         hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-         filter_columns=[
-             ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
-             ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
-             ColumnFilter(
-                 AutoEvalColumn.params.name,
-                 type="slider",
-                 min=0.01,
-                 max=150,
-                 label="Select the number of parameters (B)",
-             ),
-             ColumnFilter(
-                 AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
-             ),
-         ],
-         bool_checkboxgroup_label="Hide models",
-         interactive=False,
-     )
-
-
- demo = gr.Blocks(css=custom_css)
- with demo:
-     gr.HTML(TITLE)
-     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

      with gr.Tabs(elem_classes="tab-buttons") as tabs:
-         with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
-             leaderboard = init_leaderboard(LEADERBOARD_DF)
-
-         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
-             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
-
-         with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
-             with gr.Column():
-                 with gr.Row():
-                     gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-                 with gr.Column():
-                     with gr.Accordion(
-                         f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             finished_eval_table = gr.components.Dataframe(
-                                 value=finished_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-                     with gr.Accordion(
-                         f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             running_eval_table = gr.components.Dataframe(
-                                 value=running_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-
-                     with gr.Accordion(
-                         f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                         open=False,
-                     ):
-                         with gr.Row():
-                             pending_eval_table = gr.components.Dataframe(
-                                 value=pending_eval_queue_df,
-                                 headers=EVAL_COLS,
-                                 datatype=EVAL_TYPES,
-                                 row_count=5,
-                             )
-             with gr.Row():
-                 gr.Markdown("# ✉️✨ Submit your model here!", elem_classes="markdown-text")
-
-             with gr.Row():
-                 with gr.Column():
-                     model_name_textbox = gr.Textbox(label="Model name")
-                     revision_name_textbox = gr.Textbox(label="Revision commit", placeholder="main")
-                     model_type = gr.Dropdown(
-                         choices=[t.to_str(" : ") for t in ModelType if t != ModelType.Unknown],
-                         label="Model type",
-                         multiselect=False,
-                         value=None,
-                         interactive=True,
-                     )
-
-                 with gr.Column():
-                     precision = gr.Dropdown(
-                         choices=[i.value.name for i in Precision if i != Precision.Unknown],
-                         label="Precision",
-                         multiselect=False,
-                         value="float16",
-                         interactive=True,
-                     )
-                     weight_type = gr.Dropdown(
-                         choices=[i.value.name for i in WeightType],
-                         label="Weights type",
-                         multiselect=False,
-                         value="Original",
-                         interactive=True,
-                     )
-                     base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")
-
-             submit_button = gr.Button("Submit Eval")
-             submission_result = gr.Markdown()
-             submit_button.click(
-                 add_new_eval,
-                 [
-                     model_name_textbox,
-                     base_model_name_textbox,
-                     revision_name_textbox,
-                     precision,
-                     weight_type,
-                     model_type,
-                 ],
-                 submission_result,
              )

-     with gr.Row():
-         with gr.Accordion("📙 Citation", open=False):
-             citation_button = gr.Textbox(
-                 value=CITATION_BUTTON_TEXT,
-                 label=CITATION_BUTTON_LABEL,
-                 lines=20,
-                 elem_id="citation-button",
-                 show_copy_button=True,
-             )

- scheduler = BackgroundScheduler()
- scheduler.add_job(restart_space, "interval", seconds=1800)
- scheduler.start()
- demo.queue(default_concurrency_limit=40).launch()
  import gradio as gr
+
+ LAST_UPDATED = "Nov 25th 2024"
+
+ ####################################
+ # Static leaderboard data
+ ####################################
+ leaderboard_data = [
+     {'name': 'StyleTTS 2', 'STOI': 0.998, 'PESQ': 3.921, 'WER': 0.162, 'UTMOS': 3.47},
+     {'name': 'Matxa-TTS', 'STOI': 0.996, 'PESQ': 3.539, 'WER': 0.179, 'UTMOS': 3.50},
+     {'name': 'Matxa-TTS-multiaccent', 'STOI': 0.996, 'PESQ': 3.415, 'WER': 0.242, 'UTMOS': 2.98},
+     {'name': 'StableTTS', 'STOI': 0.997, 'PESQ': 3.643, 'WER': 0.164, 'UTMOS': 2.62},
+ ]
+
+
+ # Text for the Metrics tab
+ METRICS_TAB_TEXT = """
+ ## Metrics
+ Models in the leaderboard are evaluated using several key metrics:
+ * **UTMOS** (UTokyo-SaruLab Mean Opinion Score),
+ * **WER** (Word Error Rate),
+ * **STOI** (Short-Time Objective Intelligibility),
+ * **PESQ** (Perceptual Evaluation of Speech Quality).
+ These metrics help evaluate both the accuracy and the quality of the models.
+ ### UTMOS (UTokyo-SaruLab Mean Opinion Score) [[Paper](https://arxiv.org/abs/2204.02152)]
+ UTMOS is a MOS prediction system. **A higher UTMOS indicates better quality** of the generated voice.
+ ### WER (Word Error Rate)
+ WER is a common metric for evaluating speech recognition systems. It measures the percentage of words in the generated transcript that differ from the reference (correct) transcript. **A lower WER value indicates higher accuracy**.
+ Example:
+ | Reference  | the | cat | sat     | on  | the | mat |
+ |------------|-----|-----|---------|-----|-----|-----|
+ | Prediction | the | cat | **sit** | on  | the |     |
+ | Label      | ✅  | ✅  | S       | ✅  | ✅  | D   |
+ The WER calculation is done as follows:
+ ```
+ WER = (S + I + D) / N = (1 + 0 + 1) / 6 = 0.333
+ ```
+ ### STOI (Short-Time Objective Intelligibility) [[Paper](https://ieeexplore.ieee.org/abstract/document/5495701?casa_token=PLtqLc8KNAgAAAAA:FOLuZ4dgMYsnGb1dQHgqVOouQzRJ3vA5yqj-sbwf8gs9Q-AIDCLkMZzAgzRrAogwwxULK9zsYeE)]
+ STOI measures the intelligibility of the synthesized speech signal compared to the original signal. **A higher STOI indicates better intelligibility**.
+ ### PESQ (Perceptual Evaluation of Speech Quality) [[Paper](https://ieeexplore.ieee.org/abstract/document/941023?casa_token=jdtHy84_KhQAAAAA:qHN3WbT6cNdufj6OOn_fn0Je0RedMv-WJCmhQ_3CWy4nMTuDvFMF3KstAmKqLx5suQwdPgGByoY)]
+ PESQ is a perceptual metric that evaluates the quality of speech in a similar manner to how a human listener would. **A higher PESQ indicates better voice quality**.
+ ## Benchmark Datasets
+ Model performance is evaluated using [our test datasets](https://huggingface.co/spaces/rjzevallos/test_app/blob/main/bsc.txt). These datasets cover a variety of domains and acoustic conditions, ensuring a robust evaluation.
+ """
+
+
+ ####################################
+ # Functions (static version)
+ ####################################
+
+ def get_leaderboard():
+     """
+     Return the leaderboard rows sorted by UTMOS in descending order.
+     """
+     # Sort by UTMOS (predicted perceptual quality), highest first
+     sorted_leaderboard = sorted(leaderboard_data, key=lambda x: x['UTMOS'], reverse=True)
+
+     # Assign ranks based on the UTMOS ordering
+     for rank, model in enumerate(sorted_leaderboard):
+         model['rank'] = rank + 1  # rank is the 1-indexed position in the sorted list
+
+     return [[model['rank'], model['name'], model['UTMOS'], model['WER'], model['STOI'], model['PESQ']] for model in sorted_leaderboard]
+
+ ####################################
+ # Gradio interface
+ ####################################
+
+ theme = gr.themes.Base(
+     font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
  )

+ with gr.Blocks(theme=theme) as demo:
+     gr.Markdown("# 🏆 Leaderboard\nVote to help the community determine the best Catalan TTS models.\n")
      with gr.Tabs(elem_classes="tab-buttons") as tabs:
+         with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
+             leaderboard_table = gr.DataFrame(
+                 headers=["Rank", "Model", "UTMOS", "WER", "STOI", "PESQ"],
+                 datatype=["str", "str", "str", "str", "str", "str"],
+                 value=get_leaderboard()  # load the initial table data
              )

+         with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
+             gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
+
+     gr.Markdown(f"Last updated on **{LAST_UPDATED}**", elem_classes="markdown-text")
+
+ # Launch the application
+ demo.queue(api_open=False, default_concurrency_limit=40).launch(show_api=False)
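
Because the new app is fully static, the ranking shown in the Leaderboard tab follows directly from sorting `leaderboard_data` by UTMOS in descending order. A minimal sketch of the resulting row order, using the values committed above:

```python
leaderboard_data = [
    {'name': 'StyleTTS 2', 'STOI': 0.998, 'PESQ': 3.921, 'WER': 0.162, 'UTMOS': 3.47},
    {'name': 'Matxa-TTS', 'STOI': 0.996, 'PESQ': 3.539, 'WER': 0.179, 'UTMOS': 3.50},
    {'name': 'Matxa-TTS-multiaccent', 'STOI': 0.996, 'PESQ': 3.415, 'WER': 0.242, 'UTMOS': 2.98},
    {'name': 'StableTTS', 'STOI': 0.997, 'PESQ': 3.643, 'WER': 0.164, 'UTMOS': 2.62},
]

# Sort by UTMOS (descending), as get_leaderboard() does.
ranked = sorted(leaderboard_data, key=lambda x: x['UTMOS'], reverse=True)
print([m['name'] for m in ranked])
# ['Matxa-TTS', 'StyleTTS 2', 'Matxa-TTS-multiaccent', 'StableTTS']
```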
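
The WER figure in the worked example from the Metrics tab (two errors over six reference words) can be reproduced with a short, self-contained Python sketch. This is not part of the commit; the `wer` helper below is illustrative only and computes a plain word-level Levenshtein distance:

```python
def wer(reference: str, hypothesis: str) -> float:
    """Word Error Rate: (substitutions + insertions + deletions) / reference length."""
    ref, hyp = reference.split(), hypothesis.split()
    # Dynamic-programming edit distance over words.
    d = [[0] * (len(hyp) + 1) for _ in range(len(ref) + 1)]
    for i in range(len(ref) + 1):
        d[i][0] = i
    for j in range(len(hyp) + 1):
        d[0][j] = j
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            d[i][j] = min(
                d[i - 1][j] + 1,         # deletion
                d[i][j - 1] + 1,         # insertion
                d[i - 1][j - 1] + cost,  # substitution
            )
    return d[len(ref)][len(hyp)] / len(ref)

# Worked example from the Metrics tab: one substitution ("sit") and one deletion ("mat").
print(round(wer("the cat sat on the mat", "the cat sit on the"), 3))  # 0.333
```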