import gradio as gr
import pandas as pd
import json
from pathlib import Path
from datetime import datetime, timezone
import uuid

LAST_UPDATED = "Dec 4th 2024"

QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
APP_DIR = Path("./")

# Column names for the phonemic transcription metrics shown on the leaderboard
column_names = {
    "MODEL": "Model",
    "AVG_PER": "Average PER ⬇️",
    "AVG_PWED": "Average PWED ⬇️",
    "GITHUB_URL": "GitHub",
    "DATE": "Submission Date"
}

def load_json_file(file_path: Path, default=None):
    """Safely load a JSON file, returning the default if it is missing or invalid."""
    if default is None:
        default = []
    if not file_path.exists():
        return default
    try:
        with open(file_path, 'r') as f:
            return json.load(f)
    except json.JSONDecodeError:
        return default

def save_json_file(file_path: Path, data):
    """Safely save data to a JSON file, creating parent directories as needed."""
    file_path.parent.mkdir(parents=True, exist_ok=True)
    with open(file_path, 'w') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

def load_leaderboard_data():
    """Load and parse the leaderboard data."""
    leaderboard_path = QUEUE_DIR / "leaderboard.json"
    data = load_json_file(leaderboard_path)
    return pd.DataFrame(data) if data else pd.DataFrame()

def format_leaderboard_df(df):
    """Format the leaderboard dataframe for display."""
    if df.empty:
        return df
    # Select and rename only the columns we want to display
    display_df = pd.DataFrame({
        "MODEL": df["model"],
        "AVG_PER": df["average_per"],
        "AVG_PWED": df["average_pwed"],
        "GITHUB_URL": df["github_url"],
        "DATE": pd.to_datetime(df["submission_date"]).dt.strftime("%Y-%m-%d")
    })
    # Sort by PER (ascending) while the column is still numeric,
    # then format the metric columns for display
    display_df.sort_values(by="AVG_PER", inplace=True)
    display_df["AVG_PER"] = display_df["AVG_PER"].apply(lambda x: f"{x:.4f}")
    display_df["AVG_PWED"] = display_df["AVG_PWED"].apply(lambda x: f"{x:.4f}")
    # Make GitHub URLs clickable
    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(
        lambda x: f'<a href="{x}" target="_blank">Repository</a>' if x else "N/A"
    )
    return display_df
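
# For reference, leaderboard.json is expected to hold a list of records with the
# fields read above. A minimal illustrative entry (field names come from the code;
# the values are made up):
# {
#   "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
#   "average_per": 0.1234,
#   "average_pwed": 0.0567,
#   "github_url": "https://github.com/username/repo",
#   "submission_date": "2024-12-04T12:00:00+00:00"
# }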

def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=None):
    """Submit a new evaluation request to the queue."""
    if not model_name or not submission_name:
        return gr.Markdown("⚠️ Please provide both a model name and a submission name.")
    try:
        # Ensure the queue directory exists
        QUEUE_DIR.mkdir(parents=True, exist_ok=True)
        # Load existing tasks
        tasks_file = QUEUE_DIR / "tasks.json"
        tasks = load_json_file(tasks_file)
        # Create the new task
        new_task = {
            "id": str(uuid.uuid4()),
            "transcription_model": model_name,
            "subset": subset,
            "max_samples": max_samples,
            "submission_name": submission_name,
            "github_url": github_url or "",
            "status": "queued",
            "submitted_at": datetime.now(timezone.utc).isoformat()
        }
        # Append the new task and save the updated queue
        tasks.append(new_task)
        save_json_file(tasks_file, tasks)
        return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")
    except Exception as e:
        return gr.Markdown(f"❌ Error submitting request: {str(e)}")
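
# A successful call appends one entry to tasks.json. With the defaults above the
# record looks roughly like this (the id and timestamp shown are illustrative):
# {
#   "id": "<uuid4 string>",
#   "transcription_model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
#   "subset": "test",
#   "max_samples": null,
#   "submission_name": "My Awesome Model v1.0",
#   "github_url": "https://github.com/username/repo",
#   "status": "queued",
#   "submitted_at": "2024-12-04T12:00:00+00:00"
# }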

def load_results_for_model(model_name):
    """Load detailed results for a specific model."""
    results_path = QUEUE_DIR / "results.json"
    results = load_json_file(results_path)
    # Filter results for the specific model
    model_results = [r for r in results if r["model"] == model_name]
    if not model_results:
        return None
    # Return the most recent result
    latest_result = max(model_results, key=lambda x: x["timestamp"])
    return latest_result
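
# results.json is expected to be a list of result records. The only fields this
# function relies on are "model" (for filtering) and "timestamp" (for picking the
# most recent run); everything else is passed through and shown verbatim in the
# "Detailed Results" tab below.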

def create_html_table(df):
    """Create an HTML table with dark-theme styling."""
    if df.empty:
        return "<p>No data available</p>"
    html = """
    <style>
    table {
        width: 100%;
        border-collapse: collapse;
        color: white;
        background-color: #1a1a1a;
    }
    th, td {
        padding: 8px;
        text-align: left;
        border: 1px solid #333;
    }
    th {
        background-color: #2a2a2a;
        color: white;
    }
    tr:nth-child(even) {
        background-color: #252525;
    }
    tr:hover {
        background-color: #303030;
    }
    a {
        color: #6ea8fe;
        text-decoration: none;
    }
    a:hover {
        text-decoration: underline;
    }
    </style>
    <table>
    <thead>
    <tr>
    """
    # Add headers
    for header in column_names.values():
        html += f"<th>{header}</th>"
    html += "</tr></thead><tbody>"
    # Add rows (GITHUB_URL cells are already formatted as HTML links)
    for _, row in df.iterrows():
        html += "<tr>"
        for col in df.columns:
            html += f"<td>{row[col]}</td>"
        html += "</tr>"
    html += "</tbody></table>"
    return html
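
# Note: the header row comes from column_names (in dict insertion order) while the
# data cells come from df.columns, so the two orderings must match. The display
# frame built by format_leaderboard_df above uses exactly that column order.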

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
    gr.Markdown("""
Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks for English.

**Metrics:**
- **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground-truth phonemes (lower is better)
- **PWED (Phoneme Weighted Edit Distance)**: Measures a weighted difference between phonemes using phonemic features (lower is better)

**Datasets:**
- **[TIMIT](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech)**: A phonemic transcription dataset for English speech recognition

To learn more about the evaluation metrics, check out our blog post [here](https://huggingface.co/spaces/evaluate-metric/wer).
    """)

    with gr.Tabs() as tabs:
        with gr.TabItem("Leaderboard"):
            leaderboard_df = load_leaderboard_data()
            formatted_df = format_leaderboard_df(leaderboard_df)
            leaderboard_table = gr.HTML(
                value=create_html_table(formatted_df)
            )
            refresh_btn = gr.Button("Refresh Leaderboard")
            refresh_btn.click(
                lambda: create_html_table(format_leaderboard_df(load_leaderboard_data())),
                outputs=[leaderboard_table]
            )

        with gr.TabItem("Submit Model"):
            with gr.Column():
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
                    info="Enter the Hugging Face model ID"
                )
                submission_name = gr.Textbox(
                    label="Submission Name",
                    placeholder="My Awesome Model v1.0",
                    info="Give your submission a descriptive name"
                )
                github_url = gr.Textbox(
                    label="GitHub Repository URL (optional)",
                    placeholder="https://github.com/username/repo",
                    info="Link to your model's code repository"
                )
                submit_btn = gr.Button("Submit for Evaluation")
                result_text = gr.Markdown()

                def submit_and_clear(model_name, submission_name, github_url):
                    result = request_evaluation(model_name, submission_name, github_url)
                    # On success, clear the form; on error, keep the entered values
                    if "✅" in result.value:
                        return "", "", "", result
                    return model_name, submission_name, github_url, result

                submit_btn.click(
                    submit_and_clear,
                    inputs=[model_input, submission_name, github_url],
                    outputs=[model_input, submission_name, github_url, result_text]
                )

        with gr.TabItem("ℹ️ Detailed Results"):
            model_selector = gr.Textbox(
                label="Enter Model Name to View Details",
                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft"
            )
            view_btn = gr.Button("View Results")
            results_json = gr.JSON(label="Detailed Results")

            def show_model_results(model_name):
                results = load_results_for_model(model_name)
                return results or {"error": "No results found for this model"}

            view_btn.click(
                show_model_results,
                inputs=[model_selector],
                outputs=[results_json]
            )

    gr.Markdown(f"Last updated: {LAST_UPDATED}")

if __name__ == "__main__":
    demo.launch()