# Spaces: Running
import html
import json
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
import pandas as pd
# Date string rendered at the bottom of the page ("Last updated: ...").
LAST_UPDATED = "Dec 4th 2024"

# Directory holding leaderboard.json, results.json and the queued
# request_*.json files.
# NOTE(review): this is an absolute path on a developer machine — confirm it
# is valid wherever the app is deployed.
QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue") / "queue"

# Application root; not referenced in the visible part of this file.
APP_DIR = Path("./")

# Modified column names for phonemic transcription metrics.
# Maps internal column keys to the header text passed to the leaderboard
# table. Insertion order is the header order, so do not reorder entries.
# NOTE(review): the trailing characters on the two metric labels look like a
# mis-encoded emoji — confirm against the original file.
column_names = {
    "MODEL": "Model",
    "SUBMISSION_NAME": "Submission Name",
    "AVG_PER": "Average PER β¬οΈ",
    "AVG_PFER": "Average PFER β¬οΈ",
    "SUBSET": "Dataset Subset",
    "GITHUB_URL": "GitHub",
    "DATE": "Submission Date",
}
def load_leaderboard_data():
    """Load ``leaderboard.json`` from ``QUEUE_DIR`` into a DataFrame.

    Returns:
        A ``pandas.DataFrame`` built from the parsed JSON. An empty
        ``DataFrame`` is returned when the file does not exist or when
        reading, parsing or DataFrame construction fails; the problem is
        reported with ``print`` instead of being raised.
    """
    leaderboard_path = QUEUE_DIR / "leaderboard.json"
    try:
        # Decode explicitly as UTF-8 so non-ASCII model or submission names
        # do not depend on the platform's locale encoding.
        with open(leaderboard_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return pd.DataFrame(data)
    except FileNotFoundError:
        # Attempting the open and handling the miss avoids a separate
        # exists() check that could go stale before the open.
        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
        return pd.DataFrame()
    except Exception as e:
        # Deliberately broad: this feeds the UI, which should show an empty
        # table rather than crash on a malformed file.
        print(f"Error loading leaderboard data: {e}")
        return pd.DataFrame()
def format_leaderboard_df(df):
    """Prepare raw leaderboard rows for display.

    Renames the raw JSON columns to the internal upper-case keys, orders the
    rows by ascending PER, renders both metrics with four decimals and turns
    the GitHub URL into an HTML link.

    Args:
        df: DataFrame with the columns ``model``, ``submission_name``,
            ``average_per``, ``average_pfer``, ``subset``, ``github_url``
            and ``submission_date``.

    Returns:
        ``df`` itself when it is empty; otherwise a new DataFrame (the input
        is not modified) with string-formatted metric columns.

    Raises:
        KeyError: if a metric or GitHub column is missing from ``df``.
    """
    if df.empty:
        return df

    # Rename raw JSON columns to the internal column keys.
    display_df = df.rename(columns={
        "model": "MODEL",
        "submission_name": "SUBMISSION_NAME",
        "average_per": "AVG_PER",
        "average_pfer": "AVG_PFER",
        "subset": "SUBSET",
        "github_url": "GITHUB_URL",
        "submission_date": "DATE",
    })

    # Sort while PER is still numeric. Sorting the formatted strings would be
    # lexicographic and rank "10.0000" ahead of "2.0000".
    display_df = display_df.sort_values(by="AVG_PER")

    # Format numeric columns.
    for metric in ("AVG_PER", "AVG_PFER"):
        display_df[metric] = display_df[metric].apply(lambda x: f"{x:.4f}")

    def as_link(url):
        # Missing values (None / NaN) and empty strings have no link to show.
        if not isinstance(url, str) or not url:
            return "N/A"
        # The URL is user-submitted; escape it so it cannot break out of the
        # href attribute and inject markup.
        safe_url = html.escape(url, quote=True)
        return f'<a href="{safe_url}" target="_blank">Repository</a>'

    # Make GitHub URLs clickable.
    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(as_link)
    return display_df
def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
    """Queue an evaluation request as a JSON file in ``QUEUE_DIR``.

    Args:
        model_name: Model identifier, stored as ``transcription_model``.
        submission_name: Display name for the submission.
        github_url: Optional repository URL; stored as ``""`` when falsy.
        subset: Dataset subset to evaluate on.
        max_samples: Value stored under ``max_samples`` in the request.

    Returns:
        A ``gr.Markdown`` component carrying a validation, success or error
        message. Failures while writing the file are reported in the message
        rather than raised.
    """
    # Both identifying fields are mandatory.
    if not (model_name and submission_name):
        return gr.Markdown("β οΈ Please provide both model name and submission name.")

    payload = {
        "transcription_model": model_name,
        "subset": subset,
        "max_samples": max_samples,
        "submission_name": submission_name,
        "github_url": github_url or "",
    }

    try:
        # Ensure queue directory exists.
        QUEUE_DIR.mkdir(parents=True, exist_ok=True)

        # A UTC timestamp keeps file names unique; ':' is replaced because
        # it is not allowed in file names on some platforms.
        stamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
        target = QUEUE_DIR / f"request_{stamp}.json"

        with open(target, 'w') as out_file:
            out_file.write(json.dumps(payload, indent=2))

        return gr.Markdown("β Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")
    except Exception as e:
        return gr.Markdown(f"β Error submitting request: {str(e)}")
def load_results_for_model(model_name):
    """Return the most recent entry in ``results.json`` for ``model_name``.

    Args:
        model_name: Value compared against each entry's ``"model"`` field.

    Returns:
        The matching entry with the greatest ``"timestamp"``, or ``None``
        when no entry matches or the file cannot be read or parsed. Errors
        are reported with ``print`` rather than raised.
    """
    results_path = QUEUE_DIR / "results.json"

    def recency(entry):
        # Entries without a timestamp rank below every timestamped entry.
        # The leading flag decides that comparison, so a missing value is
        # never compared against a real timestamp.
        stamp = entry.get("timestamp")
        return (stamp is not None, "" if stamp is None else stamp)

    try:
        # Decode explicitly as UTF-8 instead of the locale default.
        with open(results_path, "r", encoding="utf-8") as f:
            results = json.load(f)

        # Filter results for the specific model. Non-dict entries and entries
        # without a "model" key are skipped, so one malformed record does not
        # hide the results of every model.
        model_results = [
            r for r in results
            if isinstance(r, dict) and r.get("model") == model_name
        ]
        if not model_results:
            return None

        # Get the most recent result.
        return max(model_results, key=recency)
    except Exception as e:
        # Deliberately broad: the UI shows "no results" instead of crashing.
        print(f"Error loading results: {e}")
        return None
# Create Gradio interface: a leaderboard tab, a submission form and a
# per-model results viewer.
# NOTE(review): several labels below contain characters that look like
# mis-encoded emoji — confirm against the original file.
with gr.Blocks() as demo:
    gr.Markdown("# π― Phonemic Transcription Model Evaluation Leaderboard")
    gr.Markdown("""
    Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.
    **Metrics:**
    - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
    - **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
    """)

    with gr.Tabs() as tabs:
        with gr.TabItem("π Leaderboard"):
            # Initial table contents are loaded once, when the UI is built.
            leaderboard_df = load_leaderboard_data()
            formatted_df = format_leaderboard_df(leaderboard_df)
            leaderboard_table = gr.DataFrame(
                value=formatted_df,
                interactive=False,
                headers=list(column_names.values())
            )
            refresh_btn = gr.Button("π Refresh Leaderboard")
            # The listener must name the table as its output; without
            # `outputs` the reloaded data is never sent to any component.
            refresh_btn.click(
                lambda: gr.DataFrame(value=format_leaderboard_df(load_leaderboard_data())),
                outputs=leaderboard_table
            )

        with gr.TabItem("π Submit Model"):
            with gr.Column():
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
                    info="Enter the Hugging Face model ID"
                )
                submission_name = gr.Textbox(
                    label="Submission Name",
                    placeholder="My Awesome Model v1.0",
                    info="Give your submission a descriptive name"
                )
                github_url = gr.Textbox(
                    label="GitHub Repository URL (optional)",
                    placeholder="https://github.com/username/repo",
                    info="Link to your model's code repository"
                )
                submit_btn = gr.Button("π Submit for Evaluation")
                result_text = gr.Markdown()
                # Only the three text fields are wired in, so
                # request_evaluation runs with its default subset and
                # max_samples.
                submit_btn.click(
                    request_evaluation,
                    inputs=[model_input, submission_name, github_url],
                    outputs=result_text
                )

        with gr.TabItem("βΉοΈ Detailed Results"):
            model_selector = gr.Textbox(
                label="Enter Model Name to View Details",
                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft"
            )
            view_btn = gr.Button("View Results")
            results_json = gr.JSON(label="Detailed Results")

            def show_model_results(model_name):
                """Return the latest stored result for the model, or an error payload."""
                results = load_results_for_model(model_name)
                return results or {"error": "No results found for this model"}

            view_btn.click(
                show_model_results,
                inputs=[model_selector],
                outputs=[results_json]
            )

    gr.Markdown(f"Last updated: {LAST_UPDATED}")

demo.launch()