"""Gradio app for the phonemic (speech-to-IPA) transcription leaderboard.

The app reads ``leaderboard.json`` and ``results.json`` from ``QUEUE_DIR`` and
writes each evaluation request to ``QUEUE_DIR`` as a
``request_<timestamp>.json`` file.
"""

import json
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
import pandas as pd

LAST_UPDATED = "Dec 4th 2024"
# NOTE(review): machine-specific absolute path; it will not exist on other hosts.
QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
APP_DIR = Path("./")

# Modified column names for phonemic transcription metrics
column_names = {
    "MODEL": "Model",
    "SUBMISSION_NAME": "Submission Name",
    "AVG_PER": "Average PER ⬇️",
    "AVG_PFER": "Average PFER ⬇️",
    "SUBSET": "Dataset Subset",
    "GITHUB_URL": "GitHub",
    "DATE": "Submission Date",
}

# Keys used in leaderboard.json -> internal column identifiers.
_RAW_TO_INTERNAL = {
    "model": "MODEL",
    "submission_name": "SUBMISSION_NAME",
    "average_per": "AVG_PER",
    "average_pfer": "AVG_PFER",
    "subset": "SUBSET",
    "github_url": "GITHUB_URL",
    "submission_date": "DATE",
}

# Kept at module level (zero indentation) so Markdown never sees the text as
# an indented code block.
INTRO_MARKDOWN = """
Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.

**Metrics:**
- **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
- **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
"""


def load_leaderboard_data():
    """Load the leaderboard entries from ``QUEUE_DIR / "leaderboard.json"``.

    Returns:
        A DataFrame built from the JSON content, or an empty DataFrame when
        the file is missing, unreadable, or cannot be converted to a frame.
    """
    leaderboard_path = QUEUE_DIR / "leaderboard.json"
    try:
        with open(leaderboard_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return pd.DataFrame(data)
    except FileNotFoundError:
        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
        return pd.DataFrame()
    except Exception as e:
        # Best effort by design: the UI should still come up with an empty
        # table when the leaderboard file is corrupt.
        print(f"Error loading leaderboard data: {e}")
        return pd.DataFrame()


def _format_metric(value):
    """Render a metric with four decimals, or "N/A" when it is missing."""
    return "N/A" if pd.isna(value) else f"{value:.4f}"


def _format_github_link(url):
    """Return a Markdown link to ``url``, or "N/A" when no usable URL is given."""
    # Missing values can arrive as None or NaN; only non-blank strings are links.
    if isinstance(url, str) and url.strip():
        return f"[Repository]({url.strip()})"
    return "N/A"


def format_leaderboard_df(df):
    """Prepare raw leaderboard data for display.

    Renames the JSON keys to the internal column identifiers, sorts rows by
    average PER (ascending, missing values last), renders both metrics as
    four-decimal strings and turns GitHub URLs into Markdown links.

    Args:
        df: DataFrame as returned by ``load_leaderboard_data``.

    Returns:
        A new, formatted DataFrame; ``df`` itself is returned unchanged when
        it is empty. Columns absent from ``df`` are skipped.
    """
    if df.empty:
        return df

    # rename() returns a new frame, so the caller's DataFrame is not mutated.
    display_df = df.rename(columns=_RAW_TO_INTERNAL)

    metric_columns = [c for c in ("AVG_PER", "AVG_PFER") if c in display_df.columns]
    for column in metric_columns:
        display_df[column] = pd.to_numeric(display_df[column], errors="coerce")

    # Sort while the values are still numeric: sorting the formatted strings
    # would order them lexicographically ("10.0000" before "2.0000").
    if "AVG_PER" in display_df.columns:
        display_df = display_df.sort_values(by="AVG_PER", na_position="last")

    for column in metric_columns:
        display_df[column] = display_df[column].map(_format_metric)

    if "GITHUB_URL" in display_df.columns:
        display_df["GITHUB_URL"] = display_df["GITHUB_URL"].map(_format_github_link)

    return display_df


def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
    """Write an evaluation request file into ``QUEUE_DIR``.

    Args:
        model_name: Model identifier, stored as ``transcription_model``.
        submission_name: Name of the submission.
        github_url: Optional repository URL; stored as "" when not given.
        subset: Dataset subset to evaluate on.
        max_samples: Value stored as ``max_samples`` in the request.

    Returns:
        A ``gr.Markdown`` carrying a success, validation, or error message.
    """
    # Treat whitespace-only input the same as missing input.
    model_name = (model_name or "").strip()
    submission_name = (submission_name or "").strip()
    if not model_name or not submission_name:
        return gr.Markdown("⚠️ Please provide both model name and submission name.")

    request_data = {
        "transcription_model": model_name,
        "subset": subset,
        "max_samples": max_samples,
        "submission_name": submission_name,
        "github_url": (github_url or "").strip(),
    }

    try:
        # Ensure queue directory exists
        QUEUE_DIR.mkdir(parents=True, exist_ok=True)

        # ':' is replaced because it is not allowed in file names on some
        # platforms.
        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
        request_file = QUEUE_DIR / f"request_{timestamp}.json"

        with open(request_file, "w", encoding="utf-8") as f:
            json.dump(request_data, f, indent=2)
    except Exception as e:
        # UI boundary: report the failure to the user instead of raising.
        return gr.Markdown(f"❌ Error submitting request: {str(e)}")

    return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")


def load_results_for_model(model_name):
    """Return the most recent entry for ``model_name`` from ``results.json``.

    Args:
        model_name: Value compared against each entry's ``"model"`` field.

    Returns:
        The matching entry with the greatest ``"timestamp"``, or ``None`` when
        the file cannot be read, has an unexpected shape, or has no match.
    """
    results_path = QUEUE_DIR / "results.json"
    try:
        with open(results_path, "r", encoding="utf-8") as f:
            results = json.load(f)
    except Exception as e:
        print(f"Error loading results: {e}")
        return None

    if not isinstance(results, list):
        print(f"Error loading results: expected a list in {results_path}")
        return None

    # Skip malformed entries so one bad record does not hide every result.
    model_results = [
        r for r in results
        if isinstance(r, dict) and r.get("model") == model_name
    ]
    if not model_results:
        return None

    try:
        # Get the most recent result
        return max(model_results, key=lambda r: r["timestamp"])
    except (KeyError, TypeError) as e:
        print(f"Error loading results: {e}")
        return None


def _leaderboard_view():
    """Return the formatted leaderboard with human-readable column headers."""
    display_df = format_leaderboard_df(load_leaderboard_data())
    if display_df.empty:
        # Keep the headers visible even when there is nothing to show yet.
        return pd.DataFrame(columns=list(column_names.values()))
    return display_df.rename(columns=column_names)


def _leaderboard_datatypes(view_df):
    """Return per-column Gradio datatypes: markdown for the link column, str otherwise."""
    link_header = column_names["GITHUB_URL"]
    return ["markdown" if header == link_header else "str" for header in view_df.columns]


def refresh_leaderboard():
    """Reload the leaderboard from disk and return the table update."""
    view_df = _leaderboard_view()
    return gr.DataFrame(value=view_df, datatype=_leaderboard_datatypes(view_df))


def show_model_results(model_name):
    """Return the latest results for ``model_name`` or an error payload."""
    results = load_results_for_model(model_name)
    return results or {"error": "No results found for this model"}


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Tabs() as tabs:
        with gr.TabItem("🏆 Leaderboard"):
            initial_view = _leaderboard_view()

            leaderboard_table = gr.DataFrame(
                value=initial_view,
                interactive=False,
                headers=list(initial_view.columns),
                datatype=_leaderboard_datatypes(initial_view),
            )

            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
            # Without `outputs` the returned table would be discarded.
            refresh_btn.click(refresh_leaderboard, outputs=leaderboard_table)

        with gr.TabItem("📝 Submit Model"):
            with gr.Column():
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
                    info="Enter the Hugging Face model ID",
                )
                submission_name = gr.Textbox(
                    label="Submission Name",
                    placeholder="My Awesome Model v1.0",
                    info="Give your submission a descriptive name",
                )
                github_url = gr.Textbox(
                    label="GitHub Repository URL (optional)",
                    placeholder="https://github.com/username/repo",
                    info="Link to your model's code repository",
                )

                submit_btn = gr.Button("🚀 Submit for Evaluation")
                result_text = gr.Markdown()

                submit_btn.click(
                    request_evaluation,
                    inputs=[model_input, submission_name, github_url],
                    outputs=result_text,
                )

        with gr.TabItem("ℹ️ Detailed Results"):
            model_selector = gr.Textbox(
                label="Enter Model Name to View Details",
                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
            )
            view_btn = gr.Button("View Results")
            results_json = gr.JSON(label="Detailed Results")

            view_btn.click(
                show_model_results,
                inputs=[model_selector],
                outputs=[results_json],
            )

    gr.Markdown(f"Last updated: {LAST_UPDATED}")

# Launch only when run as a script, so importing the module has no side effect
# beyond building `demo`.
if __name__ == "__main__":
    demo.launch()