"""Gradio app: leaderboard and submission queue for English phonemic (IPA) transcription models.

The app reads ``leaderboard.json`` / ``results.json`` from ``QUEUE_DIR`` and
appends evaluation requests to ``tasks.json`` in the same directory.
"""

import html
import json
import uuid
from datetime import datetime, timezone
from pathlib import Path

import gradio as gr
import pandas as pd

LAST_UPDATED = "Dec 4th 2024"
# NOTE(review): this is an absolute, machine-specific path; it is kept as-is
# because the correct deployment location cannot be determined from this file.
QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
APP_DIR = Path("./")

# Display headers for the phonemic transcription leaderboard, keyed by the
# internal column names produced by format_leaderboard_df().
column_names = {
    "MODEL": "Model",
    "AVG_PER": "Average PER ⬇️",
    "AVG_PWED": "Average PWED ⬇️",
    "GITHUB_URL": "GitHub",
    "DATE": "Submission Date",
}

# Dark-theme styling emitted once in front of the leaderboard table.
_TABLE_STYLE = """
<style>
    .leaderboard-table {
        width: 100%;
        border-collapse: collapse;
        background-color: #1f2937;
        color: #f3f4f6;
    }
    .leaderboard-table th,
    .leaderboard-table td {
        padding: 8px 12px;
        border: 1px solid #374151;
        text-align: left;
    }
    .leaderboard-table th {
        background-color: #111827;
    }
    .leaderboard-table a {
        color: #60a5fa;
    }
</style>
"""


def load_json_file(file_path: Path, default=None):
    """Load a JSON file, returning ``default`` if it is missing or malformed.

    Args:
        file_path: Path of the JSON file to read.
        default: Value returned when the file does not exist or does not
            contain valid JSON. ``None`` means "a new empty list".

    Returns:
        The decoded JSON value, or ``default``.
    """
    if default is None:
        default = []
    try:
        # Explicit UTF-8: save_json_file() writes non-ASCII characters verbatim.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        return default


def save_json_file(file_path: Path, data):
    """Write ``data`` to ``file_path`` as pretty-printed UTF-8 JSON.

    The parent directory is created if needed. The data is written to a
    sibling temporary file first and then moved into place, so an interrupted
    write cannot leave a truncated file (which load_json_file() would
    silently treat as empty).

    Args:
        file_path: Destination path.
        data: JSON-serialisable value.
    """
    file_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = file_path.with_name(file_path.name + ".tmp")
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    tmp_path.replace(file_path)


def load_leaderboard_data():
    """Load ``leaderboard.json`` from ``QUEUE_DIR`` as a DataFrame (empty if there is no data)."""
    data = load_json_file(QUEUE_DIR / "leaderboard.json")
    return pd.DataFrame(data) if data else pd.DataFrame()


def _format_score(value):
    """Render a numeric score with four decimals, or "N/A" when it is missing."""
    return f"{value:.4f}" if pd.notna(value) else "N/A"


def _format_repo_link(url):
    """Turn a submitted repository URL into safe HTML for a table cell.

    Only http(s) URLs become clickable links; any other non-empty text is
    shown escaped, and missing values (None, NaN, "") become "N/A".
    """
    if not isinstance(url, str) or not url.strip():
        return "N/A"
    url = url.strip()
    escaped = html.escape(url, quote=True)
    # Refuse to build links for other schemes (e.g. "javascript:").
    if url.lower().startswith(("http://", "https://")):
        return f'<a href="{escaped}" target="_blank" rel="noopener noreferrer">Repository</a>'
    return escaped


def format_leaderboard_df(df):
    """Build the display DataFrame for the leaderboard.

    Args:
        df: Raw leaderboard data with the columns ``model``, ``average_per``,
            ``average_pwed``, ``submission_date`` and optionally ``github_url``.

    Returns:
        A DataFrame with the columns MODEL, AVG_PER, AVG_PWED, GITHUB_URL and
        DATE, sorted by PER ascending. Scores are formatted as strings and
        GITHUB_URL holds ready-to-embed HTML. An empty input is returned as-is.
    """
    if df.empty:
        return df

    display_df = pd.DataFrame({
        "MODEL": df["model"],
        "AVG_PER": pd.to_numeric(df["average_per"], errors="coerce"),
        "AVG_PWED": pd.to_numeric(df["average_pwed"], errors="coerce"),
        "GITHUB_URL": df.get("github_url", ""),
        "DATE": pd.to_datetime(df["submission_date"]).dt.strftime("%Y-%m-%d"),
    })

    # Sort while the scores are still numeric; sorting the formatted strings
    # would order them lexicographically ("10.0000" before "2.0000").
    display_df = display_df.sort_values(by="AVG_PER", kind="stable")

    for score_col in ("AVG_PER", "AVG_PWED"):
        display_df[score_col] = display_df[score_col].apply(_format_score)
    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(_format_repo_link)
    return display_df


def _queue_evaluation(model_name, submission_name, github_url, subset="test", max_samples=None):
    """Append an evaluation task to ``tasks.json`` in ``QUEUE_DIR``.

    Returns:
        A ``(ok, message)`` tuple: ``ok`` is True when the task was queued and
        ``message`` is the Markdown text to show to the user.
    """
    model_name = (model_name or "").strip()
    submission_name = (submission_name or "").strip()
    github_url = (github_url or "").strip()

    if not model_name or not submission_name:
        return False, "⚠️ Please provide both model name and submission name."

    try:
        QUEUE_DIR.mkdir(parents=True, exist_ok=True)
        tasks_file = QUEUE_DIR / "tasks.json"
        tasks = load_json_file(tasks_file)
        if not isinstance(tasks, list):
            raise ValueError(f"{tasks_file.name} does not contain a JSON list")

        tasks.append({
            "id": str(uuid.uuid4()),
            "transcription_model": model_name,
            "subset": subset,
            "max_samples": max_samples,
            "submission_name": submission_name,
            "github_url": github_url,
            "status": "queued",
            "submitted_at": datetime.now(timezone.utc).isoformat(),
        })
        save_json_file(tasks_file, tasks)
    except (OSError, TypeError, ValueError) as e:
        return False, f"❌ Error submitting request: {str(e)}"

    return True, (
        "✅ Evaluation request submitted successfully! Your results will appear "
        "on the leaderboard once processing is complete."
    )


def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=None):
    """Submit a new evaluation request.

    Args:
        model_name: Model identifier to evaluate (required).
        submission_name: Human-readable name of the submission (required).
        github_url: Optional repository URL; stored as "" when missing.
        subset: Dataset subset recorded in the task.
        max_samples: Optional sample limit recorded in the task.

    Returns:
        A ``gr.Markdown`` component holding the success, warning or error message.
    """
    _, message = _queue_evaluation(model_name, submission_name, github_url, subset, max_samples)
    return gr.Markdown(message)


def load_results_for_model(model_name):
    """Return the most recent ``results.json`` entry for ``model_name``, or None.

    Entries that are not JSON objects are ignored; entries without a
    ``timestamp`` are considered older than any entry that has one.
    """
    results = load_json_file(QUEUE_DIR / "results.json")
    model_results = [
        r for r in results
        if isinstance(r, dict) and r.get("model") == model_name
    ]
    if not model_results:
        return None
    # The leading bool ranks timestamped entries above the others without ever
    # comparing a real timestamp against None.
    return max(model_results, key=lambda r: ("timestamp" in r, r.get("timestamp")))


def create_html_table(df):
    """Render the formatted leaderboard as an HTML table with dark-theme styling.

    Args:
        df: DataFrame produced by format_leaderboard_df(). Its GITHUB_URL
            column must already contain safe HTML; every other cell is
            HTML-escaped here.

    Returns:
        An HTML string, or a "No data available" paragraph for an empty frame.
    """
    if df.empty:
        return "<p>No data available</p>"

    parts = [_TABLE_STYLE, '<table class="leaderboard-table"><thead><tr>']
    # Derive the headers from the frame's own columns so they always line up
    # with the cells below.
    parts.extend(
        f"<th>{html.escape(str(column_names.get(col, col)))}</th>"
        for col in df.columns
    )
    parts.append("</tr></thead><tbody>")

    for _, row in df.iterrows():
        parts.append("<tr>")
        for col in df.columns:
            cell = row[col]
            if col != "GITHUB_URL":  # GITHUB_URL is already formatted as HTML
                cell = html.escape(str(cell))
            parts.append(f"<td>{cell}</td>")
        parts.append("</tr>")

    parts.append("</tbody></table>")
    return "".join(parts)


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
    gr.Markdown("""
    Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks for English.

    **Metrics:**
    - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
    - **PWED (Phoneme Weighted Edit Distance)**: Measures a weighted difference in phonemes using phonemic features (lower is better)

    **Datasets:**
    - **[TIMIT](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech)**: A phonemic transcription dataset for English speech recognition

    To learn more about the evaluation metrics, check out our blog post [here](https://huggingface.co/spaces/evaluate-metric/wer).
    """)

    with gr.Tabs() as tabs:
        with gr.TabItem("🏆 Leaderboard"):
            leaderboard_df = load_leaderboard_data()
            formatted_df = format_leaderboard_df(leaderboard_df)

            leaderboard_table = gr.HTML(value=create_html_table(formatted_df))

            def refresh_leaderboard():
                """Reload the leaderboard from disk and return its HTML."""
                return create_html_table(format_leaderboard_df(load_leaderboard_data()))

            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
            # Without `outputs` the click handler's result would be discarded.
            refresh_btn.click(refresh_leaderboard, outputs=[leaderboard_table])

        with gr.TabItem("📝 Submit Model"):
            with gr.Column():
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
                    info="Enter the Hugging Face model ID",
                )
                submission_name = gr.Textbox(
                    label="Submission Name",
                    placeholder="My Awesome Model v1.0",
                    info="Give your submission a descriptive name",
                )
                github_url = gr.Textbox(
                    label="GitHub Repository URL (optional)",
                    placeholder="https://github.com/username/repo",
                    info="Link to your model's code repository",
                )

                submit_btn = gr.Button("🚀 Submit for Evaluation")
                result_text = gr.Markdown()

                def submit_and_clear(model_value, submission_value, url_value):
                    """Queue the request; clear the form on success, keep it on failure.

                    The parameters are deliberately not named after the Textbox
                    components so they cannot shadow them. Values are returned
                    in the order of the `outputs` list below.
                    """
                    ok, message = _queue_evaluation(model_value, submission_value, url_value)
                    if ok:
                        return "", "", "", message
                    return model_value, submission_value, url_value, message

                submit_btn.click(
                    submit_and_clear,
                    inputs=[model_input, submission_name, github_url],
                    outputs=[model_input, submission_name, github_url, result_text],
                )

        with gr.TabItem("ℹ️ Detailed Results"):
            model_selector = gr.Textbox(
                label="Enter Model Name to View Details",
                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
            )
            view_btn = gr.Button("View Results")
            results_json = gr.JSON(label="Detailed Results")

            def show_model_results(model_name):
                """Return the latest stored results for the model, or an error object."""
                results = load_results_for_model((model_name or "").strip())
                return results or {"error": "No results found for this model"}

            view_btn.click(
                show_model_results,
                inputs=[model_selector],
                outputs=[results_json],
            )

    gr.Markdown(f"Last updated: {LAST_UPDATED}")

if __name__ == "__main__":
    demo.launch()