IPA-Transcription-EN

Running

File size: 6,800 Bytes


import gradio as gr
import pandas as pd
import json
from pathlib import Path
from datetime import datetime, timezone

# Human-readable date shown in the page footer ("Last updated: ...").
LAST_UPDATED = "Dec 4th 2024"
# Directory that holds leaderboard.json, results.json and the
# request_<timestamp>.json files written when a model is submitted.
# NOTE(review): this is an absolute path on a developer machine — presumably
# it has to point elsewhere in a deployed environment; confirm.
QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
# Application root, relative to the current working directory.
# NOTE(review): not referenced anywhere in the visible code.
APP_DIR = Path("./")

# Display headers for the leaderboard table, keyed by internal column id.
# The arrow suffix marks the error-rate metrics (lower is better).
column_names = dict(
    MODEL="Model",
    SUBMISSION_NAME="Submission Name",
    AVG_PER="Average PER ⬇️",
    AVG_PFER="Average PFER ⬇️",
    SUBSET="Dataset Subset",
    GITHUB_URL="GitHub",
    DATE="Submission Date",
)

def load_leaderboard_data():
    """Load the leaderboard entries stored in ``QUEUE_DIR / "leaderboard.json"``.

    Returns:
        A ``pandas.DataFrame`` built from the decoded JSON document. An empty
        DataFrame is returned (after printing a message) when the file is
        missing, unreadable, or cannot be converted into a table, so callers
        always receive a DataFrame.
    """
    leaderboard_path = QUEUE_DIR / "leaderboard.json"

    try:
        # Decode explicitly as UTF-8 instead of the platform default encoding,
        # so non-ASCII text in the file reads the same on every OS.
        with open(leaderboard_path, 'r', encoding="utf-8") as f:
            data = json.load(f)
        return pd.DataFrame(data)
    except FileNotFoundError:
        # Opening directly (rather than checking exists() first) avoids a
        # window in which the file can disappear between check and open.
        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
        return pd.DataFrame()
    except Exception as e:
        # Deliberately broad: a corrupt leaderboard must not take the UI down.
        print(f"Error loading leaderboard data: {e}")
        return pd.DataFrame()

def format_leaderboard_df(df):
    """Prepare raw leaderboard rows for display.

    Args:
        df: DataFrame of raw leaderboard rows. Unless it is empty it must
            contain the columns ``average_per``, ``average_pfer`` and
            ``github_url``; ``model``, ``submission_name``, ``subset`` and
            ``submission_date`` are renamed when present.

    Returns:
        A new DataFrame whose columns use the upper-case keys (``MODEL``,
        ``AVG_PER``, ...), ordered by ascending PER, with both metrics
        rendered as 4-decimal strings and each GitHub URL wrapped in an HTML
        anchor (``"N/A"`` when no URL is available). An empty input is
        returned unchanged.
    """
    import html  # local import keeps this block self-contained

    if df.empty:
        return df

    # Map the raw JSON field names onto the internal column keys.
    display_df = df.rename(columns={
        "model": "MODEL",
        "submission_name": "SUBMISSION_NAME",
        "average_per": "AVG_PER",
        "average_pfer": "AVG_PFER",
        "subset": "SUBSET",
        "github_url": "GITHUB_URL",
        "submission_date": "DATE"
    })

    # Sort while AVG_PER is still numeric. Sorting after formatting would
    # compare strings, ranking "10.0000" ahead of "2.0000".
    display_df = display_df.sort_values(by="AVG_PER")

    # Render both metrics with a fixed number of decimals.
    for metric in ("AVG_PER", "AVG_PFER"):
        display_df[metric] = display_df[metric].apply(lambda x: f"{x:.4f}")

    def _as_link(url):
        # Missing values (None, NaN, empty string) have no link to show.
        if not isinstance(url, str) or not url:
            return "N/A"
        # The URL is user-submitted: escape it so it cannot break out of the
        # href attribute and inject markup.
        return f'<a href="{html.escape(url, quote=True)}" target="_blank">Repository</a>'

    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(_as_link)

    return display_df

def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
    """Queue an evaluation request as a JSON file inside ``QUEUE_DIR``.

    Args:
        model_name: Identifier of the model to evaluate (required).
        submission_name: Name given to this submission (required).
        github_url: Optional repository URL; stored as ``""`` when absent.
        subset: Value stored under the request's ``"subset"`` key.
        max_samples: Value stored under the request's ``"max_samples"`` key.

    Returns:
        A ``gr.Markdown`` component carrying a validation warning, a success
        message, or the error raised while writing the request file.
    """
    def _clean(value):
        # Only text is trimmed; any other value is passed through untouched.
        return value.strip() if isinstance(value, str) else value

    # Trim before validating so whitespace-only input is rejected and padded
    # identifiers are not written to the queue verbatim.
    model_name = _clean(model_name)
    submission_name = _clean(submission_name)
    github_url = _clean(github_url)

    if not model_name or not submission_name:
        return gr.Markdown("⚠️ Please provide both model name and submission name.")

    request_data = {
        "transcription_model": model_name,
        "subset": subset,
        "max_samples": max_samples,
        "submission_name": submission_name,
        "github_url": github_url or ""
    }

    try:
        # Ensure queue directory exists
        QUEUE_DIR.mkdir(parents=True, exist_ok=True)

        # UTC timestamp (microsecond resolution) names the request file;
        # ":" is replaced because it is not allowed in file names everywhere.
        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
        request_file = QUEUE_DIR / f"request_{timestamp}.json"

        with open(request_file, 'w', encoding="utf-8") as f:
            json.dump(request_data, f, indent=2)

        return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")

    except Exception as e:
        # Report the failure in the UI instead of raising into Gradio.
        return gr.Markdown(f"❌ Error submitting request: {str(e)}")

def load_results_for_model(model_name):
    """Return the most recent detailed result recorded for ``model_name``.

    Reads ``QUEUE_DIR / "results.json"`` and keeps the records whose
    ``"model"`` field equals ``model_name``.

    Args:
        model_name: Model identifier to look up.

    Returns:
        The matching record with the greatest ``"timestamp"`` value, or
        ``None`` when nothing matches or the file cannot be read or parsed
        (the error is printed in that case).
    """
    results_path = QUEUE_DIR / "results.json"
    try:
        # Decode explicitly as UTF-8 instead of the platform default encoding.
        with open(results_path, 'r', encoding="utf-8") as f:
            results = json.load(f)

        # Skip entries that are not records or carry no "model" field, so one
        # malformed entry does not hide every valid result.
        model_results = [
            r for r in results
            if isinstance(r, dict) and r.get("model") == model_name
        ]
        if not model_results:
            return None

        # Get the most recent result
        return max(model_results, key=lambda r: r["timestamp"])
    except Exception as e:
        # Deliberately broad: a lookup failure is reported, never raised.
        print(f"Error loading results: {e}")
        return None

# Create Gradio interface: a leaderboard tab, a submission form and a
# per-model detailed-results viewer.
def _leaderboard_for_display():
    """Reload the leaderboard, format it and apply the display headers."""
    # Gradio takes a table's headers from the DataFrame's own columns, so the
    # internal keys are renamed to their human-readable labels here.
    return format_leaderboard_df(load_leaderboard_data()).rename(columns=column_names)


with gr.Blocks() as demo:
    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
    gr.Markdown("""
        Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.
        
        **Metrics:**
        - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
        - **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
    """)
    
    with gr.Tabs() as tabs:
        with gr.TabItem("🏆 Leaderboard"):
            # Snapshot taken once, when the interface is built.
            leaderboard_df = load_leaderboard_data()
            formatted_df = format_leaderboard_df(leaderboard_df)
            
            # NOTE(review): the GitHub cells hold HTML anchors; presumably the
            # column needs an html/markdown datatype to render as links —
            # confirm against the Gradio version in use.
            leaderboard_table = gr.DataFrame(
                value=formatted_df.rename(columns=column_names),
                interactive=False,
                headers=list(column_names.values())
            )
            
            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
            # The handler's return value only reaches the table when the
            # table is listed as an output of the event.
            refresh_btn.click(
                _leaderboard_for_display,
                outputs=leaderboard_table
            )
            
        with gr.TabItem("📝 Submit Model"):
            with gr.Column():
                model_input = gr.Textbox(
                    label="Model Name",
                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
                    info="Enter the Hugging Face model ID"
                )
                submission_name = gr.Textbox(
                    label="Submission Name",
                    placeholder="My Awesome Model v1.0",
                    info="Give your submission a descriptive name"
                )
                github_url = gr.Textbox(
                    label="GitHub Repository URL (optional)",
                    placeholder="https://github.com/username/repo",
                    info="Link to your model's code repository"
                )
                
                submit_btn = gr.Button("🚀 Submit for Evaluation")
                result_text = gr.Markdown()
                
                # subset and max_samples are not wired to inputs, so
                # request_evaluation falls back to its defaults for them.
                submit_btn.click(
                    request_evaluation,
                    inputs=[model_input, submission_name, github_url],
                    outputs=result_text
                )
        
        with gr.TabItem("ℹ️ Detailed Results"):
            model_selector = gr.Textbox(
                label="Enter Model Name to View Details",
                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft"
            )
            view_btn = gr.Button("View Results")
            results_json = gr.JSON(label="Detailed Results")
            
            def show_model_results(model_name):
                """Return the latest result for the model, or an error payload."""
                results = load_results_for_model(model_name)
                return results or {"error": "No results found for this model"}
            
            view_btn.click(
                show_model_results,
                inputs=[model_selector],
                outputs=[results_json]
            )
    
    gr.Markdown(f"Last updated: {LAST_UPDATED}")

demo.launch()