IPA-Transcription-EN

Running

App Files Files Community

arunasrivastava committed on Dec 5, 2024

Commit

b2730cf

1 Parent(s): a2c34b1

json not working

Browse files

Files changed (1) hide show

app.py +159 -65

app.py CHANGED Viewed

@@ -1,9 +1,9 @@
 import gradio as gr
 import pandas as pd
 import json
 from pathlib import Path
 from datetime import datetime, timezone
 LAST_UPDATED = "Dec 4th 2024"
 QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
@@ -12,47 +12,55 @@ APP_DIR = Path("./")
 # Modified column names for phonemic transcription metrics
 column_names = {
     "MODEL": "Model",
-    "SUBMISSION_NAME": "Submission Name",
     "AVG_PER": "Average PER ⬇️",
-    "AVG_PFER": "Average PFER ⬇️",
-    "SUBSET": "Dataset Subset",
     "GITHUB_URL": "GitHub",
     "DATE": "Submission Date"
 }
 def load_leaderboard_data():
     leaderboard_path = QUEUE_DIR / "leaderboard.json"
-    if not leaderboard_path.exists():
-        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
-        return pd.DataFrame()
-    try:
-        with open(leaderboard_path, 'r') as f:
-            data = json.load(f)
-        df = pd.DataFrame(data)
-        return df
-    except Exception as e:
-        print(f"Error loading leaderboard data: {e}")
-        return pd.DataFrame()
 def format_leaderboard_df(df):
     if df.empty:
         return df
-    # Rename columns to display names
-    display_df = df.rename(columns={
-        "model": "MODEL",
-        "submission_name": "SUBMISSION_NAME",
-        "average_per": "AVG_PER",
-        "average_pfer": "AVG_PFER",
-        "subset": "SUBSET",
-        "github_url": "GITHUB_URL",
-        "submission_date": "DATE"
     })
     # Format numeric columns
     display_df["AVG_PER"] = display_df["AVG_PER"].apply(lambda x: f"{x:.4f}")
-    display_df["AVG_PFER"] = display_df["AVG_PFER"].apply(lambda x: f"{x:.4f}")
     # Make GitHub URLs clickable
     display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(
@@ -64,61 +72,130 @@ def format_leaderboard_df(df):
     return display_df
-def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
     if not model_name or not submission_name:
         return gr.Markdown("⚠️ Please provide both model name and submission name.")
-    request_data = {
-        "transcription_model": model_name,
-        "subset": subset,
-        "max_samples": max_samples,
-        "submission_name": submission_name,
-        "github_url": github_url or ""
-    }
     try:
         # Ensure queue directory exists
         QUEUE_DIR.mkdir(parents=True, exist_ok=True)
-        # Generate unique timestamp for request file
-        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
-        request_file = QUEUE_DIR / f"request_{timestamp}.json"
-        with open(request_file, 'w') as f:
-            json.dump(request_data, f, indent=2)
         return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")
     except Exception as e:
         return gr.Markdown(f"❌ Error submitting request: {str(e)}")
 def load_results_for_model(model_name):
     results_path = QUEUE_DIR / "results.json"
-    try:
-        with open(results_path, 'r') as f:
-            results = json.load(f)
-        # Filter results for the specific model
-        model_results = [r for r in results if r["model"] == model_name]
-        if not model_results:
-            return None
-        # Get the most recent result
-        latest_result = max(model_results, key=lambda x: x["timestamp"])
-        return latest_result
-    except Exception as e:
-        print(f"Error loading results: {e}")
         return None
 # Create Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
     gr.Markdown("""
-        Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.
         **Metrics:**
         - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
-        - **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
     """)
     with gr.Tabs() as tabs:
@@ -126,15 +203,13 @@ with gr.Blocks() as demo:
             leaderboard_df = load_leaderboard_data()
             formatted_df = format_leaderboard_df(leaderboard_df)
-            leaderboard_table = gr.DataFrame(
-                value=formatted_df,
-                interactive=False,
-                headers=list(column_names.values())
             )
             refresh_btn = gr.Button("🔄 Refresh Leaderboard")
             refresh_btn.click(
-                lambda: gr.DataFrame(value=format_leaderboard_df(load_leaderboard_data()))
             )
         with gr.TabItem("📝 Submit Model"):
@@ -158,10 +233,28 @@ with gr.Blocks() as demo:
                 submit_btn = gr.Button("🚀 Submit for Evaluation")
                 result_text = gr.Markdown()
                 submit_btn.click(
-                    request_evaluation,
                     inputs=[model_input, submission_name, github_url],
-                    outputs=result_text
                 )
         with gr.TabItem("ℹ️ Detailed Results"):
@@ -184,4 +277,5 @@ with gr.Blocks() as demo:
     gr.Markdown(f"Last updated: {LAST_UPDATED}")
-demo.launch()

 import gradio as gr
 import pandas as pd
 import json
 from pathlib import Path
 from datetime import datetime, timezone
+import uuid
 LAST_UPDATED = "Dec 4th 2024"
 QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
 # Modified column names for phonemic transcription metrics
 column_names = {
     "MODEL": "Model",
     "AVG_PER": "Average PER ⬇️",
+    "AVG_PWED": "Average PWED ⬇️",
     "GITHUB_URL": "GitHub",
     "DATE": "Submission Date"
 }
def load_json_file(file_path: Path, default=None):
    """Load JSON from *file_path*, falling back to *default* when unreadable.

    Args:
        file_path: Location of the JSON document.
        default: Value returned when the file is missing or cannot be
            decoded. ``None`` is replaced by a fresh empty list.

    Returns:
        The decoded JSON value, or *default* if the file does not exist,
        is not valid UTF-8, or is not valid JSON.
    """
    if default is None:
        default = []
    try:
        # Decode as UTF-8 explicitly: the data holds non-ASCII (IPA) text and
        # must not depend on the platform's default locale encoding.
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        # Open-and-catch instead of exists()-then-open: no window in which
        # the file can disappear between the check and the read.
        return default
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Corrupt or partially written file: treat it like a missing one.
        return default
def save_json_file(file_path: Path, data):
    """Write *data* to *file_path* as JSON without clobbering it on failure.

    The document is serialized to a sibling ``<name>.tmp`` file first and
    then moved over the target, so a serialization error or a crash
    mid-write leaves the previous contents intact.

    Args:
        file_path: Destination path; missing parent directories are created.
        data: Any JSON-serializable value.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot serialize.
        OSError: If the directory or file cannot be written.
    """
    file_path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = file_path.with_name(file_path.name + ".tmp")
    try:
        # UTF-8 is required because ensure_ascii=False emits raw non-ASCII
        # characters, which a legacy locale encoding may be unable to encode.
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
        # Path.replace overwrites the destination in a single rename.
        tmp_path.replace(file_path)
    except Exception:
        # Do not leave a half-written temp file behind.
        tmp_path.unlink(missing_ok=True)
        raise
 def load_leaderboard_data():
     """Read ``leaderboard.json`` from the queue directory into a DataFrame.

     Returns:
         A DataFrame built from the file's contents, or an empty DataFrame
         when the loader yields nothing (missing, unparseable or empty file).
     """
     records = load_json_file(QUEUE_DIR / "leaderboard.json")
     if not records:
         return pd.DataFrame()
     return pd.DataFrame(records)
 def format_leaderboard_df(df):
+    """Format leaderboard dataframe for display"""
     if df.empty:
         return df
+    # Select and rename only the columns we want to display
+    display_df = pd.DataFrame({
+        "MODEL": df["model"],
+        "AVG_PER": df["average_per"],
+        "AVG_PWED": df["average_pwed"],
+        "GITHUB_URL": df["github_url"],
+        "DATE": pd.to_datetime(df["submission_date"]).dt.strftime("%Y-%m-%d")
     })
     # Format numeric columns
     display_df["AVG_PER"] = display_df["AVG_PER"].apply(lambda x: f"{x:.4f}")
+    display_df["AVG_PWED"] = display_df["AVG_PWED"].apply(lambda x: f"{x:.4f}")
     # Make GitHub URLs clickable
     display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(
     return display_df
def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=None):
    """Append a new evaluation request to ``tasks.json`` in the queue directory.

    Args:
        model_name: Identifier of the transcription model to evaluate.
        submission_name: Name of the submission.
        github_url: Optional repository URL; stored as ``""`` when falsy.
        subset: Dataset subset recorded on the task.
        max_samples: Sample cap recorded on the task (``None`` is stored as-is).

    Returns:
        A ``gr.Markdown`` component carrying a warning, success or error
        message. Failures while queuing are reported in the message rather
        than raised.
    """
    # Strip before validating so whitespace-only input is rejected and
    # stray spaces never end up in the queued task.
    model_name = str(model_name or "").strip()
    submission_name = str(submission_name or "").strip()
    github_url = str(github_url or "").strip()

    if not model_name or not submission_name:
        return gr.Markdown("⚠️ Please provide both model name and submission name.")

    try:
        # Ensure queue directory exists
        QUEUE_DIR.mkdir(parents=True, exist_ok=True)

        tasks_file = QUEUE_DIR / "tasks.json"
        tasks = load_json_file(tasks_file)
        if not isinstance(tasks, list):
            # Fail with a readable reason instead of an AttributeError from
            # calling .append on whatever JSON value the file holds.
            raise ValueError(f"{tasks_file.name} does not contain a JSON list")

        tasks.append({
            "id": str(uuid.uuid4()),
            "transcription_model": model_name,
            "subset": subset,
            "max_samples": max_samples,
            "submission_name": submission_name,
            "github_url": github_url,
            "status": "queued",
            "submitted_at": datetime.now(timezone.utc).isoformat(),
        })
        save_json_file(tasks_file, tasks)

        return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing
        # the Gradio event handler.
        return gr.Markdown(f"❌ Error submitting request: {str(e)}")
 def load_results_for_model(model_name):
     """Return the newest entry in ``results.json`` for *model_name*.

     Args:
         model_name: Value compared against each entry's ``"model"`` field.

     Returns:
         The matching entry with the greatest ``"timestamp"``, or ``None``
         when nothing matches (including when the loader yields no data).
         Entries without a timestamp rank below every entry that has one.
     """
     results = load_json_file(QUEUE_DIR / "results.json")
     # Skip malformed entries instead of raising KeyError into the UI: an
     # entry must be a dict that actually carries a "model" field.
     candidates = [
         entry
         for entry in results
         if isinstance(entry, dict)
         and "model" in entry
         and entry["model"] == model_name
     ]
     if not candidates:
         return None
     # The leading boolean orders timestamp-less entries first, so a missing
     # timestamp is never compared against a real one.
     return max(
         candidates,
         key=lambda entry: (entry.get("timestamp") is not None, entry.get("timestamp")),
     )
def create_html_table(df):
    """Render the formatted leaderboard DataFrame as a dark-themed HTML table.

    Args:
        df: DataFrame whose column labels are leaderboard keys such as
            ``"MODEL"``. Header text is looked up in ``column_names``;
            labels not found there are shown as-is.

    Returns:
        An HTML string holding a ``<style>`` block and a ``<table>``, or
        ``"<p>No data available</p>"`` when *df* is empty.
    """
    from html import escape  # function-scope import keeps the block self-contained

    if df.empty:
        return "<p>No data available</p>"

    table_open = """
    <style>
    table {
        width: 100%;
        border-collapse: collapse;
        color: white;
        background-color: #1a1a1a;
    }
    th, td {
        padding: 8px;
        text-align: left;
        border: 1px solid #333;
    }
    th {
        background-color: #2a2a2a;
        color: white;
    }
    tr:nth-child(even) {
        background-color: #252525;
    }
    tr:hover {
        background-color: #303030;
    }
    a {
        color: #6ea8fe;
        text-decoration: none;
    }
    a:hover {
        text-decoration: underline;
    }
    </style>
    <table>
    <thead>
        <tr>
    """

    # Build headers from the frame's own columns so the header row always
    # lines up with the data cells, whatever subset of columns is present.
    header_cells = "".join(
        f"<th>{escape(str(column_names.get(col, col)))}</th>" for col in df.columns
    )

    body_rows = []
    for _, row in df.iterrows():
        cells = []
        for col in df.columns:
            text = f"{row[col]}"
            if col != "GITHUB_URL":
                # Model names and similar fields come from user submissions;
                # escape them so they cannot inject markup into the page.
                text = escape(text)
            # GITHUB_URL is passed through untouched: it is expected to hold
            # an anchor tag already built by the formatting step.
            # NOTE(review): that step must escape the URL itself; confirm.
            cells.append(f"<td>{text}</td>")
        body_rows.append("<tr>" + "".join(cells) + "</tr>")

    return (
        table_open
        + header_cells
        + "</tr></thead><tbody>"
        + "".join(body_rows)
        + "</tbody></table>"
    )
 # Create Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
     gr.Markdown("""
+        Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks for English.
         **Metrics:**
         - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
+        - **PWED (Phoneme Weighted Edit Distance)**: Measures a weighted difference in phonemes using phonemic features (lower is better)
+        **Datasets:**
+        - **[TIMIT](https://www.kaggle.com/datasets/mfekadu/darpa-timit-acousticphonetic-continuous-speech)**: A phonemic transcription dataset for English speech recognition
+        To learn more about the evaluation metrics, check out our blog post [here](https://huggingface.co/spaces/evaluate-metric/wer).
     """)
     with gr.Tabs() as tabs:
             leaderboard_df = load_leaderboard_data()
             formatted_df = format_leaderboard_df(leaderboard_df)
+            leaderboard_table = gr.HTML(
+                value=create_html_table(formatted_df)
             )
             refresh_btn = gr.Button("🔄 Refresh Leaderboard")
             refresh_btn.click(
+                lambda: gr.HTML(value=create_html_table(format_leaderboard_df(load_leaderboard_data())))
             )
         with gr.TabItem("📝 Submit Model"):
                 submit_btn = gr.Button("🚀 Submit for Evaluation")
                 result_text = gr.Markdown()
+                def submit_and_clear(model_name, submission_name, github_url):
+                    result = request_evaluation(model_name, submission_name, github_url)
+                    # If submission was successful, clear the form
+                    if "✅" in result.value:
+                        return {
+                            model_input: "",
+                            submission_name: "",
+                            github_url: "",
+                            result_text: result
+                        }
+                    # If there was an error, keep the form data and show error
+                    return {
+                        model_input: model_name,
+                        submission_name: submission_name,
+                        github_url: github_url,
+                        result_text: result
+                    }
                 submit_btn.click(
+                    submit_and_clear,
                     inputs=[model_input, submission_name, github_url],
+                    outputs=[model_input, submission_name, github_url, result_text]
                 )
         with gr.TabItem("ℹ️ Detailed Results"):
     gr.Markdown(f"Last updated: {LAST_UPDATED}")
+if __name__ == "__main__":
+    demo.launch()