# Execution Accuracy Leaderboard

import html

import gradio as gr
import pandas as pd

# Leaderboard entries: model names and their reported accuracies, index-aligned
data = {
    "Model": [
        "Handwritten TAG",
        "Zero-shot Text2SQL",
        "Zero-shot Text2SQL + LM Generation",
        "RAG (E5)",
        "RAG (E5) + LM Rerank",
    ],
    "Execution Accuracy": ["55%", "17%", "13%", "0%", "2%"],
}

# Build the table ordered best-first. The percentage strings are parsed to
# floats only for the comparison, so the displayed column keeps its "NN%" text
# and no temporary numeric column is left behind.
leaderboard_df = pd.DataFrame(data).sort_values(
    "Execution Accuracy",
    key=lambda percentages: percentages.str.rstrip("%").astype(float),
    ascending=False,
    ignore_index=True,
)

# Prepend a 1-based rank that follows the sorted row order
leaderboard_df.insert(0, "Rank", range(1, len(leaderboard_df) + 1))

# Add hyperlinks to the Model column
def hyperlink_model(model: str) -> str:
    """Return an HTML anchor linking a model name to the TAG-Bench repository.

    The name is HTML-escaped before it is embedded: the leaderboard table is
    rendered with ``escape=False``, so characters such as ``&``, ``<`` or
    ``>`` in a name would otherwise be emitted as raw markup.

    Args:
        model: Display name of the model.

    Returns:
        An ``<a>`` element string that opens the repository in a new tab,
        with the escaped model name as its link text.
    """
    base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    # str() keeps the previous tolerance for non-string values, which the
    # f-string used to format implicitly.
    link_text = html.escape(str(model))
    return f'<a href="{base_url}" target="_blank">{link_text}</a>'

# Turn each model name into a link to the benchmark repository
leaderboard_df["Model"] = leaderboard_df["Model"].map(hyperlink_model)

# Assemble the Gradio page: heading, ranked table, explanatory note
with gr.Blocks() as demo:
    # Centered heading with a short tagline
    header_markup = """
        <div style="text-align: center;">
            <h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">Execution Accuracy Leaderboard</h1>
            <p style="font-size: 1.25rem; color: gray;">Comparing baseline approaches for structured data queries</p>
        </div>
        """
    gr.HTML(header_markup)

    # Render the table as raw HTML; escaping stays off so the anchors placed
    # in the Model column are emitted as markup rather than literal text
    table_markup = leaderboard_df.to_html(
        classes="leaderboard-table",
        escape=False,
        index=False,
    )
    gr.HTML(table_markup)

    # Closing note on how the metric is defined
    accuracy_note = (
        "Note: Execution accuracy is based on the percentage of correctly answered queries."
    )
    gr.Markdown(accuracy_note)

demo.launch()