TAG-Leaderboard / app.py
abiswal's picture
cleaner update
d11d433
raw
history blame
1.89 kB
import gradio as gr
import pandas as pd
# Leaderboard entries. The two lists are positionally aligned: the i-th
# accuracy string belongs to the i-th model name.
data = {
    "Model": [
        "Handwritten TAG",
        "Zero-shot Text2SQL",
        "Zero-shot Text2SQL + LM Generation",
        "RAG (E5)",
        "RAG (E5) + LM Rerank",
    ],
    "Execution Accuracy": ["55%", "17%", "13%", "0%", "2%"],
}

# Build the table ordered from best to worst accuracy. The percentages are
# stored as display strings, so the sort key strips the trailing "%" and
# compares the values as floats; the displayed column itself is left as text.
leaderboard_df = (
    pd.DataFrame(data)
    .sort_values(
        "Execution Accuracy",
        ascending=False,
        key=lambda column: column.str.rstrip("%").astype(float),
    )
    .reset_index(drop=True)
)

# Prepend a 1-based "Rank" column that follows the sorted row order.
leaderboard_df.insert(0, "Rank", range(1, len(leaderboard_df) + 1))
# Add hyperlinks to the Model column
def hyperlink_model(model) -> str:
    """Return an HTML anchor whose link text is the given model name.

    Every model links to the same TAG-Bench repository URL, and the link
    opens in a new tab (``target="_blank"``).

    Args:
        model: Model name to display. It is converted with ``str()`` and
            HTML-escaped before being placed inside the anchor.

    Returns:
        The ``<a ...>name</a>`` markup as a string.
    """
    # Imported locally so the function does not rely on a module-level import.
    from html import escape

    base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    # The table is rendered with escape=False, so the name must be escaped
    # here; otherwise characters such as "<" or "&" would corrupt the markup.
    link_text = escape(str(model))
    return f'<a href="{base_url}" target="_blank">{link_text}</a>'
# Replace each plain model name with its hyperlink markup.
leaderboard_df["Model"] = leaderboard_df["Model"].map(hyperlink_model)

# Gradio app: a centered heading, the leaderboard table, and a footnote,
# created in that order inside a single Blocks layout.
with gr.Blocks() as demo:
    # Title and subtitle (spelled out line by line; the resulting string
    # starts and ends with a newline).
    gr.HTML(
        value=(
            "\n"
            '<div style="text-align: center;">\n'
            '<h1 style="font-size: 2.5rem; margin-bottom: 0.5rem;">Execution Accuracy Leaderboard</h1>\n'
            '<p style="font-size: 1.25rem; color: gray;">Comparing baseline approaches for structured data queries</p>\n'
            "</div>\n"
        )
    )

    # Leaderboard table. escape=False keeps the anchor tags in the Model
    # column as real links; index=False hides the DataFrame index.
    gr.HTML(
        value=leaderboard_df.to_html(
            index=False,
            escape=False,
            classes="leaderboard-table",
        )
    )

    # Footnote explaining the metric.
    gr.Markdown(
        value="Note: Execution accuracy is based on the percentage of correctly answered queries."
    )

# Start the app as soon as this module is executed.
demo.launch()