import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)

# Static leaderboard results: method label -> execution accuracy (%).
# The three "Human Performance" entries are reference baselines, not ranked
# submissions.
data = {
    "Method": [
        "Human Performance (Handwritten LOTUS Llama-3.1-70B)",
        "Zero-shot Text2SQL (Llama-3.1-70B)",
        "Zero-shot Text2SQL + LM Generation (Llama-3.1-70B)",
        "RAG (E5 + Llama-3.1-70B)",
        "RAG (E5) + LM Rerank (Llama-3.1-70B)",
        "Human Performance (Handwritten LOTUS GPT-4o)",
        "Zero-shot Text2SQL (GPT-4o)",
        "Zero-shot Text2SQL + LM Generation (GPT-4o)",
        "RAG (E5 + GPT-4o)",
        "RAG (E5) + LM Rerank (GPT-4o)",
        "Human Performance (Handwritten LOTUS o3-mini)",
        "Zero-shot Text2SQL (o3-mini)",
        "Zero-shot Text2SQL + LM Generation (o3-mini)",
        "RAG (E5 + o3-mini)",
        "RAG (E5) + LM Rerank (o3-mini)",
        "Zero-shot Text2SQL (Deepseek-R1)",
        "Zero-shot Text2SQL + LM Generation (Deepseek-R1)",
    ],
    "Execution Accuracy": [
        55.0, 17.0, 13.0, 0.0, 2.0,
        55.0, 18.0, 15.0, 3.0, 3.0,
        65.0, 18.0, 30.0, 7.0, 7.0,
        12.0, 0.0,
    ],
}

leaderboard_df = pd.DataFrame(data)
leaderboard_df = leaderboard_df.sort_values(
    "Execution Accuracy", ascending=False
).reset_index(drop=True)

# After sorting, the top NUM_UNRANKED rows are the human-performance
# baselines (65.0, 55.0, 55.0). They get a blank rank; automated methods
# are ranked starting at 1 (index - (NUM_UNRANKED - 1)).
NUM_UNRANKED = 3
leaderboard_df.insert(0, "Rank", leaderboard_df.index - (NUM_UNRANKED - 1))
leaderboard_df.loc[: NUM_UNRANKED - 1, "Rank"] = ""


def hyperlink_method(method: str) -> str:
    """Wrap a method label in an HTML link to the TAG-Bench repository.

    The Method column is rendered with datatype "html", so returning an
    anchor tag displays as a clickable link in the Gradio Dataframe.
    """
    base_url = "https://github.com/TAG-Research/TAG-Bench/tree/main"
    return f'<a href="{base_url}" target="_blank">{method}</a>'


def hyperlink_model(model: str) -> str:
    """Wrap a model name in an HTML link to its Hugging Face page.

    Currently unused (the Model column is commented out below); kept for
    when per-model links are re-enabled.
    """
    base_url = "https://huggingface.co/meta-llama/Llama-3.1-70B"
    return f'<a href="{base_url}" target="_blank">{model}</a>'


leaderboard_df["Method"] = leaderboard_df["Method"].apply(hyperlink_method)


def highlight_row(row):
    """Style human-performance rows (blank Rank) in bold on a green background."""
    if row["Rank"] == "":
        return ["background-color: #d4edda; font-weight: bold;"] * len(row)
    return [""] * len(row)


# Apply the style; leaderboard_df is now a pandas Styler, which gr.Dataframe
# accepts and renders with the per-row CSS above.
leaderboard_df = leaderboard_df.style.apply(highlight_row, axis=1)

# leaderboard_df["Model"] = leaderboard_df["Model"].apply(hyperlink_model)

with gr.Blocks() as demo:
    # NOTE(review): the original header markup was garbled during extraction;
    # this reconstructs a centered title + subtitle — confirm intended styling.
    gr.HTML(
        """
        <div style="text-align: center;">
            <h1>TAG Leaderboard</h1>
            <p>A benchmark for natural language queries over data</p>
        </div>
        """
    )

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                gr.Dataframe(
                    value=leaderboard_df,
                    headers=["Rank", "Method", "Execution Accuracy"],
                    # Method is "html" so the anchor tags render as links.
                    datatype=["str", "html", "number"],
                    row_count=(5, "dynamic"),
                    wrap=True,
                    elem_id="leaderboard",
                    type="pandas",
                )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem(
            "🚀 Submission Instructions ", elem_id="llm-benchmark-tab-table", id=3
        ):
            # Section 1: Required Materials
            with gr.Accordion("1️⃣ Required Materials", open=True):
                gr.Markdown(
                    """
                    Ensure the following files are included in your submission:
                    - **output.json**: File containing the evaluation outputs generated by your model. Please refer to [] for format instructions.
                    - **requirements.txt**: A list of dependencies needed to run your model or script.
                    - **README.md**: A detailed description of your submission, including:
                      - Purpose and overview of the submission.
                      - Instructions to reproduce the results.
                      - Any additional notes for evaluators.
                    - **Model/Keys**: Upload your models or API keys to [Hugging Face](https://huggingface.co/) if they are not publicly accessible.

                    **Note**: Submissions missing any of these materials will not be processed.
                    """
                    # TODO(review): the "[]" above is a dangling placeholder —
                    # fill in the link to the output-format instructions.
                )

            # Section 2: Submission Frequency
            with gr.Accordion("2️⃣ Submission Frequency", open=True):
                gr.Markdown(
                    """
                    - Submissions are accepted **once a month** to ensure sufficient evaluation bandwidth.
                    - Plan your submission timeline accordingly to avoid delays.
                    """
                )

            # Section 3: How to Upload Materials
            with gr.Accordion("3️⃣ How to Upload Materials", open=True):
                gr.Markdown(
                    """
                    Follow these steps to upload your materials:
                    1. Compress all files in the code into a single `.zip` file, or provide a public repository to refer to.
                    2. Email the `.zip` file or repository link to our email tagbenchmark@gmail.com.
                    """
                )

            # Section 4: Submission Process
            with gr.Accordion("4️⃣ Submission Process", open=True):
                gr.Markdown(
                    """
                    After uploading your materials:
                    - Provide accurate contact information for follow-ups.
                    - Double-check your materials for completeness to avoid processing delays.

                    **Important:** Your submission will be added to the evaluation queue. Depending on the queue size, evaluations may take up to a few weeks.
                    """
                )

    # Footer
    # NOTE(review): original footer markup was garbled during extraction;
    # rendered here as plain Markdown.
    gr.Markdown(
        """
        For further assistance, reach out to tagbenchmark@gmail.com with questions.
        """
    )

demo.launch()