from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("anli_r1", "acc", "ANLI")
    task1 = Task("logiqa", "acc_norm", "LogiQA")


NUM_FEWSHOT = 0  # Change to your few-shot setting
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

TAG Leaderboard

""" # What does your leaderboard evaluate? INTRODUCTION_TEXT = """ Intro text """ # Which evaluations are you running? how can people reproduce what you have? LLM_BENCHMARKS_TEXT = f""" ## What does the TAG leaderboard evaluate? In this leaderboard, you'll find execution accuracy comparisons of table question answering approaches on [TAG-Bench](https://github.com/TAG-Research/TAG-Bench/tree/main). TAG-Bench contains complex queries requiring world knowledge or semantic reasoning that goes beyond the information explicitly available in the database. ## How is accuracy measured? Execution accuracy is measured as the number of exact matches to our annotated ground truth answers which are hand-labeled by experts. ## Citation ``` @misc{{biswal2024text2sqlenoughunifyingai, title={{Text2SQL is Not Enough: Unifying AI and Databases with TAG}}, author={{Asim Biswal and Liana Patel and Siddarth Jha and Amog Kamsetty and Shu Liu and Joseph E. Gonzalez and Carlos Guestrin and Matei Zaharia}}, year={2024}, eprint={2408.14717}, archivePrefix={{arXiv}}, primaryClass={{cs.DB}}, url={{https://arxiv.org/abs/2408.14717}}, }} ``` """ EVALUATION_QUEUE_TEXT = """ ## Steps before submission ### 1) """ CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results" CITATION_BUTTON_TEXT = r""" """