Spaces:

adyen
/

DABstep

Running on CPU Upgrade

App Files Files Community

eggie5-adyen committed on Apr 4

Commit

aa92f8e

1 Parent(s): 84b45f6

added validation guidelines

Browse files

Files changed (2) hide show

app.py +6 -6
dabstep_benchmark/content.py +17 -0

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import os
 import gradio as gr
 from apscheduler.schedulers.background import BackgroundScheduler
-from dabstep_benchmark.content import TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL
 from dabstep_benchmark.leaderboard import *
@@ -30,7 +30,6 @@ if __name__ == "__main__":
         # Generate initial leaderboard data
         validated, unvalidated = update_tables()
         with gr.Tab("Validated"):
             verified_table = gr.Dataframe(
                 value=validated,
@@ -38,7 +37,7 @@ if __name__ == "__main__":
                 interactive=False,
                 column_widths=["20%"],
                 wrap=True,
-        )
         with gr.Tab("Unvalidated"):
             unverified_table = gr.Dataframe(
@@ -47,7 +46,7 @@ if __name__ == "__main__":
                 interactive=False,
                 column_widths=["20%"],
                 wrap=True,
-        )
         # create a Gradio event listener that runs when the page is loaded to populate the dataframe
         demo.load(update_tables, inputs=None, outputs=[verified_table, unverified_table])
@@ -61,6 +60,8 @@ if __name__ == "__main__":
                 verified_table, unverified_table
             ],
         )
         with gr.Row():
             with gr.Accordion("📙 Citation", open=False):
                 citation_button = gr.Textbox(
@@ -107,5 +108,4 @@ if __name__ == "__main__":
     scheduler = BackgroundScheduler()
     scheduler.add_job(restart_space, "interval", seconds=3600*24)
     scheduler.start()
-    demo.launch(debug=True)

 import gradio as gr
 from apscheduler.schedulers.background import BackgroundScheduler
+from dabstep_benchmark.content import TITLE, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_TEXT, CITATION_BUTTON_LABEL, VALIDATION_GUIDELINES
 from dabstep_benchmark.leaderboard import *
         # Generate initial leaderboard data
         validated, unvalidated = update_tables()
         with gr.Tab("Validated"):
             verified_table = gr.Dataframe(
                 value=validated,
                 interactive=False,
                 column_widths=["20%"],
                 wrap=True,
+            )
         with gr.Tab("Unvalidated"):
             unverified_table = gr.Dataframe(
                 interactive=False,
                 column_widths=["20%"],
                 wrap=True,
+            )
         # create a Gradio event listener that runs when the page is loaded to populate the dataframe
         demo.load(update_tables, inputs=None, outputs=[verified_table, unverified_table])
                 verified_table, unverified_table
             ],
         )
+        gr.Markdown(VALIDATION_GUIDELINES, elem_classes="markdown-text")
         with gr.Row():
             with gr.Accordion("📙 Citation", open=False):
                 citation_button = gr.Textbox(
     scheduler = BackgroundScheduler()
     scheduler.add_job(restart_space, "interval", seconds=3600*24)
     scheduler.start()
+    demo.launch(debug=True)

dabstep_benchmark/content.py CHANGED Viewed

@@ -39,3 +39,20 @@ CITATION_BUTTON_TEXT = r"""@misc{DABstep_benchmark_2025,
       month={February},
       url={https://www.adyen.com/knowledge-hub/data-agent-benchmark-for-multi-step-reasoning-dabstep}
 }"""

       month={February},
       url={https://www.adyen.com/knowledge-hub/data-agent-benchmark-for-multi-step-reasoning-dabstep}
 }"""
+VALIDATION_GUIDELINES = """
+## Benchmark Validation Standards
+All submissions are initially added to the **Unvalidated Leaderboard**. The Adyen/Hugging Face team will attempt, with the participation of the respective submission team, to validate any entries that rank within the top 10.
+
+**Validation** confirms that a submission's results were achieved using a novel approach involving data analysis agents. To support validation, participants must provide clear evidence of their methodology. This can be done in one of the following ways:
+- **Preferred:** Share a research paper or blog post along with the source code to enable full reproducibility.
+- Submit a complete dataset that includes **reasoning traces** demonstrating how the results were produced.
+- Provide access to an **API** that the Adyen/Hugging Face team can use to independently validate and reproduce results.
+
+Our goal with **DABstep** is to foster rapid progress and collaboration in the open research community. We strongly encourage participants to share their work and open-source their code whenever possible.
+
+Once validated, submissions will be featured and showcased on the **Validated Leaderboard**, including annotations indicating the validation method used (e.g., `traces`, `code`, `API`).
+"""