"""Static configuration for the SyntheRela leaderboard.

Defines which benchmark metrics are shown (grouped into multi-table,
single-table and single-column enums) and the text/markdown snippets
displayed around the results table.
"""

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """One leaderboard column.

    Attributes are, in order: the task key in the results JSON file, the
    metric key in the results JSON file, and the name to display in the
    leaderboard.
    """

    benchmark: str
    metric: str
    col_name: str


# Select your tasks here
# ---------------------------------------------------
# NOTE(review): the arrow suffix in each display name presumably marks whether
# lower (⬇️) or higher (⬆️) values are better -- confirm against the benchmark.
# Member names keep their original numbering; gaps correspond to the disabled
# entries that are left commented out below.
class Tasks(Enum):
    """Multi-table metrics shown on the leaderboard."""

    # Disabled:
    # task_0 = Task("multi-table", "AggregationDetection-LogisticRegression", "AggregationDetection-LogisticRegression ⬇️")
    task_1 = Task(
        "multi-table",
        "AggregationDetection-XGBClassifier",
        "C2ST Agg-XGBClassifier ⬇️",
    )
    task_2 = Task(
        "multi-table",
        "CardinalityShapeSimilarity",
        "CardinalityShapeSimilarity ⬆️",
    )


class SingleTableTasks(Enum):
    """Single-table metrics shown on the leaderboard."""

    task_0 = Task(
        "single-table",
        "MaximumMeanDiscrepancy",
        "MaximumMeanDiscrepancy ⬇️",
    )
    task_1 = Task(
        "single-table",
        "PairwiseCorrelationDifference",
        "PairwiseCorrelationDifference ⬇️",
    )
    # Disabled:
    # task_2 = Task("single-table", "SingleTableDetection-LogisticRegression", "SingleTableDetection-LogisticRegression ⬇️")
    task_3 = Task(
        "single-table",
        "SingleTableDetection-XGBClassifier",
        "SingleTableDetection-XGBClassifier ⬇️",
    )


class SingleColumnTasks(Enum):
    """Single-column metrics shown on the leaderboard."""

    task_0 = Task("single-column", "ChiSquareTest", "ChiSquareTest ⬇️")
    task_1 = Task("single-column", "HellingerDistance", "HellingerDistance ⬇️")
    task_2 = Task(
        "single-column",
        "JensenShannonDistance",
        "JensenShannonDistance ⬇️",
    )
    task_3 = Task(
        "single-column",
        "KolmogorovSmirnovTest",
        "KolmogorovSmirnovTest ⬇️",
    )
    # Disabled:
    # task_4 = Task("single-column", "SingleColumnDetection-LogisticRegression", "SingleColumnDetection-LogisticRegression ⬇️")
    task_5 = Task(
        "single-column",
        "SingleColumnDetection-XGBClassifier",
        "SingleColumnDetection-XGBClassifier ⬇️",
    )
    task_6 = Task(
        "single-column",
        "TotalVariationDistance",
        "TotalVariationDistance ⬇️",
    )
    task_7 = Task(
        "single-column",
        "WassersteinDistance",
        "WassersteinDistance ⬇️",
    )


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

Syntherela Leaderboard

"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
"""

# Which evaluations are you running? how can people reproduce what you have?
# Plain (non-f) string on purpose: there is nothing to interpolate, and
# markdown braces must stay literal.
LLM_BENCHMARKS_TEXT = """
# About

The **SyntheRela Leaderboard** provides a public evaluation of relational database synthesis methods using the **SyntheRela benchmark** ([github](https://github.com/martinjurkovic/syntherela)). This benchmark incorporates best practices, a novel robust detection metric, and a relational deep learning utility approach that leverages graph neural networks. It enables a comprehensive comparison of methods across multiple real-world databases.

To add a model to the leaderboard, run the **SyntheRela benchmark** on your generated data and then open a pull request on the [SyntheRela repository](https://github.com/martinjurkovic/syntherela).

## Authors
- **Martin Jurkovič**
- **Valter Hudovernik**
- **Erik Štrumbelj**

If you use the results from this leaderboard in your research, please **cite our paper** (citation below).
"""

EVALUATION_QUEUE_TEXT = """
To add a model to the leaderboard, run the **SyntheRela benchmark** on your generated data and then open a pull request on the [SyntheRela repository](https://github.com/martinjurkovic/syntherela).
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Raw string so the BibTeX accent macro ({\v{S}}) keeps its backslash.
CITATION_BUTTON_TEXT = r"""@inproceedings{
iclrsyntheticdata2025syntherela,
title={SyntheRela: A Benchmark For Synthetic Relational Database Generation},
author={Martin Jurkovic and Valter Hudovernik and Erik {\v{S}}trumbelj},
booktitle={Will Synthetic Data Finally Solve the Data Access Problem?},
year={2025},
url={https://openreview.net/forum?id=ZfQofWYn6n}
}"""