# Syntherela Leaderboard

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    """Descriptor of one leaderboard metric column.

    Instances are used as the values of the task enums in this module.
    """

    # Task key in the results JSON file (e.g. "multi-table").
    benchmark: str
    # Metric key in the results JSON file.
    metric: str
    # Name to display for this column in the leaderboard.
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Multi-table metrics shown in the leaderboard.

    Each member's value is a ``Task`` holding the task key in the json file,
    the metric key in the json file, and the name to display in the
    leaderboard.
    """

    # Disabled entry, kept for reference:
    # task_0 = Task("multi-table", "AggregationDetection-LogisticRegression", "AggregationDetection-LogisticRegression ⬇️")
    task_1 = Task(
        benchmark="multi-table",
        metric="AggregationDetection-XGBClassifier",
        col_name="C2ST Agg-XGBClassifier ⬇️",
    )
    task_2 = Task(
        benchmark="multi-table",
        metric="CardinalityShapeSimilarity",
        col_name="CardinalityShapeSimilarity ⬆️",
    )

class SingleTableTasks(Enum):
    """Single-table metrics shown in the leaderboard.

    Each member's value is a ``Task`` whose benchmark key is "single-table".
    """

    task_0 = Task(
        benchmark="single-table",
        metric="MaximumMeanDiscrepancy",
        col_name="MaximumMeanDiscrepancy ⬇️",
    )
    task_1 = Task(
        benchmark="single-table",
        metric="PairwiseCorrelationDifference",
        col_name="PairwiseCorrelationDifference ⬇️",
    )
    # Disabled entry, kept for reference:
    # task_2 = Task("single-table", "SingleTableDetection-LogisticRegression", "SingleTableDetection-LogisticRegression ⬇️")
    task_3 = Task(
        benchmark="single-table",
        metric="SingleTableDetection-XGBClassifier",
        col_name="SingleTableDetection-XGBClassifier ⬇️",
    )

class SingleColumnTasks(Enum):
    """Single-column metrics shown in the leaderboard.

    Each member's value is a ``Task`` whose benchmark key is "single-column".
    """

    task_0 = Task(
        benchmark="single-column",
        metric="ChiSquareTest",
        col_name="ChiSquareTest ⬇️",
    )
    task_1 = Task(
        benchmark="single-column",
        metric="HellingerDistance",
        col_name="HellingerDistance ⬇️",
    )
    task_2 = Task(
        benchmark="single-column",
        metric="JensenShannonDistance",
        col_name="JensenShannonDistance ⬇️",
    )
    task_3 = Task(
        benchmark="single-column",
        metric="KolmogorovSmirnovTest",
        col_name="KolmogorovSmirnovTest ⬇️",
    )
    # Disabled entry, kept for reference:
    # task_4 = Task("single-column", "SingleColumnDetection-LogisticRegression", "SingleColumnDetection-LogisticRegression ⬇️")
    task_5 = Task(
        benchmark="single-column",
        metric="SingleColumnDetection-XGBClassifier",
        col_name="SingleColumnDetection-XGBClassifier ⬇️",
    )
    task_6 = Task(
        benchmark="single-column",
        metric="TotalVariationDistance",
        col_name="TotalVariationDistance ⬇️",
    )
    task_7 = Task(
        benchmark="single-column",
        metric="WassersteinDistance",
        col_name="WassersteinDistance ⬇️",
    )

# Few-shot setting; change this to match your few-shot configuration.
NUM_FEWSHOT: int = 0
# ---------------------------------------------------


# HTML heading with the leaderboard name.
TITLE = '<h1 align="center" id="space-title">Syntherela Leaderboard</h1>'

# Introduction describing what the leaderboard evaluates; currently it
# contains only a single newline (no introduction text is provided).
INTRODUCTION_TEXT = "\n"

# "About" page markdown: which evaluations are run and how to reproduce or
# contribute results.
# NOTE: this is deliberately a plain string, not an f-string. It has no
# placeholders, and a plain literal keeps any future "{" / "}" in the markdown
# from being interpreted as a format expression.
LLM_BENCHMARKS_TEXT = """
# About
The **SyntheRela Leaderboard** provides a public evaluation of relational database synthesis methods using the **SyntheRela benchmark** ([github](https://github.com/martinjurkovic/syntherela)). This benchmark incorporates best practices, a novel robust detection metric, and a relational deep learning utility approach that leverages graph neural networks. It enables a comprehensive comparison of methods across multiple real-world databases.

To add a model to the leaderboard, run the **SyntheRela benchmark** on your generated data and then open a pull request on the [SyntheRela repository](https://github.com/martinjurkovic/syntherela).

## Authors
- **Martin Jurkovič**
- **Valter Hudovernik**
- **Erik Štrumbelj**

If you use the results from this leaderboard in your research, please **cite our paper** (citation below).

"""

EVALUATION_QUEUE_TEXT = """
To add a model to the leaderboard, run the **SyntheRela benchmark** on your generated data and then open a pull request on the [SyntheRela repository](https://github.com/martinjurkovic/syntherela).
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
CITATION_BUTTON_TEXT = r"""@inproceedings{
    iclrsyntheticdata2025syntherela,
    title={SyntheRela: A Benchmark For Synthetic Relational Database Generation},
    author={Martin Jurkovic and Valter Hudovernik and Erik {\v{S}}trumbelj},
    booktitle={Will Synthetic Data Finally Solve the Data Access Problem?},
    year={2025},
    url={https://openreview.net/forum?id=ZfQofWYn6n}
}"""