File size: 4,332 Bytes
29546b4
91e8a06
6dff40c
29546b4
 
 
 
 
 
91e8a06
32b707a
 
29546b4
4f3c2a8
3b86dfc
 
5909269
56140d5
5f7fcf4
 
 
 
 
 
 
5909269
5f7fcf4
 
01ea22b
370d5a0
 
 
 
 
 
 
 
 
 
5909269
370d5a0
 
 
 
 
 
 
01ea22b
32b707a
 
29546b4
 
 
6daea60
58733e4
29546b4
b98f07f
e7226cc
 
29546b4
e7226cc
a41edef
6daea60
3aa78c2
a41edef
f7d1b51
a41edef
 
 
 
072fab0
a41edef
3aa78c2
a41edef
3aa78c2
a41edef
 
58733e4
2a73469
 
5acb894
a41edef
5909269
 
 
 
 
5acb894
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    """One leaderboard column: where a score lives in the results JSON and how to label it."""
    # task key in the results json file (e.g. "multi-table", "single-table")
    benchmark: str
    # metric key in the results json file (e.g. "CardinalityShapeSimilarity")
    metric: str
    # column header displayed in the leaderboard UI
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Multi-table metrics shown on the leaderboard.

    Each member is Task(task_key_in_json, metric_key_in_json, display_name).
    Arrows in the display name mark the preferred direction: ⬇️ lower is
    better, ⬆️ higher is better. Commented-out members are metrics that were
    deliberately excluded from display.
    """
    # task0 = Task("anli_r1", "acc", "ANLI")
    # task1 = Task("logiqa", "acc_norm", "LogiQA")
    # task_0 = Task("multi-table", "AggregationDetection-LogisticRegression", "AggregationDetection-LogisticRegression ⬇️")
    task_1 = Task("multi-table", "AggregationDetection-XGBClassifier", "C2ST Agg-XGBClassifier ⬇️")
    task_2 = Task("multi-table", "CardinalityShapeSimilarity", "CardinalityShapeSimilarity ⬆️")

class SingleTableTasks(Enum):
    """Single-table metrics (benchmark key "single-table"); ⬇️ = lower is better.

    Commented-out members are metrics deliberately excluded from display.
    """
    task_0 = Task("single-table", "MaximumMeanDiscrepancy", "MaximumMeanDiscrepancy ⬇️")
    # PairwiseCorrelationDifference
    task_1 = Task("single-table", "PairwiseCorrelationDifference", "PairwiseCorrelationDifference ⬇️")
    # SingleTableDetection-LogisticRegression
    # task_2 = Task("single-table", "SingleTableDetection-LogisticRegression", "SingleTableDetection-LogisticRegression ⬇️")
    # SingleTableDetection-XGBClassifier
    task_3 = Task("single-table", "SingleTableDetection-XGBClassifier", "SingleTableDetection-XGBClassifier ⬇️")

class SingleColumnTasks(Enum):
    """Single-column metrics (benchmark key "single-column"); ⬇️ = lower is better.

    Commented-out members are metrics deliberately excluded from display.
    """
    # ChiSquareTest
    task_0 = Task("single-column", "ChiSquareTest", "ChiSquareTest ⬇️")
    # HellingerDistance
    task_1 = Task("single-column", "HellingerDistance", "HellingerDistance ⬇️")
    # JensenShannonDistance
    task_2 = Task("single-column", "JensenShannonDistance", "JensenShannonDistance ⬇️")
    # KolmogorovSmirnovTest
    task_3 = Task("single-column", "KolmogorovSmirnovTest", "KolmogorovSmirnovTest ⬇️")
    # SingleColumnDetection-LogisticRegression
    # task_4 = Task("single-column", "SingleColumnDetection-LogisticRegression", "SingleColumnDetection-LogisticRegression ⬇️")
    # SingleColumnDetection-XGBClassifier
    task_5 = Task("single-column", "SingleColumnDetection-XGBClassifier", "SingleColumnDetection-XGBClassifier ⬇️")
    # TotalVariationDistance
    task_6 = Task("single-column", "TotalVariationDistance", "TotalVariationDistance ⬇️")
    # WassersteinDistance
    task_7 = Task("single-column", "WassersteinDistance", "WassersteinDistance ⬇️")

NUM_FEWSHOT = 0 # few-shot example count; leaderboard-template placeholder — adjust if your evaluation uses few-shot prompting
# ---------------------------------------------------



# Your leaderboard name (rendered as raw HTML at the top of the page)
TITLE = """<h1 align="center" id="space-title">Syntherela Leaderboard</h1>"""

# What does your leaderboard evaluate?
# NOTE(review): currently empty (renders as a single newline) — consider adding a short description.
INTRODUCTION_TEXT = """
"""

# Which evaluations are you running? how can people reproduce what you have?
# "About" tab body (markdown). Plain triple-quoted string: the original used an
# f-string with no placeholders (ruff F541); dropping the "f" prefix means any
# future literal "{"/"}" in the markdown cannot trigger accidental interpolation.
LLM_BENCHMARKS_TEXT = """
# About
The **SyntheRela Leaderboard** provides a public evaluation of relational database synthesis methods using the **SyntheRela benchmark** ([github](https://github.com/martinjurkovic/syntherela)). This benchmark incorporates best practices, a novel robust detection metric, and a relational deep learning utility approach that leverages graph neural networks. It enables a comprehensive comparison of methods across multiple real-world databases.

To add a model to the leaderboard, run the **SyntheRela benchmark** on your generated data and then open a pull request on the [SyntheRela repository](https://github.com/martinjurkovic/syntherela).

## Authors
- **Martin Jurkovič**
- **Valter Hudovernik**
- **Erik Štrumbelj**

If you use the results from this leaderboard in your research, please **cite our paper** (citation below).

"""

# Shown on the submission tab: how to get a model onto the leaderboard.
EVALUATION_QUEUE_TEXT = """
To add a model to the leaderboard, run the **SyntheRela benchmark** on your generated data and then open a pull request on the [SyntheRela repository](https://github.com/martinjurkovic/syntherela).
"""

# Label on the copy-citation button in the UI.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# BibTeX entry offered to users; raw string so LaTeX escapes like {\v{S}} are kept verbatim.
CITATION_BUTTON_TEXT = r"""@inproceedings{
    iclrsyntheticdata2025syntherela,
    title={SyntheRela: A Benchmark For Synthetic Relational Database Generation},
    author={Martin Jurkovic and Valter Hudovernik and Erik {\v{S}}trumbelj},
    booktitle={Will Synthetic Data Finally Solve the Data Access Problem?},
    year={2025},
    url={https://openreview.net/forum?id=ZfQofWYn6n}
}"""