File size: 8,919 Bytes
b98f07f
 
 
 
3b86dfc
b98f07f
9446fe5
 
370d5a0
b98f07f
 
0a65444
 
 
 
 
 
 
 
 
 
7891337
 
 
 
 
 
 
 
 
 
 
 
0a65444
3b86dfc
 
 
 
 
 
 
 
 
 
 
 
 
385e405
5f7fcf4
 
 
3b86dfc
385e405
3b86dfc
5fe3b95
b98f07f
3b86dfc
 
 
 
 
 
 
 
 
 
56140d5
 
 
3b86dfc
56140d5
 
 
 
 
 
 
 
 
 
 
 
 
3b86dfc
 
 
5909269
0a65444
 
 
7891337
5909269
7891337
 
 
 
3b86dfc
 
5f7fcf4
56140d5
 
 
 
 
 
 
 
 
 
5f7fcf4
3b86dfc
5f7fcf4
 
 
 
 
b98f07f
56140d5
385e405
56140d5
3b86dfc
385e405
5f7fcf4
 
 
56140d5
5f7fcf4
 
 
 
 
 
 
385e405
56140d5
385e405
56140d5
5f7fcf4
385e405
370d5a0
 
 
 
56140d5
370d5a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56140d5
370d5a0
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
import json
import os

import pandas as pd
import numpy as np

from src.display.formatting import make_clickable_model
from src.display.utils import EvalQueueColumn
from src.about import Tasks, SingleTableTasks, SingleColumnTasks


# Model name mapping dictionary.
# Keys are the upper-cased "method_name" values found in the result json
# files; values are the display names shown on the leaderboard.
model_names = {
    'CLAVADDPM': "ClavaDDPM",
    'RGCLD': "RGCLD",
    'MOSTLYAI': "TabularARGN",
    'RCTGAN': "RCTGAN",
    'REALTABFORMER': "REaLTabFormer",
    'SDV': "SDV",
}

# Dataset name mapping dictionary.
# Keys are the raw "dataset_name" values from the result json files;
# values are the short display names shown on the leaderboard.
dataset_names = {
    "airbnb-simplified_subsampled": "Airbnb",
    "Berka_subsampled": "Berka",
    "Biodegradability_v1": "Biodegradability",
    "CORA_v1": "Cora",
    "imdb_MovieLens_v1": "IMDB",
    "rossmann_subsampled": "Rossmann",
    "walmart_subsampled": "Walmart",
    "f1_subsampled": "F1",
}


# def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
#     """Creates a dataframe from all the individual experiment results"""
#     raw_data = get_raw_eval_results(results_path, requests_path)
#     all_data_json = [v.to_dict() for v in raw_data]

#     df = pd.DataFrame.from_records(all_data_json)
#     df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
#     df = df[cols].round(decimals=2)

#     # filter out if any of the benchmarks have not been produced
#     df = df[has_no_nan_values(df, benchmark_cols)]
#     return df


def strip_emoji(text: str) -> str:
    """Drop every non-ASCII character (emojis included) and any trailing whitespace."""
    ascii_only = "".join(ch for ch in text if ch.isascii())
    return ascii_only.rstrip()


def _mean_or_nan(values: list) -> float:
    """Mean of *values* rounded to 2 decimals, or NaN for an empty list.

    Guarding the empty case avoids the RuntimeWarning that ``np.mean([])``
    would emit (the resulting cell value, NaN, is the same).
    """
    if not values:
        return np.nan
    return np.mean(values).round(decimals=2)


def _collect_metric_values(per_item_results: dict, keys: tuple) -> list:
    """Gather every value stored under any of *keys* across a dict of result dicts.

    *per_item_results* maps a table (or column) name to a dict of raw metric
    outputs; only entries whose key is listed in *keys* contribute a value.
    """
    values = []
    for result in per_item_results.values():
        for key in keys:
            if key in result:
                values.append(result[key])
    return values


def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> tuple:
    """Build the leaderboard dataframes from the individual experiment results.

    Reads every ``*.json`` file under
    ``<results_path>/demo-leaderboard/syntherela-demo`` and aggregates metric
    values per (dataset, model) — and additionally per table for the
    single-column metrics.

    Args:
        results_path: Root directory containing the results tree.
        cols: Kept for interface compatibility; currently unused.
        benchmark_cols: Kept for interface compatibility; currently unused.

    Returns:
        Tuple ``(singlecolumn_df, singletable_df, multitable_df)`` of pandas
        DataFrames whose metric columns use the task display names.
    """
    # Load every result json in the demo leaderboard directory.
    all_data_json = []
    res_path = os.path.join(results_path, "demo-leaderboard", "syntherela-demo")
    for entry in os.listdir(res_path):
        if entry.endswith(".json"):
            with open(os.path.join(res_path, entry)) as fp:
                all_data_json.append(json.load(fp))

    # Metric id -> display column name, one mapping per metric family.
    multi_table_mapping = {task.value.metric: task.value.col_name for task in Tasks}
    single_table_mapping = {task.value.metric: task.value.col_name for task in SingleTableTasks}
    single_column_mapping = {task.value.metric: task.value.col_name for task in SingleColumnTasks}

    # Accumulate plain row dicts and build each DataFrame once at the end:
    # calling pd.concat inside the loop is quadratic in the number of rows.
    multitable_rows = []
    singletable_rows = []
    # (dataset, model, table) -> row dict; a dict preserves insertion order
    # and replaces the O(n) dataframe mask lookup per table of the old code.
    singlecolumn_rows = {}

    for data in all_data_json:
        # Map raw method/dataset identifiers to their display names.
        model = model_names.get(data["method_name"].upper(), data["method_name"])
        dataset = dataset_names.get(data["dataset_name"], data["dataset_name"])

        row = {"Dataset": dataset, "Model": model}
        for metric, display_name in multi_table_mapping.items():
            # Special case: CardinalityShapeSimilarity is stored under
            # "Trends" -> "cardinality" instead of under its own key.
            if "CardinalityShapeSimilarity" in metric:
                trends = data["multi_table_metrics"].get("Trends", {})
                row[display_name] = trends.get("cardinality", np.nan)
                continue
            stripped_metric = strip_emoji(metric)
            if stripped_metric in data["multi_table_metrics"]:
                values = _collect_metric_values(
                    data["multi_table_metrics"][stripped_metric],
                    ("accuracy", "statistic"),
                )
                row[display_name] = _mean_or_nan(values)
            else:
                row[display_name] = np.nan
        multitable_rows.append(row)

        singletable_row = {"Dataset": dataset, "Model": model}
        for metric, display_name in single_table_mapping.items():
            stripped_metric = strip_emoji(metric)
            if stripped_metric in data["single_table_metrics"]:
                values = _collect_metric_values(
                    data["single_table_metrics"][stripped_metric],
                    ("accuracy", "value"),
                )
                singletable_row[display_name] = _mean_or_nan(values)
            else:
                singletable_row[display_name] = np.nan
        singletable_rows.append(singletable_row)

        for metric, display_name in single_column_mapping.items():
            stripped_metric = strip_emoji(metric)
            if stripped_metric not in data["single_column_metrics"]:
                continue
            for table, columns in data["single_column_metrics"][stripped_metric].items():
                # One row per (dataset, model, table), created on first sight;
                # later metrics/files fill additional cells of the same row.
                key = (dataset, model, table)
                table_row = singlecolumn_rows.setdefault(
                    key, {"Dataset": dataset, "Model": model, "Table": table}
                )
                values = _collect_metric_values(
                    columns, ("accuracy", "value", "statistic")
                )
                table_row[display_name] = _mean_or_nan(values)

    # Build the output frames in one shot, with the metric display names as
    # columns (columns missing from a row become NaN automatically).
    multitable_df = pd.DataFrame(
        multitable_rows,
        columns=["Dataset", "Model"] + list(multi_table_mapping.values()),
    )
    singletable_df = pd.DataFrame(
        singletable_rows,
        columns=["Dataset", "Model"] + list(single_table_mapping.values()),
    )
    singlecolumn_df = pd.DataFrame(
        list(singlecolumn_rows.values()),
        columns=["Dataset", "Table", "Model"] + list(single_column_mapping.values()),
    )
    return singlecolumn_df, singletable_df, multitable_df