Spaces:
Running
Running
Martin Jurkovic
committed on
Commit
·
56140d5
1
Parent(s):
5acb894
Update cardinality score and C2ST naming
Browse files
- src/about.py +1 -1
- src/populate.py +33 -12
src/about.py
CHANGED
|
@@ -15,7 +15,7 @@ class Tasks(Enum):
|
|
| 15 |
# task0 = Task("anli_r1", "acc", "ANLI")
|
| 16 |
# task1 = Task("logiqa", "acc_norm", "LogiQA")
|
| 17 |
# task_0 = Task("multi-table", "AggregationDetection-LogisticRegression", "AggregationDetection-LogisticRegression ⬇️")
|
| 18 |
-
task_1 = Task("multi-table", "AggregationDetection-XGBClassifier", "
|
| 19 |
task_2 = Task("multi-table", "CardinalityShapeSimilarity", "CardinalityShapeSimilarity ⬆️")
|
| 20 |
|
| 21 |
class SingleTableTasks(Enum):
|
|
|
|
| 15 |
# task0 = Task("anli_r1", "acc", "ANLI")
|
| 16 |
# task1 = Task("logiqa", "acc_norm", "LogiQA")
|
| 17 |
# task_0 = Task("multi-table", "AggregationDetection-LogisticRegression", "AggregationDetection-LogisticRegression ⬇️")
|
| 18 |
+
task_1 = Task("multi-table", "AggregationDetection-XGBClassifier", "C2ST Agg-XGBClassifier ⬇️")
|
| 19 |
task_2 = Task("multi-table", "CardinalityShapeSimilarity", "CardinalityShapeSimilarity ⬆️")
|
| 20 |
|
| 21 |
class SingleTableTasks(Enum):
|
src/populate.py
CHANGED
|
@@ -42,14 +42,23 @@ def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> p
|
|
| 42 |
data = json.load(fp)
|
| 43 |
all_data_json.append(data)
|
| 44 |
|
| 45 |
-
multi_table_metrics = [task.value.
|
| 46 |
-
single_table_metrics = [task.value.
|
| 47 |
-
single_column_metrics = [task.value.
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
# iterate through all json files and add the data to the dataframe
|
| 55 |
for data in all_data_json:
|
|
@@ -58,6 +67,16 @@ def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> p
|
|
| 58 |
row = {"Dataset": dataset, "Model": model}
|
| 59 |
for metric in multi_table_metrics:
|
| 60 |
stripped_metric = strip_emoji(metric)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
if stripped_metric in data["multi_table_metrics"]:
|
| 62 |
metric_values = []
|
| 63 |
for table in data["multi_table_metrics"][stripped_metric].keys():
|
|
@@ -66,14 +85,15 @@ def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> p
|
|
| 66 |
if "statistic" in data["multi_table_metrics"][stripped_metric][table]:
|
| 67 |
metric_values.append(data["multi_table_metrics"][stripped_metric][table]["statistic"])
|
| 68 |
|
| 69 |
-
row[
|
| 70 |
else:
|
| 71 |
-
row[
|
| 72 |
multitable_df = pd.concat([multitable_df, pd.DataFrame([row])], ignore_index=True)
|
| 73 |
|
| 74 |
singletable_row = {"Dataset": dataset, "Model": model}
|
| 75 |
for metric in single_table_metrics:
|
| 76 |
stripped_metric = strip_emoji(metric)
|
|
|
|
| 77 |
if stripped_metric in data["single_table_metrics"]:
|
| 78 |
metric_values = []
|
| 79 |
for table in data["single_table_metrics"][stripped_metric].keys():
|
|
@@ -82,15 +102,16 @@ def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> p
|
|
| 82 |
if "value" in data["single_table_metrics"][stripped_metric][table]:
|
| 83 |
metric_values.append(data["single_table_metrics"][stripped_metric][table]["value"])
|
| 84 |
|
| 85 |
-
singletable_row[
|
| 86 |
else:
|
| 87 |
-
singletable_row[
|
| 88 |
singletable_df = pd.concat([singletable_df, pd.DataFrame([singletable_row])], ignore_index=True)
|
| 89 |
|
| 90 |
singlecolumn_row = {"Dataset": dataset, "Model": model, "Table": ""}
|
| 91 |
# insert row
|
| 92 |
for metric in single_column_metrics:
|
| 93 |
stripped_metric = strip_emoji(metric)
|
|
|
|
| 94 |
if stripped_metric in data["single_column_metrics"]:
|
| 95 |
for table in data["single_column_metrics"][stripped_metric].keys():
|
| 96 |
# check if row where dataset = dataset, model = model, table = table exists
|
|
@@ -115,7 +136,7 @@ def get_leaderboard_df(results_path: str, cols: list, benchmark_cols: list) -> p
|
|
| 115 |
singlecolumn_df.loc[
|
| 116 |
(singlecolumn_df["Dataset"] == dataset) &
|
| 117 |
(singlecolumn_df["Model"] == model) &
|
| 118 |
-
(singlecolumn_df["Table"] == table),
|
| 119 |
|
| 120 |
|
| 121 |
return singlecolumn_df, singletable_df, multitable_df
|
|
|
|
| 42 |
data = json.load(fp)
|
| 43 |
all_data_json.append(data)
|
| 44 |
|
| 45 |
+
multi_table_metrics = [task.value.metric for task in Tasks]
|
| 46 |
+
single_table_metrics = [task.value.metric for task in SingleTableTasks]
|
| 47 |
+
single_column_metrics = [task.value.metric for task in SingleColumnTasks]
|
| 48 |
|
| 49 |
+
multi_table_metric_names = [task.value.col_name for task in Tasks]
|
| 50 |
+
single_table_metric_names = [task.value.col_name for task in SingleTableTasks]
|
| 51 |
+
single_column_metric_names = [task.value.col_name for task in SingleColumnTasks]
|
| 52 |
+
|
| 53 |
+
# Create mapping between metrics and their display names
|
| 54 |
+
multi_table_metric_mapping = dict(zip(multi_table_metrics, multi_table_metric_names))
|
| 55 |
+
single_table_metric_mapping = dict(zip(single_table_metrics, single_table_metric_names))
|
| 56 |
+
single_column_metric_mapping = dict(zip(single_column_metrics, single_column_metric_names))
|
| 57 |
+
|
| 58 |
+
# create empty dataframe with the display column names
|
| 59 |
+
multitable_df = pd.DataFrame(columns=["Dataset", "Model"] + multi_table_metric_names)
|
| 60 |
+
singletable_df = pd.DataFrame(columns=["Dataset", "Model"] + single_table_metric_names)
|
| 61 |
+
singlecolumn_df = pd.DataFrame(columns=["Dataset", "Table", "Model"] + single_column_metric_names)
|
| 62 |
|
| 63 |
# iterate through all json files and add the data to the dataframe
|
| 64 |
for data in all_data_json:
|
|
|
|
| 67 |
row = {"Dataset": dataset, "Model": model}
|
| 68 |
for metric in multi_table_metrics:
|
| 69 |
stripped_metric = strip_emoji(metric)
|
| 70 |
+
display_name = multi_table_metric_mapping[metric] # Get the display name for this metric
|
| 71 |
+
|
| 72 |
+
# Special case for CardinalityShapeSimilarity which is stored under "Trends"
|
| 73 |
+
if "CardinalityShapeSimilarity" in metric:
|
| 74 |
+
if "Trends" in data["multi_table_metrics"] and "cardinality" in data["multi_table_metrics"]["Trends"]:
|
| 75 |
+
row[display_name] = data["multi_table_metrics"]["Trends"]["cardinality"]
|
| 76 |
+
else:
|
| 77 |
+
row[display_name] = np.nan
|
| 78 |
+
continue
|
| 79 |
+
|
| 80 |
if stripped_metric in data["multi_table_metrics"]:
|
| 81 |
metric_values = []
|
| 82 |
for table in data["multi_table_metrics"][stripped_metric].keys():
|
|
|
|
| 85 |
if "statistic" in data["multi_table_metrics"][stripped_metric][table]:
|
| 86 |
metric_values.append(data["multi_table_metrics"][stripped_metric][table]["statistic"])
|
| 87 |
|
| 88 |
+
row[display_name] = np.mean(metric_values).round(decimals=2) # Use display name as column
|
| 89 |
else:
|
| 90 |
+
row[display_name] = np.nan # Use display name as column
|
| 91 |
multitable_df = pd.concat([multitable_df, pd.DataFrame([row])], ignore_index=True)
|
| 92 |
|
| 93 |
singletable_row = {"Dataset": dataset, "Model": model}
|
| 94 |
for metric in single_table_metrics:
|
| 95 |
stripped_metric = strip_emoji(metric)
|
| 96 |
+
display_name = single_table_metric_mapping[metric] # Get the display name for this metric
|
| 97 |
if stripped_metric in data["single_table_metrics"]:
|
| 98 |
metric_values = []
|
| 99 |
for table in data["single_table_metrics"][stripped_metric].keys():
|
|
|
|
| 102 |
if "value" in data["single_table_metrics"][stripped_metric][table]:
|
| 103 |
metric_values.append(data["single_table_metrics"][stripped_metric][table]["value"])
|
| 104 |
|
| 105 |
+
singletable_row[display_name] = np.mean(metric_values).round(decimals=2) # Use display name as column
|
| 106 |
else:
|
| 107 |
+
singletable_row[display_name] = np.nan # Use display name as column
|
| 108 |
singletable_df = pd.concat([singletable_df, pd.DataFrame([singletable_row])], ignore_index=True)
|
| 109 |
|
| 110 |
singlecolumn_row = {"Dataset": dataset, "Model": model, "Table": ""}
|
| 111 |
# insert row
|
| 112 |
for metric in single_column_metrics:
|
| 113 |
stripped_metric = strip_emoji(metric)
|
| 114 |
+
display_name = single_column_metric_mapping[metric] # Get the display name for this metric
|
| 115 |
if stripped_metric in data["single_column_metrics"]:
|
| 116 |
for table in data["single_column_metrics"][stripped_metric].keys():
|
| 117 |
# check if row where dataset = dataset, model = model, table = table exists
|
|
|
|
| 136 |
singlecolumn_df.loc[
|
| 137 |
(singlecolumn_df["Dataset"] == dataset) &
|
| 138 |
(singlecolumn_df["Model"] == model) &
|
| 139 |
+
(singlecolumn_df["Table"] == table), display_name] = np.mean(metric_values).round(decimals=2) # Use display name as column
|
| 140 |
|
| 141 |
|
| 142 |
return singlecolumn_df, singletable_df, multitable_df
|