Roni Goldshmidt committed
Commit 7ada547 · 1 Parent(s): a3cc365
Initial leaderboard setup
.ipynb_checkpoints/comparison-checkpoint.py
CHANGED
Identical diff to comparison.py below; this is Jupyter's autosaved checkpoint copy of the same file.
.ipynb_checkpoints/new_values-checkpoint.ipynb
ADDED
The diff for this file is too large to render.
__pycache__/comparison.cpython-310.pyc
ADDED
Binary file (18.1 kB).
comparison.py
CHANGED
@@ -8,44 +8,62 @@ warnings.filterwarnings("ignore", category=UserWarning, message="y_pred contains
 sns.set_style("whitegrid")
 
 class ModelEvaluator:
-    def __init__(self, df_labels, df_predictions, model_name):
+    def __init__(self, df_labels, df_predictions, model_name, categories = ['main-event', 'location', 'zone', 'light-conditions', 'weather-conditions', 'vehicles-density']):
         """
         Initialize the evaluator with ground truth labels and model predictions.
         """
         self.df_labels = df_labels
         self.df_predictions = df_predictions
         self.model_name = model_name
+        self.categories = categories
         self.metrics_df = self.compute_metrics()
-
+
     def merge_data(self):
         """Merge ground truth labels with predictions based on 'id'."""
         merged_df = pd.merge(self.df_labels, self.df_predictions, on='id', suffixes=('_true', '_pred'))
+        for category in list(set(self.categories) - set(['main-event'])):
+            valid_values = self.df_labels[f"{category}"].unique().astype(str)
+            merged_df = merged_df[merged_df[f"{category}_pred"].astype(str).isin(valid_values)]
+
         return merged_df
 
     def compute_metrics(self):
         """Compute precision, recall, F1-score, accuracy, and balanced accuracy for each class and category."""
         merged_df = self.merge_data()
-        categories = [truncated in the rendered diff]
+        categories = self.categories
-
+
         results = []
-
+
         for category in categories:
-            [removed lines 33-44 were not rendered in the diff view]
+            true_col = f"{category}_true"
+            pred_col = f"{category}_pred"
+
+            if true_col not in merged_df.columns or pred_col not in merged_df.columns:
+                print(f"Skipping {category} - missing columns")
+                continue
+
+            y_true = merged_df[true_col].astype(str)
+            y_pred = merged_df[pred_col].astype(str)
+
+            valid_labels = sorted(set(y_true) | set(y_pred))
+
+            valid_labels = [label for label in valid_labels if (y_true == label).sum() > 0]
+
+            if not valid_labels:
+                print(f"Skipping {category} - No valid labels found.")
+                continue
+
+            class_precisions = precision_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+            class_recalls = recall_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+            class_f1 = f1_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+
+            overall_precision = precision_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
+            overall_recall = recall_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
+            overall_f1 = f1_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
             overall_accuracy = accuracy_score(y_true, y_pred)
             overall_balanced_acc = balanced_accuracy_score(y_true, y_pred)
-
+
-            for i, label in enumerate( [truncated in the rendered diff]
+            for i, label in enumerate(valid_labels):
                 results.append({
                     "Model": self.model_name,
                     "Category": category,
@@ -57,7 +75,7 @@ class ModelEvaluator:
                     "Balanced Acc.": np.nan,
                     "Support": (y_true == label).sum()
                 })
-
+
             results.append({
                 "Model": self.model_name,
                 "Category": category,
@@ -69,9 +87,9 @@ class ModelEvaluator:
                 "Balanced Acc.": overall_balanced_acc,
                 "Support": len(y_true)
             })
-
+
         df_res = pd.DataFrame(results)
-        return df_res.loc[df_res['Support']>0].reset_index(drop=True)
+        return df_res.loc[df_res['Support'] > 0].reset_index(drop=True)
 
     def get_metrics_df(self):
         """Return the computed metrics DataFrame."""
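For orientation, a minimal usage sketch of the updated ModelEvaluator follows. It is not part of the commit: the toy DataFrames and their label values are invented for illustration, and it assumes comparison.py is importable with its own dependencies (pandas, numpy, seaborn, scikit-learn) installed.

# Illustrative only: toy ground-truth and prediction tables for the six default categories.
import pandas as pd
from comparison import ModelEvaluator  # assumes comparison.py is on the Python path

df_labels = pd.DataFrame({
    "id": [1, 2, 3],
    "main-event": ["collision", "near-miss", "collision"],
    "location": ["urban", "highway", "urban"],
    "zone": ["intersection", "straight-road", "intersection"],
    "light-conditions": ["day", "night", "day"],
    "weather-conditions": ["clear", "rain", "clear"],
    "vehicles-density": ["low", "high", "low"],
})

# Predictions share the schema ('id' plus one column per category);
# one value is deliberately wrong, but still within the label vocabulary.
df_predictions = df_labels.copy()
df_predictions.loc[2, "weather-conditions"] = "rain"

evaluator = ModelEvaluator(df_labels, df_predictions, model_name="toy-model")

# One row per observed class and one overall row per category; rows with Support == 0 are dropped.
print(evaluator.get_metrics_df())

Per-class rows report support with "Balanced Acc." set to NaN, while each category's overall row reports balanced accuracy over all merged samples, as computed in compute_metrics().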
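The filtering added to merge_data() is worth calling out: for every category except 'main-event', rows whose predicted value never appears in the corresponding ground-truth column are dropped before any metric is computed. A standalone sketch of the same pandas pattern, with invented values:

# Standalone illustration of the isin-based filter used in merge_data (toy values invented).
import pandas as pd

labels = pd.DataFrame({"id": [1, 2], "weather-conditions": ["clear", "rain"]})
preds = pd.DataFrame({"id": [1, 2], "weather-conditions": ["clear", "heavy-fog"]})  # "heavy-fog" never occurs in the labels

merged = pd.merge(labels, preds, on="id", suffixes=("_true", "_pred"))

valid_values = labels["weather-conditions"].unique().astype(str)
merged = merged[merged["weather-conditions_pred"].astype(str).isin(valid_values)]

print(merged)  # only id 1 remains; the out-of-vocabulary prediction for id 2 is filtered out

Because the filter is applied to the merged frame as a whole, a row dropped for one category is also excluded from every other category's metrics.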
new_values.ipynb
ADDED
The diff for this file is too large to render.
results/.ipynb_checkpoints/Labels-checkpoint.csv
ADDED
The diff for this file is too large to render.