Roni Goldshmidt committed on
Commit 7ada547 · 1 Parent(s): a3cc365

Initial leaderboard setup

.ipynb_checkpoints/comparison-checkpoint.py CHANGED
@@ -8,44 +8,62 @@ warnings.filterwarnings("ignore", category=UserWarning, message="y_pred contains
 sns.set_style("whitegrid")
 
 class ModelEvaluator:
-    def __init__(self, df_labels, df_predictions, model_name):
+    def __init__(self, df_labels, df_predictions, model_name, categories = ['main-event', 'location', 'zone', 'light-conditions', 'weather-conditions', 'vehicles-density']):
         """
         Initialize the evaluator with ground truth labels and model predictions.
         """
         self.df_labels = df_labels
         self.df_predictions = df_predictions
         self.model_name = model_name
+        self.categories = categories
         self.metrics_df = self.compute_metrics()
-
+
     def merge_data(self):
         """Merge ground truth labels with predictions based on 'id'."""
         merged_df = pd.merge(self.df_labels, self.df_predictions, on='id', suffixes=('_true', '_pred'))
+        for category in list(set(self.categories) - set(['main-event'])):
+            valid_values = self.df_labels[f"{category}"].unique().astype(str)
+            merged_df = merged_df[merged_df[f"{category}_pred"].astype(str).isin(valid_values)]
+
         return merged_df
 
     def compute_metrics(self):
         """Compute precision, recall, F1-score, accuracy, and balanced accuracy for each class and category."""
         merged_df = self.merge_data()
-        categories = ['main-event', 'location', 'zone', 'light-conditions', 'weather-conditions', 'vehicles-density']
-
+        categories = self.categories
+
         results = []
-
+
         for category in categories:
-            y_true = merged_df[f"{category}_true"].astype(str)
-            y_pred = merged_df[f"{category}_pred"].astype(str)
-
-            labels = sorted(set(y_true) | set(y_pred))
-
-            class_precisions = precision_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
-            class_recalls = recall_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
-            class_f1 = f1_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
-
-            overall_precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
-            overall_recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
-            overall_f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
+            true_col = f"{category}_true"
+            pred_col = f"{category}_pred"
+
+            if true_col not in merged_df.columns or pred_col not in merged_df.columns:
+                print(f"Skipping {category} - missing columns")
+                continue
+
+            y_true = merged_df[true_col].astype(str)
+            y_pred = merged_df[pred_col].astype(str)
+
+            valid_labels = sorted(set(y_true) | set(y_pred))
+
+            valid_labels = [label for label in valid_labels if (y_true == label).sum() > 0]
+
+            if not valid_labels:
+                print(f"Skipping {category} - No valid labels found.")
+                continue
+
+            class_precisions = precision_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+            class_recalls = recall_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+            class_f1 = f1_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+
+            overall_precision = precision_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
+            overall_recall = recall_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
+            overall_f1 = f1_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
             overall_accuracy = accuracy_score(y_true, y_pred)
             overall_balanced_acc = balanced_accuracy_score(y_true, y_pred)
-
-            for i, label in enumerate(labels):
+
+            for i, label in enumerate(valid_labels):
                 results.append({
                     "Model": self.model_name,
                     "Category": category,
@@ -57,7 +75,7 @@ class ModelEvaluator:
                     "Balanced Acc.": np.nan,
                     "Support": (y_true == label).sum()
                 })
-
+
             results.append({
                 "Model": self.model_name,
                 "Category": category,
@@ -69,9 +87,9 @@ class ModelEvaluator:
                 "Balanced Acc.": overall_balanced_acc,
                 "Support": len(y_true)
             })
-
+
         df_res = pd.DataFrame(results)
-        return df_res.loc[df_res['Support']>0].reset_index(drop=True)
+        return df_res.loc[df_res['Support'] > 0].reset_index(drop=True)
 
     def get_metrics_df(self):
         """Return the computed metrics DataFrame."""
.ipynb_checkpoints/new_values-checkpoint.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
__pycache__/comparison.cpython-310.pyc ADDED
Binary file (18.1 kB).
 
comparison.py CHANGED
@@ -8,44 +8,62 @@ warnings.filterwarnings("ignore", category=UserWarning, message="y_pred contains
 sns.set_style("whitegrid")
 
 class ModelEvaluator:
-    def __init__(self, df_labels, df_predictions, model_name):
+    def __init__(self, df_labels, df_predictions, model_name, categories = ['main-event', 'location', 'zone', 'light-conditions', 'weather-conditions', 'vehicles-density']):
         """
         Initialize the evaluator with ground truth labels and model predictions.
         """
         self.df_labels = df_labels
         self.df_predictions = df_predictions
         self.model_name = model_name
+        self.categories = categories
         self.metrics_df = self.compute_metrics()
-
+
     def merge_data(self):
         """Merge ground truth labels with predictions based on 'id'."""
         merged_df = pd.merge(self.df_labels, self.df_predictions, on='id', suffixes=('_true', '_pred'))
+        for category in list(set(self.categories) - set(['main-event'])):
+            valid_values = self.df_labels[f"{category}"].unique().astype(str)
+            merged_df = merged_df[merged_df[f"{category}_pred"].astype(str).isin(valid_values)]
+
         return merged_df
 
     def compute_metrics(self):
         """Compute precision, recall, F1-score, accuracy, and balanced accuracy for each class and category."""
         merged_df = self.merge_data()
-        categories = ['main-event', 'location', 'zone', 'light-conditions', 'weather-conditions', 'vehicles-density']
-
+        categories = self.categories
+
         results = []
-
+
         for category in categories:
-            y_true = merged_df[f"{category}_true"].astype(str)
-            y_pred = merged_df[f"{category}_pred"].astype(str)
-
-            labels = sorted(set(y_true) | set(y_pred))
-
-            class_precisions = precision_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
-            class_recalls = recall_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
-            class_f1 = f1_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
-
-            overall_precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
-            overall_recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
-            overall_f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
+            true_col = f"{category}_true"
+            pred_col = f"{category}_pred"
+
+            if true_col not in merged_df.columns or pred_col not in merged_df.columns:
+                print(f"Skipping {category} - missing columns")
+                continue
+
+            y_true = merged_df[true_col].astype(str)
+            y_pred = merged_df[pred_col].astype(str)
+
+            valid_labels = sorted(set(y_true) | set(y_pred))
+
+            valid_labels = [label for label in valid_labels if (y_true == label).sum() > 0]
+
+            if not valid_labels:
+                print(f"Skipping {category} - No valid labels found.")
+                continue
+
+            class_precisions = precision_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+            class_recalls = recall_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+            class_f1 = f1_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
+
+            overall_precision = precision_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
+            overall_recall = recall_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
+            overall_f1 = f1_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
             overall_accuracy = accuracy_score(y_true, y_pred)
             overall_balanced_acc = balanced_accuracy_score(y_true, y_pred)
-
-            for i, label in enumerate(labels):
+
+            for i, label in enumerate(valid_labels):
                 results.append({
                     "Model": self.model_name,
                     "Category": category,
@@ -57,7 +75,7 @@ class ModelEvaluator:
                     "Balanced Acc.": np.nan,
                     "Support": (y_true == label).sum()
                 })
-
+
             results.append({
                 "Model": self.model_name,
                 "Category": category,
@@ -69,9 +87,9 @@ class ModelEvaluator:
                 "Balanced Acc.": overall_balanced_acc,
                 "Support": len(y_true)
             })
-
+
         df_res = pd.DataFrame(results)
-        return df_res.loc[df_res['Support']>0].reset_index(drop=True)
+        return df_res.loc[df_res['Support'] > 0].reset_index(drop=True)
 
     def get_metrics_df(self):
         """Return the computed metrics DataFrame."""
new_values.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
results/.ipynb_checkpoints/Labels-checkpoint.csv ADDED
The diff for this file is too large to render. See raw diff
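For context, a minimal usage sketch of the ModelEvaluator class changed in comparison.py above. It is not part of the commit: the toy DataFrames, their values, and the "demo-model" name are invented for illustration, and it assumes the script runs next to comparison.py so that `from comparison import ModelEvaluator` works. Only the 'id' column, the category column names, the constructor signature, and get_metrics_df() come from the diff itself.

import pandas as pd

from comparison import ModelEvaluator  # assumes comparison.py is on the import path

# Toy ground-truth labels and predictions; both frames share the 'id' column that
# merge_data() joins on, plus two of the default category columns.
df_labels = pd.DataFrame({
    "id": [1, 2, 3],
    "main-event": ["collision", "near-miss", "collision"],
    "zone": ["urban", "highway", "urban"],
})

df_predictions = pd.DataFrame({
    "id": [1, 2, 3],
    "main-event": ["collision", "collision", "near-miss"],
    "zone": ["urban", "highway", "rural"],  # "rural" never appears in the labels,
})                                          # so merge_data() drops the row for id=3

# Restrict categories to the columns present in the toy data; the default list also
# expects location, light-conditions, weather-conditions and vehicles-density columns.
evaluator = ModelEvaluator(
    df_labels,
    df_predictions,
    model_name="demo-model",
    categories=["main-event", "zone"],
)

# Per-class rows plus an overall row per category (precision, recall, F1,
# accuracy, balanced accuracy, support), with zero-support rows filtered out.
print(evaluator.get_metrics_df())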