import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, balanced_accuracy_score
import warnings

warnings.filterwarnings("ignore", category=UserWarning, message="y_pred contains classes not in y_true")

sns.set_style("whitegrid")


class ModelEvaluator:
    def __init__(self, df_labels, df_predictions, model_name,
                 categories=['main-event', 'location', 'zone', 'light-conditions', 'weather-conditions', 'vehicles-density']):
        """
        Initialize the evaluator with ground truth labels and model predictions.

        :param df_labels: Ground-truth annotations; must contain an 'id' column plus one column per category.
        :param df_predictions: Model predictions with the same schema as df_labels.
        :param model_name: Name used to identify this model in the metrics table.
        :param categories: Category columns to evaluate.
        """
        self.df_labels = df_labels
        self.df_predictions = df_predictions
        self.model_name = model_name
        self.categories = categories
        self.metrics_df = self.compute_metrics()

    def merge_data(self):
        """Merge ground truth labels with predictions on 'id' and drop rows whose
        predicted value (for any category except 'main-event') is not one of the
        values observed in the ground truth for that category."""
        merged_df = pd.merge(self.df_labels, self.df_predictions, on='id', suffixes=('_true', '_pred'))
        for category in set(self.categories) - {'main-event'}:
            valid_values = self.df_labels[category].unique().astype(str)
            merged_df = merged_df[merged_df[f"{category}_pred"].astype(str).isin(valid_values)]

        return merged_df

    def compute_metrics(self):
        """Compute precision, recall, F1-score, accuracy, and balanced accuracy for each class and category."""
        merged_df = self.merge_data()
        categories = self.categories

        results = []

        for category in categories:
            true_col = f"{category}_true"
            pred_col = f"{category}_pred"

            if true_col not in merged_df.columns or pred_col not in merged_df.columns:
                print(f"Skipping {category} - missing columns")
                continue

            # Rows whose ground truth is 'unknown' are excluded from the evaluation.
            filtered_df = merged_df[merged_df[true_col] != "unknown"]

            if filtered_df.empty:
                print(f"Skipping {category} - only 'unknown' values present.")
                continue

            y_true = filtered_df[true_col].astype(str)
            y_pred = filtered_df[pred_col].astype(str)

            # Score only labels that actually occur in the ground truth (and are not 'unknown').
            valid_labels = sorted(set(y_true) | set(y_pred))
            valid_labels = [label for label in valid_labels if (y_true == label).sum() > 0 and label != "unknown"]

            if not valid_labels:
                print(f"Skipping {category} - No valid labels found after filtering.")
                continue

            class_precisions = precision_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
            class_recalls = recall_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)
            class_f1 = f1_score(y_true, y_pred, labels=valid_labels, average=None, zero_division=0)

            overall_precision = precision_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
            overall_recall = recall_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
            overall_f1 = f1_score(y_true, y_pred, labels=valid_labels, average='macro', zero_division=0)
            overall_accuracy = accuracy_score(y_true, y_pred)
            overall_balanced_acc = balanced_accuracy_score(y_true, y_pred)

            for i, label in enumerate(valid_labels):
                results.append({
                    "Model": self.model_name,
                    "Category": category,
                    "Class": label,
                    "Precision": class_precisions[i],
                    "Recall": class_recalls[i],
                    "F1 Score": class_f1[i],
                    "Accuracy": np.nan,
                    "Balanced Acc.": np.nan,
                    "Support": (y_true == label).sum()
                })

            results.append({
                "Model": self.model_name,
                "Category": category,
                "Class": f"Overall ({category})",
                "Precision": overall_precision,
                "Recall": overall_recall,
                "F1 Score": overall_f1,
                "Accuracy": overall_accuracy,
                "Balanced Acc.": overall_balanced_acc,
                "Support": len(y_true)
            })

        df_res = pd.DataFrame(results)
        return df_res.loc[df_res['Support'] > 0].reset_index(drop=True)

    def get_metrics_df(self):
        """Return the computed metrics DataFrame."""
        return self.metrics_df
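
# Each evaluator exposes a long-format metrics table that ModelComparison
# concatenates: one row per (Model, Category, Class) with Precision, Recall,
# F1 Score, Accuracy, Balanced Acc. and Support columns, plus one
# "Overall (<category>)" row per category holding the macro-averaged scores
# together with accuracy and balanced accuracy.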


class ModelComparison:
    def __init__(self, evaluators):
        """
        Compare multiple models based on their evaluation results.

        :param evaluators: List of ModelEvaluator instances.
        """
        self.evaluators = evaluators
        self.combined_df = self.aggregate_metrics()

    def aggregate_metrics(self):
        """Merge evaluation metrics from multiple models into a single DataFrame."""
        dfs = [evaluator.get_metrics_df() for evaluator in self.evaluators]
        return pd.concat(dfs, ignore_index=True)

    def plot_category_comparison(self, metric="F1 Score"):
        """Compare models at the category level using a grouped bar chart with consistent styling."""
        df = self.combined_df[self.combined_df['Class'].str.contains("Overall")]

        plt.figure(figsize=(12, 6))
        colors = sns.color_palette("Set2", len(df["Model"].unique()))

        sns.barplot(
            data=df, x="Category", y=metric, hue="Model", palette=colors, edgecolor="black", alpha=0.85
        )

        plt.title(f"{metric} Comparison Across Categories", fontsize=14, fontweight="bold")
        plt.ylim(0, 1)
        plt.xticks(rotation=45, fontsize=12)
        plt.yticks(fontsize=12)
        plt.xlabel("Category", fontsize=12)
        plt.ylabel(metric, fontsize=12)
        plt.legend(title="Model", fontsize=11, loc="upper left")
        plt.grid(axis="y", linestyle="--", alpha=0.6)

        plt.tight_layout()
        plt.show()

    def plot_per_class_comparison(self, category, metric="F1 Score"):
        """Compare models for a specific category across individual classes with a standardized design."""
        df = self.combined_df[
            (self.combined_df["Category"] == category)
            & (~self.combined_df["Class"].str.contains("Overall"))
        ]

        plt.figure(figsize=(12, 6))
        colors = sns.color_palette("Set2", len(df["Model"].unique()))

        sns.barplot(
            data=df, x="Class", y=metric, hue="Model", palette=colors, edgecolor="black", alpha=0.85
        )

        plt.title(f"{metric} for {category} by Model", fontsize=14, fontweight="bold")
        plt.ylim(0, 1)
        plt.xticks(rotation=45, fontsize=12)
        plt.yticks(fontsize=12)
        plt.xlabel("Class", fontsize=12)
        plt.ylabel(metric, fontsize=12)
        plt.legend(title="Model", fontsize=11, loc="upper left")
        plt.grid(axis="y", linestyle="--", alpha=0.6)

        plt.tight_layout()
        plt.show()

    def plot_precision_recall_per_class(self, class_name=None):
        """
        Creates a grouped bar chart per class, displaying precision & recall side by side for all models.
        Ensures a consistent design with plot_per_class_comparison and plot_category_comparison.

        :param class_name: (str) If provided, only this class will be plotted. If None, all classes will be plotted.
        """
        if class_name:
            unique_classes = [class_name]
        else:
            unique_classes = self.combined_df["Class"].unique()

        models = self.combined_df["Model"].unique()
        num_models = len(models)

        bar_width = 0.35
        colors = sns.color_palette("Set2", num_models)

        for class_name in unique_classes:
            df_class = self.combined_df[self.combined_df["Class"] == class_name]

            if df_class.empty:
                print(f"No data available for class: {class_name}")
                continue

            plt.figure(figsize=(12, 6))

            metrics = ["Precision", "Recall"]
            x_indices = np.arange(len(metrics))

            for i, model in enumerate(models):
                df_model = df_class[df_class["Model"] == model]

                if df_model.empty:
                    continue

                precision = df_model["Precision"].values[0]
                recall = df_model["Recall"].values[0]

                plt.bar(
                    x_indices + (i * bar_width),
                    [precision, recall],
                    width=bar_width,
                    label=model,
                    color=colors[i],
                    alpha=0.85,
                    edgecolor="black"
                )

            plt.xlabel("Metric", fontsize=12)
            plt.ylabel("Score", fontsize=12)
            plt.title(f"Precision & Recall for Class: {class_name}", fontsize=14, fontweight="bold")

            # Center the metric tick labels under each group of model bars.
            plt.xticks(x_indices + ((bar_width * (num_models - 1)) / 2), metrics, fontsize=12)

            plt.ylim(0, 1)
            plt.legend(title="Model", fontsize=11, loc="upper left")
            plt.grid(axis="y", linestyle="--", alpha=0.6)

            plt.tight_layout()
            plt.show()

    def plot_recall_trends(self, selected_models=None):
        """
        Plot recall trends per class across different models, sorted by recall values in descending order.

        :param selected_models: List of model names to include in the plot. If None, all models in the dataset will be used.
        """
        if selected_models is None:
            selected_models = self.combined_df["Model"].unique().tolist()

        df_filtered = self.combined_df[self.combined_df["Model"].isin(selected_models)]
        df_filtered_no_overall = df_filtered[~df_filtered["Class"].str.contains("Overall")]

        df_sorted = df_filtered_no_overall.sort_values(by="Recall", ascending=False)

        plt.figure(figsize=(12, 6))
        unique_classes = df_sorted["Class"].unique()

        colors = dict(zip(selected_models, sns.color_palette("Set2", len(selected_models))))

        for class_name in unique_classes:
            class_data = df_sorted[df_sorted["Class"] == class_name]
            if len(class_data) > 1:
                plt.plot(
                    class_data["Class"], class_data["Recall"],
                    linestyle="-", alpha=0.5, color="gray", linewidth=1.5, zorder=1
                )

        for model in selected_models:
            model_data = df_sorted[df_sorted["Model"] == model]
            plt.scatter(
                model_data["Class"], model_data["Recall"],
                label=model, color=colors[model], edgecolor="black", s=120, alpha=1.0, zorder=2
            )

        plt.xlabel("Class", fontsize=12)
        plt.ylabel("Recall", fontsize=12)
        plt.xticks(rotation=45, ha="right", fontsize=12)
        plt.yticks(fontsize=12)
        plt.title("Recall per Class for Selected Models (Sorted by Recall)", fontsize=14, fontweight="bold")

        plt.legend(title="Model", fontsize=11, loc="upper right", bbox_to_anchor=(1.15, 1))

        plt.grid(axis="y", linestyle="--", alpha=0.6)

        plt.tight_layout()
        plt.show()

    def plot_metric(self, metric_name, figsize=(10, None), bar_height=0.8, palette="Set2", bar_spacing=0):
        """
        Creates a hierarchical visualization of metrics with category headers,
        sorted by category-average descending. (`bar_spacing` is accepted but
        currently unused.)
        """
        colors = sns.color_palette(palette, len(self.evaluators))
        models = list(self.combined_df["Model"].unique())

        df = self.combined_df.copy()
        df = df.drop_duplicates(subset=['Category', 'Class', 'Model', metric_name])

        avg_support = df.groupby(['Category', 'Class'])['Support'].mean().round().astype(int)

        def safe_get_value(model, category, class_name):
            mask = (
                (df['Model'] == model) &
                (df['Category'] == category) &
                (df['Class'] == class_name)
            )
            values = df.loc[mask, metric_name]
            return values.iloc[0] if not values.empty else np.nan

        # Order categories by their average metric value (descending), keeping 'Global' last.
        df_no_global = df[df['Category'] != 'Global']
        cat_avgs = df_no_global.groupby('Category', observed=False)[metric_name].mean()
        cat_avgs = cat_avgs.sort_values(ascending=False)
        categories_ordered = list(cat_avgs.index)

        if 'Global' in df['Category'].unique():
            categories_ordered.append('Global')

        plot_data = []
        yticks = []
        ylabels = []
        y_pos = 0
        category_positions = {}

        for category in categories_ordered:
            if category == 'Global':
                continue

            category_data = df[df['Category'] == category]
            overall_class_name = f"Overall ({category})"
            mask_overall = category_data['Class'] == overall_class_name
            category_data_overall = category_data[mask_overall]
            category_data_regular = category_data[~mask_overall]

            if not category_data_regular.empty:
                class_means = category_data_regular.groupby('Class')[metric_name].mean()
                class_means = class_means.sort_values(ascending=False)
                sorted_regular_classes = list(class_means.index)
            else:
                sorted_regular_classes = []

            category_start = y_pos
            yticks.append(y_pos)
            ylabels.append(category.upper())
            y_pos += 1

            for class_name in sorted_regular_classes:
                values = {model: safe_get_value(model, category, class_name) for model in models}
                if any(not np.isnan(v) for v in values.values()):
                    plot_data.append({
                        'category': category,
                        'label': class_name,
                        'y_pos': y_pos,
                        'values': values,
                        'is_category': False
                    })
                    support = avg_support.get((category, class_name), 0)
                    yticks.append(y_pos)
                    ylabels.append(f"    {class_name} (n={support:,})")
                    y_pos += 1

            if not category_data_overall.empty:
                values = {model: safe_get_value(model, category, overall_class_name) for model in models}
                if any(not np.isnan(v) for v in values.values()):
                    plot_data.append({
                        'category': category,
                        'label': overall_class_name,
                        'y_pos': y_pos,
                        'values': values,
                        'is_category': False
                    })
                    support = avg_support.get((category, overall_class_name), 0)
                    yticks.append(y_pos)
                    ylabels.append(f"    {overall_class_name} (n={support:,})")
                    y_pos += 1

            category_positions[category] = {
                'start': category_start,
                'end': y_pos - 1
            }

            y_pos += 0.5

        # Grow the figure height with the number of rows if no height was given.
        total_items = len(plot_data) + len(categories_ordered)
        dynamic_height = max(6, total_items * 0.4)
        if figsize[1] is None:
            figsize = (figsize[0], dynamic_height)

        bar_width = bar_height / len(models)

        fig, ax = plt.subplots(figsize=figsize)

        # Shade the band of rows belonging to each category.
        for category in categories_ordered:
            if category == 'Global':
                continue
            cat_start = category_positions[category]['start'] - 0.4
            cat_end = category_positions[category]['end'] + 0.4
            ax.axhspan(cat_start, cat_end, color='lightgray', alpha=0.2, zorder=0)

        for i, (model, color) in enumerate(zip(models, colors)):
            positions = []
            values = []
            for item in plot_data:
                if not item.get('is_category', False):
                    # Center the group of model bars on the row's y position.
                    positions.append(item['y_pos'] + (i - (len(models) - 1) / 2) * bar_width)
                    values.append(item['values'].get(model, np.nan))

            ax.barh(
                positions, values, height=bar_width,
                label=model, color=color, alpha=0.85, edgecolor="black"
            )

        ax.set_title(f'{metric_name} Comparison Across Models', fontsize=16, fontweight='bold', pad=20)

        ax.set_yticks(yticks)
        ax.set_yticklabels(ylabels, fontsize=10)
        ax.set_xlabel(metric_name, fontsize=12)
        ax.grid(True, axis='x', linestyle="--", alpha=0.7)

        ax.invert_yaxis()
        plt.legend(title="Model", bbox_to_anchor=(1.05, 1), loc='upper left')

        plt.tight_layout()

        return fig

    def plot_precision_recall_for_category(self, category, palette="Set2"):
        """
        Creates a modernized Precision-Recall scatter plot for each class within a given category.
        """
        import math

        plt.rcParams['font.size'] = 12

        df = self.combined_df[self.combined_df["Category"] == category].copy()
        if df.empty:
            print(f"No data available for category: {category}")
            return None

        class_data = df[~df["Class"].str.contains("Overall")]

        models = df["Model"].unique()
        colors = dict(zip(models, sns.color_palette(palette, len(models))))
        classes = sorted(class_data["Class"].unique())

        # One subplot per class, laid out on a two-column grid.
        cols = 2
        rows = math.ceil(len(classes) / cols)

        fig, axes = plt.subplots(rows, cols, figsize=(16, rows * 6))

        fig.suptitle(f'Precision-Recall Analysis for {category}',
                     fontsize=20, fontweight='bold', y=1.02)

        for i, class_name in enumerate(classes):
            row, col = divmod(i, cols)
            ax = axes[row, col] if rows > 1 else axes[col]

            class_subset = class_data[class_data["Class"] == class_name]
            sns.scatterplot(
                data=class_subset,
                x="Precision",
                y="Recall",
                hue="Model",
                palette=colors,
                ax=ax,
                s=200,
                alpha=0.85,
                edgecolor="black"
            )

            # Label each point with its model name.
            for _, point in class_subset.iterrows():
                ax.annotate(
                    point["Model"],
                    (point["Precision"], point["Recall"]),
                    xytext=(8, 8), textcoords='offset points',
                    bbox=dict(facecolor='white', alpha=0.7),
                    arrowprops=dict(
                        arrowstyle='->',
                        connectionstyle='arc3,rad=0.2',
                        color='black'
                    )
                )

            ax.set_title(f"Class: {class_name}", fontsize=16, fontweight="bold", pad=20)
            ax.set_xlim(-0.05, 1.05)
            ax.set_ylim(-0.05, 1.05)
            ax.grid(True, linestyle="--", alpha=0.5)
            ax.set_aspect("equal", adjustable="box")

            # Per-axes legends are redundant; the annotations already identify the models.
            legend = ax.get_legend()
            if legend is not None:
                legend.remove()

            ax.set_xlabel("Precision", fontsize=14)
            ax.set_ylabel("Recall", fontsize=14)

        # Remove any unused subplot slots.
        for j in range(i + 1, rows * cols):
            fig.delaxes(axes.flatten()[j])

        fig.subplots_adjust(top=0.92, bottom=0.08, left=0.08, right=0.92, hspace=0.35, wspace=0.3)

        return fig

    def plot_normalized_radar_chart(self, metric_name="F1 Score", exclude_categories=None, figsize=(12, 10), palette="Set2"):
        """
        Create a normalized radar chart comparing performance across different categories.
        Each vertex is normalized independently based on its maximum value.
        """
        from matplotlib.patches import Circle

        df = self.combined_df.copy()
        if exclude_categories:
            df = df[~df["Category"].isin(exclude_categories)]

        categories = sorted(df["Category"].unique())
        models = sorted(df["Model"].unique())

        colors = dict(zip(models, sns.color_palette(palette, len(models))))

        fig = plt.figure(figsize=figsize)
        ax = plt.subplot(111, polar=True)

        # Light concentric circles as background; ax.transData._b is the private
        # transform matplotlib uses for the Cartesian part of a polar axes.
        for radius in np.linspace(0, 1, 5):
            circle = Circle((0, 0), radius, transform=ax.transData._b,
                            fill=True, color='gray', alpha=0.03)
            ax.add_artist(circle)

        N = len(categories)
        angles = np.linspace(0, 2 * np.pi, N, endpoint=False).tolist()
        angles += angles[:1]

        # Normalize each category axis by the best "Overall" score any model achieved on it.
        df_overall = df[df["Class"].str.contains("Overall")]
        max_values = df_overall.groupby("Category")[metric_name].max().to_dict()

        normalized_values = {}

        for model in models:
            values = []
            for cat in categories:
                val = df_overall[(df_overall["Model"] == model) &
                                 (df_overall["Category"] == cat)][metric_name].values
                val = val[0] if len(val) > 0 else 0
                norm_val = val / max_values[cat] if max_values[cat] > 0 else 0
                values.append(norm_val)
            normalized_values[model] = values + [values[0]]

        for model, values in normalized_values.items():
            color = colors[model]

            ax.fill(angles, values, color=color, alpha=0.15,
                    edgecolor=color, linewidth=0.5)

            ax.plot(angles, values,
                    linewidth=2.5, linestyle='solid',
                    label=model, color=color, alpha=0.85,
                    zorder=5)

        ax.set_theta_offset(np.pi / 2)
        ax.set_theta_direction(-1)
        ax.set_yticklabels([])

        ax.set_thetagrids(np.degrees(angles[:-1]), categories,
                          fontsize=12, fontweight="bold")

        # Annotate each spoke with its own (un-normalized) scale.
        for category, angle in zip(categories, angles[:-1]):
            max_val = max_values[category]
            scales = np.linspace(0, max_val, 5)

            for j, scale in enumerate(scales):
                radius = j / 4

                if radius > 0:
                    ax.text(angle, radius, f'{scale:.2f}',
                            ha='center', va='center',
                            color='gray', fontsize=9, fontweight='bold')

        ax.text(0, 0, '0.00',
                ha='center', va='center',
                color='gray', fontsize=9, fontweight='bold')

        ax.grid(True, color='gray', alpha=0.3, linewidth=0.5)
        ax.yaxis.grid(True, color='gray', alpha=0.3, linewidth=0.5)
        ax.set_rticks(np.linspace(0, 1, 5))

        ax.set_facecolor('#f8f9fa')

        plt.title(f'Model Performance Radar Chart - {metric_name}',
                  pad=20, fontsize=14, fontweight="bold")
        plt.legend(title="Model", fontsize=11,
                   loc="upper right", bbox_to_anchor=(1.3, 1))

        ax.set_aspect('equal')

        plt.tight_layout()
        return fig

    def transform_to_leaderboard(self):
        """Pivot the per-class metrics into a leaderboard with one row per model,
        ranked by the average F1 score across categories."""
        df = self.combined_df.copy()
        df = df[~df['Class'].str.contains('Overall', na=False)]

        # Per-class rows carry NaN for Accuracy, so those columns may be dropped
        # by pivot_table; the remaining metrics are averaged per category.
        pivoted_df = df.pivot_table(
            index='Model',
            columns='Category',
            values=['Precision', 'Recall', 'F1 Score', 'Accuracy'],
            aggfunc='mean'
        )

        pivoted_df.columns = ['_'.join(col).strip() for col in pivoted_df.columns.values]

        pivoted_df['Average F1 Score'] = pivoted_df.filter(like='F1 Score').mean(axis=1)

        pivoted_df = pivoted_df.reset_index()
        cols = ['Model', 'Average F1 Score'] + [col for col in pivoted_df.columns if col not in ['Model', 'Average F1 Score']]
        pivoted_df = pivoted_df[cols]

        pivoted_df = pivoted_df.sort_values(by='Average F1 Score', ascending=False).reset_index(drop=True)
        pivoted_df.insert(0, 'Rank', range(1, len(pivoted_df) + 1))

        return pivoted_df
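

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). It assumes the label and prediction
# tables share an 'id' column plus one column per evaluated category; the tiny
# synthetic frames and the model names "model-a"/"model-b" below are made up
# purely for demonstration.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    demo_categories = ["location", "zone"]

    # Hypothetical ground-truth annotations.
    labels = pd.DataFrame({
        "id": [1, 2, 3, 4],
        "location": ["urban", "rural", "urban", "urban"],
        "zone": ["intersection", "highway", "highway", "intersection"],
    })

    # Two hypothetical sets of predictions over the same ids.
    preds_a = pd.DataFrame({
        "id": [1, 2, 3, 4],
        "location": ["urban", "urban", "urban", "urban"],
        "zone": ["intersection", "highway", "intersection", "intersection"],
    })
    preds_b = pd.DataFrame({
        "id": [1, 2, 3, 4],
        "location": ["urban", "rural", "rural", "urban"],
        "zone": ["highway", "highway", "highway", "intersection"],
    })

    evaluators = [
        ModelEvaluator(labels, preds_a, "model-a", categories=demo_categories),
        ModelEvaluator(labels, preds_b, "model-b", categories=demo_categories),
    ]
    comparison = ModelComparison(evaluators)

    # Print the ranked leaderboard and show one of the comparison plots.
    print(comparison.transform_to_leaderboard())
    comparison.plot_category_comparison(metric="F1 Score")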