from __future__ import annotations

from pathlib import Path
from typing import Any, ClassVar, Dict, Optional

import pandas as pd


class EvalManager:
    """
    Stores evaluation results for various datasets and metrics.

    The data is stored in a pandas DataFrame with a MultiIndex for columns.
    The first level of the MultiIndex is the dataset name and the second level
    is the metric name.

    Usage:
    >>> evaluator = EvalManager.from_dir("data/evaluation_results/")
    >>> print(evaluator.data)
    """

    model_col: ClassVar[str] = "model"
    dataset_col: ClassVar[str] = "dataset"
    metric_col: ClassVar[str] = "metric"

    def __init__(self, data: Optional[pd.DataFrame] = None):
        if data is None:
            data = pd.DataFrame()
        self._df = data
        self._df.index = self._df.index.rename(EvalManager.model_col)

    def __str__(self) -> str:
        return self.data.__str__()

    @staticmethod
    def from_dict(data: Dict[Any, Any]) -> EvalManager:
        """
        Load evaluation results from a dictionary.

        Expected format:
        {
            "model1": pd.read_json(path1).T.stack(),
            "model2": pd.read_json(path2).T.stack(),
        }
        """
        df = pd.DataFrame.from_dict(data, orient="index")
        return EvalManager(df)

    @staticmethod
    def from_json(path: str | Path) -> EvalManager:
        """
        Load evaluation results from a single JSON file. The file stem is used
        as the model name.
        """
        datapath = Path(path)
        if not datapath.is_file():
            raise FileNotFoundError(f"{path} is not a file")
        data = {}
        # `.T.stack()` turns the JSON table into a Series indexed by (dataset, metric).
        data[datapath.stem] = pd.read_json(datapath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_dir(datadir: str | Path) -> EvalManager:
        """
        Load evaluation results from all JSON files in a directory. Each file
        stem is used as a model name.
        """
        datadir_ = Path(datadir)
        if not datadir_.is_dir():
            raise FileNotFoundError(f"{datadir} is not a directory")
        eval_files = list(datadir_.glob("*.json"))
        data = {}
        for filepath in eval_files:
            data[filepath.stem] = pd.read_json(filepath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_csv(path: str | Path) -> EvalManager:
        """
        Load evaluation results from a CSV file.
        """
        try:
            df = pd.read_csv(path, index_col=0, header=[0, 1])
            return EvalManager(df)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            raise

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a pandas DataFrame.
        """
        return self._df.copy()

    @property
    def models(self) -> pd.Index:
        """
        Returns the models for which there are evaluation results.
        """
        return self.data.index

    @property
    def datasets(self) -> pd.Index:
        """
        Returns the datasets for which there are evaluation results.
        """
        return self.data.columns.get_level_values(0).unique()

    @property
    def metrics(self) -> pd.Index:
        """
        Returns the metrics for which there are evaluation results.
        """
        return self.data.columns.get_level_values(1)

    @staticmethod
    def melt(df: pd.DataFrame) -> pd.DataFrame:
        """
        Melt a suitable DataFrame (e.g. returned by `get_df_for_dataset` and
        `get_df_for_metric`) into a 'long' format.
        """
        return df.T.reset_index(names=[EvalManager.dataset_col, EvalManager.metric_col]).melt(
            id_vars=[EvalManager.dataset_col, EvalManager.metric_col],
            var_name=EvalManager.model_col,
            value_name="score",
        )

    @property
    def melted(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a 'melted' DataFrame.
        Useful for plotting with seaborn.
""" return EvalManager.melt(self.data) def get_df_for_model(self, model: str) -> pd.DataFrame: if model not in self.data.index: raise ValueError(f"Model {model} not found in the evaluation results") return self.data.loc[[model], :] # type: ignore def get_df_for_dataset(self, dataset: str) -> pd.DataFrame: if dataset not in self.datasets: raise ValueError(f"Dataset {dataset} not found in the evaluation results") return self.data.loc[:, (dataset, slice(None))] # type: ignore def get_df_for_metric(self, metric: str) -> pd.DataFrame: if metric not in self.metrics: raise ValueError(f"Metric {metric} not found in the evaluation results") return self.data.loc[:, (slice(None), metric)] # type: ignore def sort_by_dataset(self, ascending: bool = True) -> EvalManager: """ Sort the evaluation results by dataset name. """ df = self.data.T.sort_index(level=0, ascending=ascending).T return EvalManager(df) def sort_by_metric(self, ascending: bool = True) -> EvalManager: """ Sort the evaluation results by metric name. """ df = self.data.T.sort_index(level=1, ascending=ascending).T return EvalManager(df) def sort_columns(self, ascending: bool = True) -> EvalManager: """ Sort the evaluation results by dataset name and then by metric name. """ df = self.data.T.sort_index(level=[0, 1], ascending=ascending).T return EvalManager(df) def to_csv(self, path: str | Path): """ Save the evaluation results to a CSV file. Using `Evaluation.from_csv(path_to_saved_csv)` will load the evaluation results back into memory. """ savepath = Path(path) savepath.parent.mkdir(parents=True, exist_ok=True) self.data.to_csv(savepath)