from __future__ import annotations

from pathlib import Path
from typing import Any, ClassVar, Dict, Optional

import pandas as pd


class EvalManager:
    """
    Stores evaluation results for various datasets and metrics.

    The data is stored in a pandas DataFrame with a MultiIndex for columns.
    The first level of the MultiIndex is the dataset name and the second level is the metric name.

    Usage:
    >>> evaluator = EvalManager.from_dir("data/evaluation_results/")
    >>> print(evaluator.data)
    """

    model_col: ClassVar[str] = "model"
    dataset_col: ClassVar[str] = "dataset"
    metric_col: ClassVar[str] = "metric"

    def __init__(self, data: Optional[pd.DataFrame] = None):
        if data is None:
            data = pd.DataFrame()
        self._df = data
        self._df.index = self._df.index.rename(EvalManager.model_col)

    def __str__(self) -> str:
        return self.data.__str__()

    @staticmethod
    def from_dict(data: Dict[Any, Any]) -> EvalManager:
        """
        Load evaluation results from a dictionary.

        Expected format:
        {
            "model1": pd.read_json(path1).T.stack(),
            "model2": pd.read_json(path2).T.stack(),
        }
        """
        df = pd.DataFrame.from_dict(data, orient="index")
        return EvalManager(df)

    @staticmethod
    def from_json(path: str | Path) -> EvalManager:
        datapath = Path(path)
        if not datapath.is_file():
            raise FileNotFoundError(f"{path} is not a file")
        data = {}
        data[datapath.stem] = pd.read_json(datapath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_dir(datadir: str | Path) -> EvalManager:
        datadir_ = Path(datadir)
        if not datadir_.is_dir():
            raise FileNotFoundError(f"{datadir} is not a directory")
        eval_files = list(datadir_.glob("*.json"))
        data = {}
        for filepath in eval_files:
            data[filepath.stem] = pd.read_json(filepath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_csv(path: str | Path) -> EvalManager:
        """
        Load evaluation results from a CSV file.
        """
        try:
            # `header=[0, 1]` rebuilds the (dataset, metric) MultiIndex columns.
            df = pd.read_csv(path, index_col=0, header=[0, 1])
            return EvalManager(df)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            raise e

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a pandas DataFrame.
        """
        return self._df.copy()

    @property
    def models(self) -> pd.Index:
        """
        Returns the models for which there are evaluation results.
        """
        return self.data.index

    @property
    def datasets(self) -> pd.Index:
        """
        Returns the datasets for which there are evaluation results.
        """
        return self.data.columns.get_level_values(0).unique()

    @property
    def metrics(self) -> pd.Index:
        """
        Returns the metrics for which there are evaluation results.
        """
        return self.data.columns.get_level_values(1)

    @staticmethod
    def melt(df: pd.DataFrame) -> pd.DataFrame:
        """
        Melt a suitable DataFrame (e.g. returned by `get_df_for_dataset` and
        `get_df_for_metric`) into a 'long' format.
        """
        return df.T.reset_index(names=[EvalManager.dataset_col, EvalManager.metric_col]).melt(
            id_vars=[EvalManager.dataset_col, EvalManager.metric_col],
            var_name=EvalManager.model_col,
            value_name="score",
        )
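
    # Example of the long format produced by `melt` (illustrative names and
    # values), with one row per (dataset, metric, model) combination:
    #
    #      dataset    metric        model   score
    #   0  dataset_a  ndcg_at_5     model1  0.83
    #   1  dataset_a  recall_at_10  model1  0.91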

    @property
    def melted(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a 'melted' DataFrame.
        Useful for plotting with seaborn.
        """
        return EvalManager.melt(self.data)

    def get_df_for_model(self, model: str) -> pd.DataFrame:
        if model not in self.data.index:
            raise ValueError(f"Model {model} not found in the evaluation results")
        return self.data.loc[[model], :]  # type: ignore

    def get_df_for_dataset(self, dataset: str) -> pd.DataFrame:
        if dataset not in self.datasets:
            raise ValueError(f"Dataset {dataset} not found in the evaluation results")
        return self.data.loc[:, (dataset, slice(None))]  # type: ignore

    def get_df_for_metric(self, metric: str) -> pd.DataFrame:
        if metric not in self.metrics:
            raise ValueError(f"Metric {metric} not found in the evaluation results")
        return self.data.loc[:, (slice(None), metric)]  # type: ignore

    def sort_by_dataset(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name.
        """
        df = self.data.T.sort_index(level=0, ascending=ascending).T
        return EvalManager(df)

    def sort_by_metric(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by metric name.
        """
        df = self.data.T.sort_index(level=1, ascending=ascending).T
        return EvalManager(df)

    def sort_columns(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name and then by metric name.
        """
        df = self.data.T.sort_index(level=[0, 1], ascending=ascending).T
        return EvalManager(df)

    def to_csv(self, path: str | Path):
        """
        Save the evaluation results to a CSV file.

        Using `EvalManager.from_csv(path_to_saved_csv)` will load the evaluation results back into memory.
        """
        savepath = Path(path)
        savepath.parent.mkdir(parents=True, exist_ok=True)
        self.data.to_csv(savepath)
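

if __name__ == "__main__":
    # Minimal usage sketch. The model, dataset and metric names below are
    # hypothetical; in practice each dictionary entry would come from
    # `pd.read_json(path).T.stack()` as described in `from_dict`.
    dataset_metric_index = pd.MultiIndex.from_product(
        [["dataset_a", "dataset_b"], ["ndcg_at_5", "recall_at_10"]]
    )
    scores = {
        "model1": pd.Series([0.83, 0.91, 0.75, 0.88], index=dataset_metric_index),
        "model2": pd.Series([0.79, 0.85, 0.80, 0.90], index=dataset_metric_index),
    }

    manager = EvalManager.from_dict(scores)
    print(manager.datasets)       # Index(['dataset_a', 'dataset_b'], ...)
    print(manager.metrics)        # Index(['ndcg_at_5', 'recall_at_10', ...], ...)
    print(manager.melted.head())  # long format: dataset, metric, model, score

    # Round-trip through CSV: `to_csv` writes the two column levels as two
    # header rows, which `from_csv` reads back with `header=[0, 1]`.
    manager.to_csv("evaluation_results.csv")
    print(EvalManager.from_csv("evaluation_results.csv"))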