from __future__ import annotations

from pathlib import Path
from typing import Any, ClassVar, Dict, Optional

import pandas as pd


class EvalManager:
    """
    Stores evaluation results for various datasets and metrics.

    The data is stored in a pandas DataFrame with a MultiIndex for columns.
    The first level of the MultiIndex is the dataset name and the second level is the metric name.

    Usage:
    >>> evaluator = EvalManager.from_dir("data/evaluation_results/")
    >>> print(evaluator.data)
    """
    model_col: ClassVar[str] = "model"
    dataset_col: ClassVar[str] = "dataset"
    metric_col: ClassVar[str] = "metric"

    def __init__(self, data: Optional[pd.DataFrame] = None):
        if data is None:
            data = pd.DataFrame()
        self._df = data
        self._df.index = self._df.index.rename(EvalManager.model_col)

    def __str__(self) -> str:
        return self.data.__str__()
    @staticmethod
    def from_dict(data: Dict[Any, Any]) -> EvalManager:
        """
        Load evaluation results from a dictionary.

        Expected format:
        {
            "model1": pd.read_json(path1).T.stack(),
            "model2": pd.read_json(path2).T.stack(),
        }
        """
        df = pd.DataFrame.from_dict(data, orient="index")
        return EvalManager(df)
    @staticmethod
    def from_json(path: str | Path) -> EvalManager:
        datapath = Path(path)
        if not datapath.is_file():
            raise FileNotFoundError(f"{path} is not a file")
        data = {}
        data[datapath.stem] = pd.read_json(datapath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)
    @staticmethod
    def from_dir(datadir: str | Path) -> EvalManager:
        datadir_ = Path(datadir)
        if not datadir_.is_dir():
            raise FileNotFoundError(f"{datadir} is not a directory")
        eval_files = list(datadir_.glob("*.json"))
        data = {}
        for filepath in eval_files:
            data[filepath.stem] = pd.read_json(filepath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)
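    # Note: `from_json` and `from_dir` appear to expect one JSON file per model shaped as
    # {"<dataset>": {"<metric>": <score>, ...}, ...}; `pd.read_json(...).T.stack()` then
    # yields a Series indexed by (dataset, metric) pairs. This layout is inferred from the
    # code above rather than stated explicitly.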
    @staticmethod
    def from_csv(path: str | Path) -> EvalManager:
        """
        Load evaluation results from a CSV file.
        """
        try:
            df = pd.read_csv(path, index_col=0, header=[0, 1])
            return EvalManager(df)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            raise
    @property
    def data(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a pandas DataFrame.
        """
        return self._df.copy()

    @property
    def models(self) -> pd.Index:
        """
        Returns the models for which there are evaluation results.
        """
        return self.data.index

    @property
    def datasets(self) -> pd.Index:
        """
        Returns the datasets for which there are evaluation results.
        """
        return self.data.columns.get_level_values(0).unique()

    @property
    def metrics(self) -> pd.Index:
        """
        Returns the metrics for which there are evaluation results.
        """
        return self.data.columns.get_level_values(1)
    @staticmethod
    def melt(df: pd.DataFrame) -> pd.DataFrame:
        """
        Melt a suitable DataFrame (e.g. returned by `get_df_for_dataset` and
        `get_df_for_metric`) into a 'long' format.
        """
        return df.T.reset_index(names=[EvalManager.dataset_col, EvalManager.metric_col]).melt(
            id_vars=[EvalManager.dataset_col, EvalManager.metric_col],
            var_name=EvalManager.model_col,
            value_name="score",
        )

    @property
    def melted(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a 'melted' DataFrame.
        Useful for plotting with seaborn.
        """
        return EvalManager.melt(self.data)
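    # Illustration (hypothetical values): a wide frame with rows = models and
    # columns = (dataset, metric), e.g.
    #
    #            (docvqa, ndcg_at_5)  (tatdqa, ndcg_at_5)
    #   model_a                 0.52                 0.61
    #
    # melts into the long format
    #
    #   dataset   metric      model    score
    #   docvqa    ndcg_at_5   model_a   0.52
    #   tatdqa    ndcg_at_5   model_a   0.61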
    def get_df_for_model(self, model: str) -> pd.DataFrame:
        if model not in self.data.index:
            raise ValueError(f"Model {model} not found in the evaluation results")
        return self.data.loc[[model], :]  # type: ignore

    def get_df_for_dataset(self, dataset: str) -> pd.DataFrame:
        if dataset not in self.datasets:
            raise ValueError(f"Dataset {dataset} not found in the evaluation results")
        return self.data.loc[:, (dataset, slice(None))]  # type: ignore

    def get_df_for_metric(self, metric: str) -> pd.DataFrame:
        if metric not in self.metrics:
            raise ValueError(f"Metric {metric} not found in the evaluation results")
        return self.data.loc[:, (slice(None), metric)]  # type: ignore
    def sort_by_dataset(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name.
        """
        df = self.data.T.sort_index(level=0, ascending=ascending).T
        return EvalManager(df)

    def sort_by_metric(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by metric name.
        """
        df = self.data.T.sort_index(level=1, ascending=ascending).T
        return EvalManager(df)

    def sort_columns(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name and then by metric name.
        """
        df = self.data.T.sort_index(level=[0, 1], ascending=ascending).T
        return EvalManager(df)
    def to_csv(self, path: str | Path):
        """
        Save the evaluation results to a CSV file.

        Using `EvalManager.from_csv(path_to_saved_csv)` will load the evaluation results back into memory.
        """
        savepath = Path(path)
        savepath.parent.mkdir(parents=True, exist_ok=True)
        self.data.to_csv(savepath)
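
# --- Usage sketch (illustrative only) ---
# A minimal, self-contained example of how this class might be used. The model names,
# dataset names, metric name, and output path below are made up for illustration; they
# are not part of the module.
if __name__ == "__main__":
    # One (dataset, metric)-indexed Series per model, mirroring what
    # `pd.read_json(...).T.stack()` produces for a results file.
    series_a = pd.Series({("docvqa", "ndcg_at_5"): 0.52, ("tatdqa", "ndcg_at_5"): 0.61})
    series_b = pd.Series({("docvqa", "ndcg_at_5"): 0.48, ("tatdqa", "ndcg_at_5"): 0.57})
    manager = EvalManager.from_dict({"model_a": series_a, "model_b": series_b})

    print(manager.datasets)  # dataset level of the column MultiIndex
    print(manager.melted)    # long format: dataset / metric / model / score

    # Round-trip through CSV.
    manager.to_csv("results/all_models.csv")
    restored = EvalManager.from_csv("results/all_models.csv")
    print(restored.get_df_for_metric("ndcg_at_5"))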