from __future__ import annotations

from pathlib import Path
from typing import Any, ClassVar, Dict, Optional

import pandas as pd


class EvalManager:
"""
Stores evaluation results for various datasets and metrics.
The data is stored in a pandas DataFrame with a MultiIndex for columns.
The first level of the MultiIndex is the dataset name and the second level is the metric name.
Usage:
>>> evaluator = Evaluator.from_dirpath("data/evaluation_results/")
>>> print(evaluator.data)
"""
    model_col: ClassVar[str] = "model"
    dataset_col: ClassVar[str] = "dataset"
    metric_col: ClassVar[str] = "metric"

    def __init__(self, data: Optional[pd.DataFrame] = None):
        if data is None:
            data = pd.DataFrame()
        self._df = data
        self._df.index = self._df.index.rename(EvalManager.model_col)

    def __str__(self) -> str:
        return self.data.__str__()

    @staticmethod
    def from_dict(data: Dict[Any, Any]) -> EvalManager:
        """
        Load evaluation results from a dictionary.

        Expected format:

        {
            "model1": pd.read_json(path1).T.stack(),
            "model2": pd.read_json(path2).T.stack(),
        }
        """
        df = pd.DataFrame.from_dict(data, orient="index")
        return EvalManager(df)

    @staticmethod
    def from_json(path: str | Path) -> EvalManager:
        datapath = Path(path)
        if not datapath.is_file():
            raise FileNotFoundError(f"{path} is not a file")
        data = {}
        data[datapath.stem] = pd.read_json(datapath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)
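
    # Note (inferred from `pd.read_json(...).T.stack()` above, not confirmed by the source):
    # each JSON file is assumed to map dataset names to {metric: score} dictionaries, e.g.
    #
    #   {"dataset_a": {"metric_1": 0.42, "metric_2": 0.61}}
    #
    # and the file stem is used as the model name in the resulting DataFrame index.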
    @staticmethod
    def from_dir(datadir: str | Path) -> EvalManager:
        datadir_ = Path(datadir)
        if not datadir_.is_dir():
            raise FileNotFoundError(f"{datadir} is not a directory")
        eval_files = list(datadir_.glob("*.json"))
        data = {}
        for filepath in eval_files:
            data[filepath.stem] = pd.read_json(filepath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_csv(path: str | Path) -> EvalManager:
        """
        Load evaluation results from a CSV file.
        """
        try:
            df = pd.read_csv(path, index_col=0, header=[0, 1])
            return EvalManager(df)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            raise e

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a pandas DataFrame.
        """
        return self._df.copy()

    @property
    def models(self) -> pd.Index:
        """
        Returns the models for which there are evaluation results.
        """
        return self.data.index

    @property
    def datasets(self) -> pd.Index:
        """
        Returns the datasets for which there are evaluation results.
        """
        return self.data.columns.get_level_values(0).unique()

    @property
    def metrics(self) -> pd.Index:
        """
        Returns the metrics for which there are evaluation results.
        """
        return self.data.columns.get_level_values(1)

    @staticmethod
    def melt(df: pd.DataFrame) -> pd.DataFrame:
        """
        Melt a suitable DataFrame (e.g. returned by `get_df_for_dataset` and
        `get_df_for_metric`) into a 'long' format.
        """
        return df.T.reset_index(names=[EvalManager.dataset_col, EvalManager.metric_col]).melt(
            id_vars=[EvalManager.dataset_col, EvalManager.metric_col],
            var_name=EvalManager.model_col,
            value_name="score",
        )

    @property
    def melted(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a 'melted' DataFrame.
        Useful for plotting with seaborn.
        """
        return EvalManager.melt(self.data)
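
    # Example (sketch, not part of the class API): plotting the melted results with seaborn,
    # assuming seaborn is installed (it is not imported in this module) and that evaluation
    # results live under the hypothetical path "data/evaluation_results/".
    #
    #   import seaborn as sns
    #   manager = EvalManager.from_dir("data/evaluation_results/")
    #   sns.barplot(
    #       data=manager.melted,
    #       x=EvalManager.dataset_col,
    #       y="score",
    #       hue=EvalManager.model_col,
    #   )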
    def get_df_for_model(self, model: str) -> pd.DataFrame:
        if model not in self.data.index:
            raise ValueError(f"Model {model} not found in the evaluation results")
        return self.data.loc[[model], :]  # type: ignore

    def get_df_for_dataset(self, dataset: str) -> pd.DataFrame:
        if dataset not in self.datasets:
            raise ValueError(f"Dataset {dataset} not found in the evaluation results")
        return self.data.loc[:, (dataset, slice(None))]  # type: ignore

    def get_df_for_metric(self, metric: str) -> pd.DataFrame:
        if metric not in self.metrics:
            raise ValueError(f"Metric {metric} not found in the evaluation results")
        return self.data.loc[:, (slice(None), metric)]  # type: ignore

    def sort_by_dataset(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name.
        """
        df = self.data.T.sort_index(level=0, ascending=ascending).T
        return EvalManager(df)

    def sort_by_metric(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by metric name.
        """
        df = self.data.T.sort_index(level=1, ascending=ascending).T
        return EvalManager(df)

    def sort_columns(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name and then by metric name.
        """
        df = self.data.T.sort_index(level=[0, 1], ascending=ascending).T
        return EvalManager(df)
    def to_csv(self, path: str | Path):
        """
        Save the evaluation results to a CSV file.

        Using `EvalManager.from_csv(path_to_saved_csv)` will load the evaluation results
        back into memory.
        """
        savepath = Path(path)
        savepath.parent.mkdir(parents=True, exist_ok=True)
        self.data.to_csv(savepath)
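

# Minimal usage sketch (assumption: a local "data/evaluation_results/" directory containing
# one <model_name>.json file per model; adjust the paths to your own setup).
if __name__ == "__main__":
    manager = EvalManager.from_dir("data/evaluation_results/")
    print(manager.models)
    print(manager.datasets)

    # CSV round trip: `to_csv` and `from_csv` preserve the (dataset, metric) MultiIndex columns.
    manager.sort_columns().to_csv("data/evaluation_results.csv")
    reloaded = EvalManager.from_csv("data/evaluation_results.csv")
    print(reloaded)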