from __future__ import annotations

from pathlib import Path
from typing import Any, ClassVar, Dict, Optional

import pandas as pd


class EvalManager:
    """
    Stores evaluation results for various datasets and metrics.

    The data is stored in a pandas DataFrame with a MultiIndex for columns.
    The first level of the MultiIndex is the dataset name and the second level is the metric name.

    Usage:
    >>> evaluator = EvalManager.from_dir("data/evaluation_results/")
    >>> print(evaluator.data)
    """

    model_col: ClassVar[str] = "model"
    dataset_col: ClassVar[str] = "dataset"
    metric_col: ClassVar[str] = "metric"

    def __init__(self, data: Optional[pd.DataFrame] = None):
        if data is None:
            data = pd.DataFrame()
        self._df = data
        self._df.index = self._df.index.rename(EvalManager.model_col)

    def __str__(self) -> str:
        return self.data.__str__()

    @staticmethod
    def from_dict(data: Dict[Any, Any]) -> EvalManager:
        """
        Load evaluation results from a dictionary.

        Expected format:
        {
            "model1": pd.read_json(path1).T.stack(),
            "model2": pd.read_json(path2).T.stack(),
        }
        """
        df = pd.DataFrame.from_dict(data, orient="index")
        return EvalManager(df)

    @staticmethod
    def from_json(path: str | Path) -> EvalManager:
        datapath = Path(path)
        if not datapath.is_file():
            raise FileNotFoundError(f"{path} is not a file")
        data = {}
        data[datapath.stem] = pd.read_json(datapath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_dir(datadir: str | Path) -> EvalManager:
        datadir_ = Path(datadir)
        if not datadir_.is_dir():
            raise FileNotFoundError(f"{datadir} is not a directory")
        eval_files = list(datadir_.glob("*.json"))
        data = {}
        for filepath in eval_files:
            data[filepath.stem] = pd.read_json(filepath).T.stack()  # pylint: disable=no-member
        return EvalManager.from_dict(data)

    @staticmethod
    def from_csv(path: str | Path) -> EvalManager:
        """
        Load evaluation results from a CSV file.
        """
        try:
            # `header=[0, 1]` rebuilds the (dataset, metric) MultiIndex columns.
            df = pd.read_csv(path, index_col=0, header=[0, 1])
            return EvalManager(df)
        except Exception as e:
            print(f"Error loading {path}: {e}")
            raise e

    @property
    def data(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a pandas DataFrame.
        """
        return self._df.copy()

    @property
    def models(self) -> pd.Index:
        """
        Returns the models for which there are evaluation results.
        """
        return self.data.index

    @property
    def datasets(self) -> pd.Index:
        """
        Returns the datasets for which there are evaluation results.
        """
        return self.data.columns.get_level_values(0).unique()

    @property
    def metrics(self) -> pd.Index:
        """
        Returns the metrics for which there are evaluation results.
        """
        return self.data.columns.get_level_values(1)

    @staticmethod
    def melt(df: pd.DataFrame) -> pd.DataFrame:
        """
        Melt a suitable DataFrame (e.g. returned by `get_df_for_dataset` and
        `get_df_for_metric`) into a 'long' format.
        """
        return df.T.reset_index(names=[EvalManager.dataset_col, EvalManager.metric_col]).melt(
            id_vars=[EvalManager.dataset_col, EvalManager.metric_col],
            var_name=EvalManager.model_col,
            value_name="score",
        )
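
    # Example of the long format produced by `melt` (illustrative names and
    # values), with one row per (dataset, metric, model) combination:
    #
    #      dataset    metric        model   score
    #   0  dataset_a  ndcg_at_5     model1  0.83
    #   1  dataset_a  recall_at_10  model1  0.91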

    @property
    def melted(self) -> pd.DataFrame:
        """
        Returns the evaluation results as a 'melted' DataFrame.
        Useful for plotting with seaborn.
        """
        return EvalManager.melt(self.data)

    def get_df_for_model(self, model: str) -> pd.DataFrame:
        if model not in self.data.index:
            raise ValueError(f"Model {model} not found in the evaluation results")
        return self.data.loc[[model], :]  # type: ignore

    def get_df_for_dataset(self, dataset: str) -> pd.DataFrame:
        if dataset not in self.datasets:
            raise ValueError(f"Dataset {dataset} not found in the evaluation results")
        return self.data.loc[:, (dataset, slice(None))]  # type: ignore

    def get_df_for_metric(self, metric: str) -> pd.DataFrame:
        if metric not in self.metrics:
            raise ValueError(f"Metric {metric} not found in the evaluation results")
        return self.data.loc[:, (slice(None), metric)]  # type: ignore

    def sort_by_dataset(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name.
        """
        df = self.data.T.sort_index(level=0, ascending=ascending).T
        return EvalManager(df)

    def sort_by_metric(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by metric name.
        """
        df = self.data.T.sort_index(level=1, ascending=ascending).T
        return EvalManager(df)

    def sort_columns(self, ascending: bool = True) -> EvalManager:
        """
        Sort the evaluation results by dataset name and then by metric name.
        """
        df = self.data.T.sort_index(level=[0, 1], ascending=ascending).T
        return EvalManager(df)

    def to_csv(self, path: str | Path):
        """
        Save the evaluation results to a CSV file.

        Using `EvalManager.from_csv(path_to_saved_csv)` will load the evaluation results back into memory.
        """
        savepath = Path(path)
        savepath.parent.mkdir(parents=True, exist_ok=True)
        self.data.to_csv(savepath)
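

if __name__ == "__main__":
    # Minimal usage sketch. The model, dataset and metric names below are
    # hypothetical; in practice each dictionary entry would come from
    # `pd.read_json(path).T.stack()` as described in `from_dict`.
    dataset_metric_index = pd.MultiIndex.from_product(
        [["dataset_a", "dataset_b"], ["ndcg_at_5", "recall_at_10"]]
    )
    scores = {
        "model1": pd.Series([0.83, 0.91, 0.75, 0.88], index=dataset_metric_index),
        "model2": pd.Series([0.79, 0.85, 0.80, 0.90], index=dataset_metric_index),
    }

    manager = EvalManager.from_dict(scores)
    print(manager.datasets)       # Index(['dataset_a', 'dataset_b'], ...)
    print(manager.metrics)        # Index(['ndcg_at_5', 'recall_at_10', ...], ...)
    print(manager.melted.head())  # long format: dataset, metric, model, score

    # Round-trip through CSV: `to_csv` writes the two column levels as two
    # header rows, which `from_csv` reads back with `header=[0, 1]`.
    manager.to_csv("evaluation_results.csv")
    print(EvalManager.from_csv("evaluation_results.csv"))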