Refactor evaluation logic
evaluation.py CHANGED: +14 -15
@@ -1,3 +1,4 @@
+import copy
 from dataclasses import dataclass
 
 import streamlit as st
@@ -5,7 +6,7 @@ from huggingface_hub import DatasetFilter, HfApi
 from huggingface_hub.hf_api import DatasetInfo
 
 
-@dataclass(frozen=True, eq=True)
+@dataclass(frozen=True, eq=True, unsafe_hash=True)
 class EvaluationInfo:
     task: str
     model: str
@@ -15,30 +16,29 @@ class EvaluationInfo:
     metrics: set
 
 
-def compute_evaluation_id(dataset_info: DatasetInfo) -> int:
+def create_evaluation_info(dataset_info: DatasetInfo) -> int:
     if dataset_info.cardData is not None:
         metadata = dataset_info.cardData["eval_info"]
         metadata.pop("col_mapping", None)
         # TODO(lewtun): populate dataset cards with metric info
         if "metrics" not in metadata:
             metadata["metrics"] = frozenset()
-
-        evaluation_info = EvaluationInfo(**metadata)
-        return hash(evaluation_info)
-    else:
-        return None
+        else:
+            metadata["metrics"] = frozenset(metadata["metrics"])
+        return EvaluationInfo(**metadata)
 
 
-def get_evaluation_ids():
+def get_evaluation_infos():
     filt = DatasetFilter(author="autoevaluate")
     evaluation_datasets = HfApi().list_datasets(filter=filt, full=True)
-    return [compute_evaluation_id(dset) for dset in evaluation_datasets]
+    return [create_evaluation_info(dset) for dset in evaluation_datasets]
 
 
 def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
-    evaluation_ids = get_evaluation_ids()
+    evaluation_infos = get_evaluation_infos()
+    models_to_filter = copy.copy(models)
 
-    for idx, model in enumerate(models):
+    for model in models_to_filter:
         evaluation_info = EvaluationInfo(
             task=task,
             model=model,
@@ -47,12 +47,11 @@ def filter_evaluated_models(models, task, dataset_name, dataset_config, dataset_split, metrics):
             dataset_split=dataset_split,
             metrics=frozenset(metrics),
         )
-        candidate_id = hash(evaluation_info)
-        if candidate_id in evaluation_ids:
+        if evaluation_info in evaluation_infos:
             st.info(
-                f"Model `{model}` has already been evaluated on this configuration. \
+                f"Model [`{model}`](https://huggingface.co/{model}) has already been evaluated on this configuration. \
                     This model will be excluded from the evaluation job..."
             )
-            models.pop(idx)
+            models.remove(model)
 
     return models
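Why the membership test works: a standalone sketch, with made-up field values, of the dataclass behavior the diff relies on. `eq=True` gives `EvaluationInfo` value equality, so the `in` check in `filter_evaluated_models` can match a freshly built instance against the infos recovered from dataset cards, and `unsafe_hash=True` adds a value-based `__hash__` on top. The `make_info` helper is hypothetical, for this sketch only.

from dataclasses import dataclass


@dataclass(frozen=True, eq=True, unsafe_hash=True)
class EvaluationInfo:
    task: str
    model: str
    dataset_name: str
    dataset_config: str
    dataset_split: str
    metrics: set  # holds a frozenset in practice, keeping instances hashable


def make_info(model: str) -> EvaluationInfo:
    # Hypothetical helper for this sketch only.
    return EvaluationInfo(
        task="text-classification",
        model=model,
        dataset_name="imdb",
        dataset_config="plain_text",
        dataset_split="test",
        metrics=frozenset({"accuracy"}),
    )


evaluation_infos = [make_info("bert-base-uncased")]

# Two separately constructed instances compare equal field by field,
# so the list membership test used in filter_evaluated_models succeeds.
assert make_info("bert-base-uncased") in evaluation_infos

# The value-based hash means the infos could equally be kept in a set
# for O(1) lookups instead of the list's O(n) scan.
assert make_info("bert-base-uncased") in set(evaluation_infos)

Membership in a plain list only needs `__eq__`; the hash flags start to matter once the infos are stored in a set or used as dict keys.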
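The new `else` branch in `create_evaluation_info` normalizes whatever the dataset card stores under `metrics` (typically a list) into a `frozenset`, which is what makes the comparison above order-insensitive and keeps the dataclass hashable. A quick illustration:

# Lists compare order-sensitively and are unhashable, so two cards listing
# the same metrics in a different order would never match.
assert ["accuracy", "f1"] != ["f1", "accuracy"]

# frozensets are order-insensitive and hashable, so equality and any
# hash-based lookup behave as intended.
assert frozenset(["accuracy", "f1"]) == frozenset(["f1", "accuracy"])
assert hash(frozenset(["accuracy", "f1"])) == hash(frozenset(["f1", "accuracy"]))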
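Finally, `models_to_filter = copy.copy(models)` is what makes the `models.remove(model)` call safe: the loop now walks a shallow copy while mutating the original list. The old `for idx, model in enumerate(models)` / `models.pop(idx)` pattern removed items from the very list being iterated, which skips the element that slides into the freed slot. A minimal sketch with hypothetical model ids:

import copy

models = ["model-a", "model-b", "model-c"]

# Buggy pattern: mutating the list while iterating over it.
buggy = list(models)
for m in buggy:
    buggy.remove(m)
print(buggy)  # ['model-b'] -- skipped after 'model-a' was removed

# Fixed pattern, as in the diff: iterate a shallow copy, mutate the original.
fixed = list(models)
for m in copy.copy(fixed):
    fixed.remove(m)
print(fixed)  # [] -- every element is visited and removed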