import json
import logging
from os.path import join as opj
from typing import List

import pandas as pd

from config import dataset2info, model2info, LOCAL_RESULTS_DIR

logger = logging.getLogger(__name__)


def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], setup: str
):
    """Collect the per-language gender gap for one model/dataset/setup.

    Returns a dict mapping each language ID to the mean gender difference of
    the dataset's evaluation metric, or None when the corresponding result
    file is missing.
    """
    lang_gaps = dict()
    for lang in lang_ids:
        try:
            with open(
                opj(
                    LOCAL_RESULTS_DIR,
                    "evaluation",
                    dataset_id,
                    f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
                )
            ) as fp:
                data = json.load(fp)
                # Each result file stores its metric name under "eval_metric";
                # the gap is the corresponding "<metric>_diff_mean" entry.
                lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
        except FileNotFoundError:
            logger.debug(
                "Could not find the result file for <model, dataset, lang>: "
                f"{model_id}, {dataset_id}, {lang}"
            )
            lang_gaps[lang] = None
    return lang_gaps


def read_all_configs(setup: str):
    """Build a long-format DataFrame with one row per (model, dataset, language) gap."""
    all_datasets = dataset2info.keys()
    print("Parsing results datasets:", all_datasets)
    all_models = model2info.keys()
    print("Parsing results models:", all_models)

    rows = list()
    for dataset_id in all_datasets:
        for model_id in all_models:
            lang_gaps = load_language_results(
                model_id, dataset_id, dataset2info[dataset_id].langs, setup
            )
            rows.extend(
                [
                    {
                        "Model": model_id,
                        "Dataset": dataset_id,
                        "Language": lang,
                        "Gap": lang_gaps[lang],
                    }
                    for lang in lang_gaps
                ]
            )

    results_df = pd.DataFrame(rows)
    # results_df = results_df.drop(columns=["Dataset"])
    # results_df = results_df.sort_values(by="Mean Gap", ascending=True)
    return results_df


def get_common_langs():
    """Return a list of languages that are supported by all models."""
    common_langs = set(model2info[list(model2info.keys())[0]].langs)
    for model_id in model2info.keys():
        common_langs = common_langs.intersection(model2info[model_id].langs)
    return list(common_langs)
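

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original Space: it shows how the
    # helpers above can be combined into a table restricted to languages that
    # every model supports. The "setup" value below is a hypothetical
    # placeholder; the real setup names are whatever suffixes appear in the
    # result file names under LOCAL_RESULTS_DIR.
    setup = "default"  # assumption: replace with an actual setup name
    results = read_all_configs(setup)
    common = get_common_langs()
    results = results[results["Language"].isin(common)]
    print(results.head())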