import json
from os.path import join as opj
from typing import List

import pandas as pd

from config import dataset2info, model2info, LOCAL_RESULTS_DIR


def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], setup: str
) -> dict:
    """Load the per-language gap value for one model/dataset pair.

    For every language in *lang_ids* this reads
    ``<LOCAL_RESULTS_DIR>/evaluation/<dataset_id>/
    results_<model_id>_<dataset_id>_devtest_<lang>_gender_<setup>.json``
    and extracts the entry named ``"<eval_metric>_diff_mean"``, where
    ``<eval_metric>`` is the value stored under the file's own
    ``"eval_metric"`` key.

    Args:
        model_id: Model identifier used in the result file name.
        dataset_id: Dataset identifier; used both as sub-directory and in
            the file name.
        lang_ids: Languages to load, one result file each.
        setup: Setup suffix used in the result file name.

    Returns:
        A dict mapping each language id to the value read from its file,
        in the order of *lang_ids*.

    Raises:
        FileNotFoundError: If a result file does not exist.
        KeyError: If a file lacks ``"eval_metric"`` or the derived
            ``"<eval_metric>_diff_mean"`` key.
    """
    lang_gaps = {}
    for lang in lang_ids:
        result_path = opj(
            LOCAL_RESULTS_DIR,
            "evaluation",
            dataset_id,
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json",
        )
        # Explicit encoding: JSON is UTF-8 by specification, and the default
        # text encoding of open() depends on the platform locale.
        with open(result_path, encoding="utf-8") as fp:
            data = json.load(fp)
        # The metric name is stored in the file itself and selects which
        # "<metric>_diff_mean" entry to report.
        lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
    return lang_gaps


def read_all_configs(setup: str) -> pd.DataFrame:
    """Collect the gap values of every model on every dataset for *setup*.

    Iterates over all keys of ``dataset2info`` and ``model2info`` and loads
    the per-language results for each combination via
    ``load_language_results``, using ``dataset2info[dataset_id].langs`` as
    the language list.

    Args:
        setup: Setup suffix forwarded to ``load_language_results``.

    Returns:
        A DataFrame with the columns ``"Model"``, ``"Language"`` and
        ``"Gap"``, one row per (dataset, model, language) combination. The
        dataset id is not part of the returned frame. Rows keep their
        collection order (dataset, then model, then language); no sorting is
        applied. When nothing is collected, an empty frame with the same
        three columns is returned.
    """
    all_datasets = dataset2info.keys()
    print("Parsing results datasets:", all_datasets)
    all_models = model2info.keys()
    print("Parsing results models:", all_models)

    rows = []
    for dataset_id in all_datasets:
        for model_id in all_models:
            lang_gaps = load_language_results(
                model_id, dataset_id, dataset2info[dataset_id].langs, setup
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    # Naming the columns explicitly keeps the schema stable even when `rows`
    # is empty; otherwise dropping "Dataset" would raise a KeyError.
    results_df = pd.DataFrame(rows, columns=["Model", "Dataset", "Language", "Gap"])
    results_df = results_df.drop(columns=["Dataset"])
    return results_df