fair-asr-leaderboard / parsing.py
g8a9's picture
add minimal structure and parsing cv17 results
ad108b7
raw
history blame
1.64 kB
import pandas as pd
from typing import List
from os.path import join as opj
import json
from config import dataset2info, model2info, LOCAL_RESULTS_DIR
def load_language_results(
    model_id: str, dataset_id: str, lang_ids: List[str], setup: str
) -> dict:
    """Load the per-language gap value of one model on one dataset.

    For every language in ``lang_ids`` this reads the JSON file
    ``results_<model_id>_<dataset_id>_devtest_<lang>_gender_<setup>.json``
    located under ``<LOCAL_RESULTS_DIR>/evaluation/<dataset_id>/`` and picks
    the entry stored under ``"<eval_metric>_diff_mean"``, where
    ``<eval_metric>`` is the value of the file's own ``"eval_metric"`` entry.

    Args:
        model_id: Model identifier as it appears in the results file name.
        dataset_id: Dataset identifier; used both as the sub-directory name
            and inside the results file name.
        lang_ids: Language identifiers to load, one results file each.
        setup: Setup identifier as it appears in the results file name.

    Returns:
        A dict mapping each language id to the ``*_diff_mean`` value found in
        its results file.

    Raises:
        FileNotFoundError: If a results file does not exist.
        KeyError: If a results file lacks ``"eval_metric"`` or the derived
            ``"<eval_metric>_diff_mean"`` entry.
    """
    # The directory does not depend on the language, so build it only once.
    results_dir = opj(LOCAL_RESULTS_DIR, "evaluation", dataset_id)

    lang_gaps = {}
    for lang in lang_ids:
        file_name = (
            f"results_{model_id}_{dataset_id}_devtest_{lang}_gender_{setup}.json"
        )
        # JSON text is UTF-8; be explicit instead of relying on the platform's
        # default encoding, which differs between systems.
        with open(opj(results_dir, file_name), encoding="utf-8") as fp:
            data = json.load(fp)
        lang_gaps[lang] = data[f"{data['eval_metric']}_diff_mean"]
    return lang_gaps
def read_all_configs(setup: str) -> pd.DataFrame:
    """Collect the per-language gaps of every configured model and dataset.

    Iterates over all keys of ``dataset2info`` and ``model2info``, loads the
    gaps of each (dataset, model) pair with ``load_language_results`` for the
    languages listed in ``dataset2info[dataset_id].langs``, and flattens them
    into one row per (model, dataset, language).

    Args:
        setup: Setup identifier forwarded to ``load_language_results``.

    Returns:
        A DataFrame with the columns ``Model``, ``Language`` and ``Gap``.
        The ``Dataset`` column is dropped before returning, so rows coming
        from different datasets cannot be told apart in the result. When no
        rows are collected, an empty frame with those three columns is
        returned.
    """
    all_datasets = dataset2info.keys()
    print("Parsing results datasets:", all_datasets)
    all_models = model2info.keys()
    print("Parsing results models:", all_models)

    rows = []
    for dataset_id in all_datasets:
        for model_id in all_models:
            lang_gaps = load_language_results(
                model_id, dataset_id, dataset2info[dataset_id].langs, setup
            )
            rows.extend(
                {
                    "Model": model_id,
                    "Dataset": dataset_id,
                    "Language": lang,
                    "Gap": gap,
                }
                for lang, gap in lang_gaps.items()
            )

    # Name the columns explicitly: with no rows, a bare DataFrame would have
    # no columns at all and the drop below would raise KeyError.
    results_df = pd.DataFrame(
        rows, columns=["Model", "Dataset", "Language", "Gap"]
    )
    results_df = results_df.drop(columns=["Dataset"])
    # TODO: sorting is currently disabled; the previously sketched sort key
    # ("Mean Gap") is not a column produced by this function.
    return results_df