# AudioBench-Leaderboard / process_log.py
import os
import re
import sys
import json
import random
import pandas as pd
import numpy as np
from app.content import *
data_to_df = []
log_dir = "path/to/audiobench/log"
all_evaluated_models = os.listdir(log_dir)
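# The parsing below assumes each model owns one sub-directory of per-dataset score files,
# roughly like this (layout inferred from the filename regex; dataset names are hypothetical):
#
#   <log_dir>/
#       <model_name>/
#           <dataset>_wer_score.json
#           <dataset>_bleu_score.json
#           <dataset>_llama3_70b_judge_score.json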
# Walk every model's log directory and collect one (model, dataset, metric, value) row per score file.
for model_name in all_evaluated_models:
    # Skip internal / debugging runs that should not appear on the leaderboard.
    if "geyu_whisper" in model_name:
        continue
    if "activation_checkpointing" in model_name:
        continue
    model_dir = os.path.join(log_dir, model_name)
    if not os.path.isdir(model_dir):
        continue
    for log_file in os.listdir(model_dir):
        if not log_file.endswith("score.json"):
            continue
        match = re.match(r"^(.*?)_(llama3_70b_judge|wer|bleu)_score\.json$", log_file)
        if match is None:
            # Score files for metrics other than the three above are ignored here.
            continue
        ds_name = match.group(1)
        metrics = match.group(2)
        eval_path = os.path.join(model_dir, log_file)
        with open(eval_path, "r") as f:
            eval_data = json.load(f)
        # The LLM-as-judge score is nested one level deeper than the WER/BLEU scores.
        if metrics == "llama3_70b_judge":
            value = eval_data[metrics]["judge_score"]
        elif metrics in ("wer", "bleu"):
            value = eval_data[metrics]
        data_to_df.append([model_name, ds_name, metrics, value])
eval_result_df = pd.DataFrame(data_to_df, columns=["model", "dataset", "metrics", "value"])
eval_result_df["model"] = eval_result_df["model"].replace("MERaLiON_AudioLLM_v1_hf", "MERaLiON-AudioLLM-Whisper-SEA-LION")
# Archived copy of the original results_organized folder; newly parsed scores are merged on top of it.
archive_results_dir = "results_organized_archive"
output_results_dir = "results_organized"
def merge_results(display_datasets, metrics, result_sub_path=None):
    """Pivot the newly parsed scores for one task group into a wide table and
    merge them with the archived leaderboard CSV, if one exists."""
    raw_ds_names = [displayname2datasetname[dis_name] for dis_name in display_datasets]
    new_result = eval_result_df[
        eval_result_df["dataset"].isin(raw_ds_names) & (eval_result_df["metrics"] == metrics)
    ]
    new_result = new_result.drop(columns=["metrics"])
    new_result = new_result.pivot(index="model", columns="dataset", values="value").reset_index()
    new_result = new_result.rename(columns={"model": "Model"})
    # Keep only models that have been evaluated on every dataset of this group.
    new_result = new_result.dropna(axis=0, how="any")

    archive_result_path = os.path.join(archive_results_dir, result_sub_path)
    if os.path.exists(archive_result_path):
        archive_result = pd.read_csv(archive_result_path)
        archive_columns = [col for col in archive_result.columns if col in raw_ds_names]
        archive_result = archive_result[["Model"] + archive_columns]
        combined_result = pd.concat([archive_result, new_result], ignore_index=True)
        # When a model appears in both the archive and the new logs, keep the freshly computed scores.
        combined_result = combined_result.drop_duplicates(subset=["Model"], keep="last", ignore_index=True)
        return new_result, combined_result
    return new_result, new_result
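# Sketch of the intended call pattern (the dataset group and sub-path are examples drawn from
# the mapper below): merge_results(asr_english_datasets, "wer", "wer/asr_english.csv")
# returns (new_result, combined_result); because drop_duplicates uses keep="last", a model
# present in both the archive CSV and the fresh logs ends up with its freshly parsed scores.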
result_file_mapper = {
    "bleu/st.csv": speech_translation_datasets,
    "llama3_70b_judge/accent_recognition.csv": ar_datasets,
    "llama3_70b_judge/audio_captioning.csv": ac_datasets,
    "llama3_70b_judge/audio_scene_question_answering.csv": asqa_datasets,
    "llama3_70b_judge/emotion_recognition.csv": er_datasets,
    "llama3_70b_judge/gender_recognition.csv": gr_datasets,
    "llama3_70b_judge/music_understanding.csv": music_datasets,
    "llama3_70b_judge/sds_singlish.csv": sds_datasets,
    "llama3_70b_judge/speech_instruction.csv": si_datasets,
    "llama3_70b_judge/sqa_english.csv": speech_qa_english_datasets,
    "llama3_70b_judge/sqa_singlish.csv": speech_qa_singlish_datasets,
    "llama3_70b_judge/under_development_llama3_70b_judge.csv": non_wer_development_datasets,
    "meteor/audio_captioning.csv": ac_datasets,
    "wer/asr_english.csv": asr_english_datasets,
    "wer/asr_singlish.csv": asr_singlish_datasets,
    "wer/asr_mandarin.csv": asr_mandarin_datasets,
    "wer/asr_malay.csv": asr_malay_datasets,
    "wer/asr_tamil.csv": asr_tamil_datasets,
    "wer/asr_indonesian.csv": asr_indonesian_datasets,
    "wer/asr_thai.csv": asr_thai_datasets,
    "wer/asr_vietnamese.csv": asr_vietnamese_datasets,
    "wer/asr_private.csv": asr_private_datasets,
    "wer/under_development_wer.csv": wer_development_datasets,
}
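# Note: the log parser above only extracts llama3_70b_judge, wer and bleu scores, so the
# "meteor/audio_captioning.csv" table is populated purely from the archived CSV (if present).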
for sub_path, display_ds in result_file_mapper.items():
    # The metric name is encoded as the first path component, e.g. "wer/asr_english.csv" -> "wer".
    metrics = sub_path.split("/")[0]
    new_result, combined_result = merge_results(display_ds, metrics, sub_path)
    output_path = os.path.join(output_results_dir, sub_path)
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    combined_result.to_csv(output_path, index=False)
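# The result is one wide CSV per task group under results_organized/, e.g.
# results_organized/wer/asr_english.csv, each with a "Model" column followed by one column
# per dataset in that group.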