Spaces:

MERaLiON
/

AudioBench-Leaderboard

Running

App Files Files Community

AudioBench-Leaderboard / app /show_examples.py

zhuohan-7

Upload folder using huggingface_hub

067fc9d about 1 month ago

raw

history blame

8.15 kB

	import streamlit as st
	import datasets
	import numpy as np
	import io
	import soundfile as sf

	import html

	from app.content import displayname2datasetname

	def show_dataset_examples(display_name):
	    """Render every stored example of one dataset in the Streamlit page.

	    For each row an in-memory WAV player is shown next to a two-column
	    table holding the prompt and the reference answer.

	    Args:
	        display_name: Key into ``displayname2datasetname``. The mapped
	            dataset name selects the folder ``./examples/<dataset_name>``
	            that is loaded with ``datasets.load_from_disk``.

	    Raises:
	        KeyError: If ``display_name`` is not in ``displayname2datasetname``.
	    """
	    st.divider()
	    dataset_name = displayname2datasetname[display_name]
	    sample_folder = f"./examples/{dataset_name}"

	    # load dataset
	    dataset = datasets.load_from_disk(sample_folder)

	    # Only these datasets carry answer choices that belong to the prompt.
	    is_mcq = dataset_name in ('CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test')

	    # Built flush-left and without blank lines so the markup is passed
	    # through as one HTML block.
	    custom_css = (
	        '<style>\n'
	        '.my-container-table, p.my-container-text {\n'
	        'background-color: #fcf8dc;\n'
	        'padding: 10px;\n'
	        'border-radius: 5px;\n'
	        'font-size: 13px;\n'
	        '/* height: 50px; */\n'
	        'word-wrap: break-word;\n'
	        '}\n'
	        '</style>'
	    )

	    for index in range(len(dataset)):
	        # Fetch the row once instead of re-indexing the dataset per field.
	        example = dataset[index]
	        audio = example['context']['audio']

	        with st.container():
	            st.markdown(f'##### Example-{index+1}')
	            col1, col2 = st.columns([0.3, 0.7], vertical_alignment="center")

	            with col1:
	                # Convert the NumPy array to a WAV file in memory
	                bytes_io = io.BytesIO()
	                sf.write(bytes_io, audio['array'], audio['sampling_rate'], format='WAV')
	                bytes_io.seek(0)
	                # Play audio in Streamlit
	                st.audio(bytes_io, format='audio/wav')

	            question_text = f"{example['instruction']['text']}"
	            if is_mcq:
	                choices = example['other_attributes']['choices']
	                if isinstance(choices, str):
	                    choices_text = choices
	                elif isinstance(choices, (list, tuple)):
	                    choices_text = ' '.join(str(choice) for choice in choices)
	                else:
	                    # Previously this case reused the value from an earlier
	                    # row (or raised UnboundLocalError on the first row).
	                    choices_text = '' if choices is None else str(choices)
	                question_text = f"{question_text} {choices_text}"

	            # Escape exactly once, THEN add the line breaks; escaping after
	            # inserting <br> would print the tags as literal text.
	            prompt_html = html.escape(question_text)
	            for marker in ('(A)', '(B)', '(C)'):
	                prompt_html = prompt_html.replace(marker, f'<br>{marker}')
	            answer_html = html.escape(example['answer']['text'])

	            with col2:
	                st.markdown(custom_css, unsafe_allow_html=True)

	                body_details = (
	                    '<table style="table-layout: fixed; width:100%">\n'
	                    '<thead>\n'
	                    '<tr style="text-align: center;">\n'
	                    '<th style="width:50%">PROMPT</th>\n'
	                    '<th style="width:50%">ANSWER</th>\n'
	                    '</tr>\n'
	                    '<tr>\n'
	                    f'<td><b>{prompt_html}</b></td>\n'
	                    f'<td><b>{answer_html}</b></td>\n'
	                    '</tr>\n'
	                    '</thead>\n'
	                    '</table>'
	                )

	                st.markdown(
	                    f'<div class="my-container-table">\n{body_details}\n</div>',
	                    unsafe_allow_html=True,
	                )

	                st.text("")

	        st.divider()


	def show_examples(category_name, dataset_name, model_lists, display_model_names):
	    """Render each stored example with the reference and per-model predictions.

	    For every row of the dataset found at
	    ``./examples/<category_name>/<dataset_name>`` the matching
	    ``sample_<index>.wav`` file is offered for playback, followed by a table
	    with one REFERENCE row and one row per model.

	    Args:
	        category_name: First path component below ``./examples``.
	        dataset_name: Second path component; also decides whether answer
	            choices are appended to the question.
	        model_lists: Iterable of model keys. Each key is looked up in the
	            example row and in ``display_model_names``. Rows are shown in
	            sorted key order; the argument itself is not modified.
	        display_model_names: Mapping from model key to the label shown in
	            the MODEL column.
	    """
	    st.divider()
	    sample_folder = f"./examples/{category_name}/{dataset_name}"

	    dataset = datasets.load_from_disk(sample_folder)

	    is_mcq = dataset_name in ('CN-College-Listen-MCQ-Test', 'DREAM-TTS-MCQ-Test')
	    option_markers = ('(A)', '(B)', '(C)')

	    # Sort a copy once: the caller's list stays untouched and the work is
	    # not repeated for every example.
	    ordered_models = sorted(model_lists)

	    # Strip characters that would be read as markup and keep the
	    # prediction on a single line.
	    prediction_cleanup = str.maketrans(
	        {'<': None, '>': None, '*': None, '\n': '(newline)'}
	    )

	    # Built flush-left and without blank lines so the markup is passed
	    # through as one HTML block.
	    custom_css = (
	        '<style>\n'
	        '.my-container-table, p.my-container-text {\n'
	        'background-color: #fcf8dc;\n'
	        'padding: 10px;\n'
	        'border-radius: 5px;\n'
	        'font-size: 13px;\n'
	        '/* height: 50px; */\n'
	        'word-wrap: break-word;\n'
	        '}\n'
	        '</style>'
	    )

	    for index in range(len(dataset)):
	        # Fetch the row once instead of re-indexing the dataset per field.
	        example = dataset[index]

	        with st.container():
	            st.markdown(f'##### Example-{index+1}')
	            # The returned columns are not used; the call is kept so the
	            # rendered layout stays as it was.
	            st.columns([0.3, 0.7], vertical_alignment="center")

	            st.audio(f'{sample_folder}/sample_{index}.wav', format="audio/wav")

	            question_text = f"{example['instruction']['text']}"
	            if is_mcq:
	                choices = example['other_attributes']['choices']
	                if isinstance(choices, str):
	                    choices_text = choices
	                elif isinstance(choices, (list, tuple)):
	                    choices_text = ' '.join(str(choice) for choice in choices)
	                else:
	                    # Previously this case reused the value from an earlier
	                    # row (or raised UnboundLocalError on the first row).
	                    choices_text = '' if choices is None else str(choices)
	                question_text = f"{question_text} {choices_text}"

	            # Escape exactly once, THEN add the line breaks; escaping after
	            # inserting <br> would print the tags as literal text.
	            question_html = html.escape(question_text)
	            for marker in option_markers:
	                question_html = question_html.replace(marker, f'<br>{marker}')
	            answer_html = html.escape(example['answer']['text'])

	            with st.container():
	                st.markdown(custom_css, unsafe_allow_html=True)

	                rows = [
	                    '<tr>\n'
	                    '<td><b>REFERENCE</b></td>\n'
	                    f'<td><b>{question_html}</b></td>\n'
	                    f'<td><b>{answer_html}</b></td>\n'
	                    '</tr>'
	                ]

	                for model in ordered_models:
	                    try:
	                        record = example[model]
	                        model_prediction = record['model_prediction'].translate(
	                            prediction_cleanup
	                        )
	                        model_input = html.escape(record['text'])
	                        model_label = display_model_names[model]
	                    except (KeyError, TypeError, AttributeError):
	                        # Best effort: a model without usable data for this
	                        # dataset is skipped rather than aborting the page.
	                        print(f"{model} is not in {dataset_name}")
	                        continue

	                    if is_mcq:
	                        # Breaks are added after escaping so dataset text
	                        # can never inject markup of its own.
	                        for marker in ('Choices:',) + option_markers:
	                            model_input = model_input.replace(marker, f'<br>{marker}')

	                    rows.append(
	                        '<tr>\n'
	                        f'<td>{model_label}</td>\n'
	                        f'<td>{model_input}</td>\n'
	                        f'<td>{html.escape(model_prediction)}</td>\n'
	                        '</tr>'
	                    )

	                rows_html = '\n'.join(rows)
	                body_details = (
	                    '<table style="table-layout: fixed; width:100%">\n'
	                    '<thead>\n'
	                    '<tr style="text-align: center;">\n'
	                    '<th style="width:20%">MODEL</th>\n'
	                    '<th style="width:30%">QUESTION</th>\n'
	                    '<th style="width:50%">MODEL PREDICTION</th>\n'
	                    '</tr>\n'
	                    f'{rows_html}\n'
	                    '</thead>\n'
	                    '</table>'
	                )

	                st.markdown(
	                    f'<div class="my-container-table">\n{body_details}\n</div>',
	                    unsafe_allow_html=True,
	                )

	                st.text("")

	        st.divider()