Upload folder using huggingface_hub
- app/content.py +74 -0
- app/pages.py +56 -21
app/content.py ADDED
@@ -0,0 +1,74 @@
+asr_datsets = {'LibriSpeech-Test-Clean': 'aa',
+               'LibriSpeech-Test-Other': 'bb',
+               'Common-Voice-15-En-Test': 'cc',
+               'Peoples-Speech-Test': 'dd',
+               'GigaSpeech-Test': 'ee',
+               'Earnings21-Test': 'ff',
+               'Earnings22-Test': 'gg',
+               'Tedlium3-Test': 'hh',
+               'Tedlium3-Longform-Test': 'ii',
+               'IMDA-Part1-ASR-Test': 'jj',
+               'IMDA-Part2-ASR-Test': 'kk',
+               'IMDA-Part3-ASR-Test': 'll',
+               'IMDA-Part4-ASR-Test': 'mm',
+               'IMDA-Part5-ASR-Test': 'nn',
+               'IMDA-Part6-ASR-Test': 'oo'
+               }
+
+sqa_datasets = {'CN-College-Listen-MCQ-Test': 'aa',
+                'DREAM-TTS-MCQ-Test': 'bb',
+                'SLUE-P2-SQA5-Test': 'cc',
+                'Public-SG-Speech-QA-Test': 'dd',
+                'Spoken-Squad-v1': 'ee'
+                }
+
+si_datasets = {'OpenHermes-Audio-Test': 'aa',
+               'ALPACA-Audio-Test': 'bb'
+               }
+
+ac_datasets = {
+    'WavCaps-Test': 'aa',
+    'AudioCaps-Test': 'bb'
+}
+
+asqa_datasets = {
+    'Clotho-AQA-Test': 'aa',
+    'WavCaps-QA-Test': 'bb',
+    'AudioCaps-QA-Test': 'cc'
+}
+
+er_datasets = {
+    'IEMOCAP-Emotion-Test': 'aa',
+    'MELD-Sentiment-Test': 'bb',
+    'MELD-Emotion-Test': 'cc'
+}
+
+ar_datsets = {
+    'VoxCeleb-Accent-Test': 'aa'
+}
+
+gr_datasets = {
+    'VoxCeleb-Gender-Test': 'aa',
+    'IEMOCAP-Gender-Test': 'bb'
+}
+
+spt_datasets = {
+    'Covost2-EN-ID-test': 'aa',
+    'Covost2-EN-ZH-test': 'bb',
+    'Covost2-EN-TA-test': 'cc',
+    'Covost2-ID-EN-test': 'dd',
+    'Covost2-ZH-EN-test': 'ee',
+    'Covost2-TA-EN-test': 'ff'
+}
+
+cnasr_datasets = {
+    'Aishell-ASR-ZH-Test': 'aa'
+}
+
+metrics = {
+    'wer': '11',
+    'llama3_70b_judge_binary': '22',
+    'llama3_70b_judge': '33',
+    'meteor': '44',
+    'bleu': '55'
+}
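The string values here ('aa', '11', and so on) read as placeholders for real dataset and metric descriptions. Since pages.py indexes these maps directly (for example `metrics[metric.lower().replace('-', '_')]` in `ac()`), any selection without an entry raises a KeyError. A minimal sketch of a more defensive lookup; the `describe` helper is a hypothetical name, not anything in this commit:

from app.content import ac_datasets, metrics

# Hypothetical guard: fall back to a stub instead of raising KeyError
# when a dataset or metric has no description entry yet.
def describe(dataset: str, metric: str) -> tuple[str, str]:
    dataset_info = ac_datasets.get(dataset, 'No description available.')
    metric_info = metrics.get(metric.lower().replace('-', '_'), 'No description available.')
    return dataset_info, metric_info

# Example: describe('WavCaps-Test', 'Llama3-70b-judge') returns ('aa', '33')
# with the placeholder values above.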
app/pages.py CHANGED
@@ -1,5 +1,29 @@
 import streamlit as st
 from app.draw_diagram import *
+from app.content import *
+
+def dataset_contents(dataset, metrics):
+
+    custom_css = """
+    <style>
+        .my-dataset-info {
+            # background-color: #F9EBEA;
+            # padding: 10px;
+            color: #626567;
+            font-style: italic;
+            font-size: 8px;
+            height: auto;
+        }
+    </style>
+    """
+    st.markdown(custom_css, unsafe_allow_html=True)
+    st.markdown(f"""<div class="my-dataset-info">
+                <p>DATASET INFORMATION: {dataset}</p>
+                </div>""", unsafe_allow_html=True)
+    st.markdown(f"""<div class="my-dataset-info">
+                <p>METRIC INFORMATION: {metrics}</p>
+                </div>""", unsafe_allow_html=True)
+
 
 def dashboard():
 
@@ -107,9 +131,10 @@ def asr():
     # sorted = st.selectbox('by', ['Ascending', 'Descending'])
 
     if filter_1:
+        dataset_contents(asr_datsets[filter_1], metrics['wer'])
         draw('su', 'ASR', filter_1, 'wer')
-    else:
-        draw('su', 'ASR', 'LibriSpeech-Test-Clean', 'wer')
+    # else:
+    #     draw('su', 'ASR', 'LibriSpeech-Test-Clean', 'wer')
 
 
     ## examples
@@ -133,11 +158,14 @@ def sqa():
 
     if filter_1:
         if filter_1 in binary:
+            dataset_contents(sqa_datasets[filter_1], metrics['llama3_70b_judge_binary'])
             draw('su', 'SQA', filter_1, 'llama3_70b_judge_binary')
+
         else:
+            dataset_contents(sqa_datasets[filter_1], metrics['llama3_70b_judge'])
             draw('su', 'SQA', filter_1, 'llama3_70b_judge')
-    else:
-        draw('su', 'SQA', 'CN-College-Listen-Test', 'llama3_70b_judge_binary')
+    # else:
+    #     draw('su', 'SQA', 'CN-College-Listen-Test', 'llama3_70b_judge_binary')
 
 def si():
     st.title("Speech Question Answering")
@@ -151,9 +179,10 @@ def si():
     filter_1 = st.selectbox('Select Dataset', filters_levelone)
 
     if filter_1:
+        dataset_contents(si_datasets[filter_1], metrics['llama3_70b_judge'])
         draw('su', 'SI', filter_1, 'llama3_70b_judge')
-    else:
-        draw('su', 'SI', 'OpenHermes-Audio-Test', 'llama3_70b_judge')
+    # else:
+    #     draw('su', 'SI', 'OpenHermes-Audio-Test', 'llama3_70b_judge')
 
 def ac():
     st.title("Audio Captioning")
@@ -181,9 +210,10 @@ def ac():
     # sorted = st.selectbox('by', ['Ascending', 'Descending'])
 
     if filter_1 or metric:
+        dataset_contents(ac_datasets[filter_1], metrics[metric.lower().replace('-', '_')])
         draw('asu', 'AC',filter_1, metric.lower().replace('-', '_'))
-    else:
-        draw('asu', 'AC', 'WavCaps-Test', 'llama3_70b_judge')
+    # else:
+    #     draw('asu', 'AC', 'WavCaps-Test', 'llama3_70b_judge')
 
 def asqa():
     st.title("Audio Scene Question Answering")
@@ -198,9 +228,10 @@ def asqa():
     filter_1 = st.selectbox('Select Dataset', filters_levelone)
 
     if filter_1:
+        dataset_contents(asqa_datasets[filter_1], metrics['llama3_70b_judge'])
         draw('asu', 'AQA',filter_1, 'llama3_70b_judge')
-    else:
-        draw('asu', 'AQA', 'Clotho-AQA-Test', 'llama3_70b_judge')
+    # else:
+    #     draw('asu', 'AQA', 'Clotho-AQA-Test', 'llama3_70b_judge')
 
 def er():
     st.title("Emotion Recognition")
@@ -208,7 +239,7 @@ def er():
     filters_levelone = ['IEMOCAP-Emotion-Test',
                         'MELD-Sentiment-Test',
                         'MELD-Emotion-Test']
-    sort_leveltwo = []
+    # sort_leveltwo = []
 
     left, center, _, middle,right = st.columns([0.2, 0.2, 0.2, 0.2 ,0.2])
 
@@ -231,9 +262,10 @@ def er():
     # sorted = st.selectbox('by', ['Ascending', 'Descending'])
 
     if filter_1:
+        dataset_contents(er_datasets[filter_1], metrics['llama3_70b_judge_binary'])
         draw('vu', 'ER', filter_1, 'llama3_70b_judge_binary')
-    else:
-        draw('vu', 'ER', 'IEMOCAP-Emotion-Test', 'llama3_70b_judge_binary')
+    # else:
+    #     draw('vu', 'ER', 'IEMOCAP-Emotion-Test', 'llama3_70b_judge_binary')
 
 def ar():
     st.title("Accent Recognition")
@@ -247,9 +279,9 @@ def ar():
 
 
     if filter_1:
+        dataset_contents(ar_datsets[filter_1], metrics['llama3_70b_judge'])
         draw('vu', 'AR', filter_1, 'llama3_70b_judge')
-
-        draw('vu', 'AR', 'VoxCeleb-Accent-Test', 'llama3_70b_judge')
+
 
 def gr():
     st.title("Emotion Recognition")
@@ -263,9 +295,10 @@ def gr():
     filter_1 = st.selectbox('Select Dataset', filters_levelone)
 
    if filter_1:
+        dataset_contents(gr_datasets[filter_1], metrics['llama3_70b_judge_binary'])
         draw('vu', 'GR', filter_1, 'llama3_70b_judge_binary')
-    else:
-        draw('vu', 'GR', 'VoxCeleb1-Gender-Test', 'llama3_70b_judge_binary')
+    # else:
+    #     draw('vu', 'GR', 'VoxCeleb1-Gender-Test', 'llama3_70b_judge_binary')
 
 def spt():
     st.title("Speech Translation")
@@ -283,9 +316,10 @@ def spt():
     filter_1 = st.selectbox('Select Dataset', filters_levelone)
 
     if filter_1:
+        dataset_contents(spt_datasets[filter_1], metrics['bleu'])
         draw('su', 'ST', filter_1, 'bleu')
-    else:
-        draw('su', 'ST', 'Covost2-EN-ID-test', 'bleu')
+    # else:
+    #     draw('su', 'ST', 'Covost2-EN-ID-test', 'bleu')
 
 def cnasr():
     st.title("Chinese Automatic Speech Recognition")
@@ -298,6 +332,7 @@ def cnasr():
     filter_1 = st.selectbox('Select Dataset', filters_levelone)
 
     if filter_1:
+        dataset_contents(cnasr_datasets[filter_1], metrics['wer'])
         draw('su', 'CNASR', filter_1, 'wer')
-    else:
-        draw('su', 'CNASR', 'Aishell-ASR-ZH-Test', 'wer')
+    # else:
+    #     draw('su', 'CNASR', 'Aishell-ASR-ZH-Test', 'wer')
|