diff --git a/.gitattributes b/.gitattributes index a9259ff0147e4ff3cc1677a41c974d0babde90f3..ea72b65b2004bbf03fdfc511e3568eb09e6a64d3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -60,3 +60,21 @@ examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=l examples/SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings21-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings21-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings21-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings22-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings22-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Earnings22-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Tedlium3-Long-form-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Tedlium3-Long-form-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2ASR/Tedlium3-Long-form-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Public-SG-Speech-QA-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/SLUE-P2-SQA5-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/SLUE-P2-SQA5-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/SLUE-P2-SQA5-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/2SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text diff --git a/examples/2AC/AudioCaps-Test/dataset_info.json b/examples/2AC/AudioCaps-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..82148686a795bb258e6676260855fb8cf9ef19e4 --- /dev/null +++ b/examples/2AC/AudioCaps-Test/dataset_info.json @@ -0,0 +1,160 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audiocap_id": { + "dtype": "string", + "_type": "Value" + }, + "start_time": { + "dtype": "string", + "_type": "Value" + }, + "youtube_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AC/AudioCaps-Test/sample_0.wav b/examples/2AC/AudioCaps-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..4b2f8047fa38f9ba3acef7485b26ea02f4ada359 Binary files /dev/null and b/examples/2AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/2AC/AudioCaps-Test/sample_1.wav b/examples/2AC/AudioCaps-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8724df3f01ccd22778f84a7a851871f1d73434fe Binary files /dev/null and b/examples/2AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/2AC/AudioCaps-Test/sample_2.wav b/examples/2AC/AudioCaps-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..48d08b19be5f0904ca976c35fbe4ae4d6c19435f Binary files /dev/null and b/examples/2AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/2AC/AudioCaps-Test/state.json b/examples/2AC/AudioCaps-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..fcc438d7c2db870c1b636d8299a9d843d607fbc6 --- /dev/null +++ b/examples/2AC/AudioCaps-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e736bf1821a473f3", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AC/WavCaps-Test/dataset_info.json b/examples/2AC/WavCaps-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..acf11db0c4cc1869f7763270ddadbfe4c30f73d4 --- /dev/null +++ b/examples/2AC/WavCaps-Test/dataset_info.json @@ -0,0 +1,156 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_path": { + "dtype": "string", + "_type": "Value" + }, + "duration": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AC/WavCaps-Test/sample_0.wav b/examples/2AC/WavCaps-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ad8d45455c35860d7309e0554c6610ba6ddccb68 Binary files /dev/null and b/examples/2AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/2AC/WavCaps-Test/sample_1.wav b/examples/2AC/WavCaps-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..ea10461620e829d47fb78bf4d827b95322791340 Binary files /dev/null and b/examples/2AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/2AC/WavCaps-Test/sample_2.wav b/examples/2AC/WavCaps-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b7fef91fbedf60a5d58f4f9fb93d95c1e205bf67 Binary files /dev/null and b/examples/2AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/2AC/WavCaps-Test/state.json b/examples/2AC/WavCaps-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..0d52b23c8e5b93506a8af809adf9680c9cc7bf86 --- /dev/null +++ b/examples/2AC/WavCaps-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "742ab313af054565", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AQA/AudioCaps-QA-Test/dataset_info.json b/examples/2AQA/AudioCaps-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..c6d61c8e72325cf36fabc952fbec1ca42e49e5e8 --- /dev/null +++ b/examples/2AQA/AudioCaps-QA-Test/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audiocap_id": { + "dtype": "string", + "_type": "Value" + }, + "caption": { + "dtype": "string", + "_type": "Value" + }, + "start_time": { + "dtype": "string", + "_type": "Value" + }, + "youtube_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AQA/AudioCaps-QA-Test/sample_0.wav b/examples/2AQA/AudioCaps-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..2eec3251fe8dc9acf17f43f66f187a277cf6c6b0 Binary files /dev/null and b/examples/2AQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/2AQA/AudioCaps-QA-Test/sample_1.wav b/examples/2AQA/AudioCaps-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..f7e101c5918451111738962b722e47041dd59227 Binary files /dev/null and b/examples/2AQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/2AQA/AudioCaps-QA-Test/sample_2.wav b/examples/2AQA/AudioCaps-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..cb15b2ffff83c6ec5541c8b54a8205d58292a2d3 Binary files /dev/null and b/examples/2AQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/2AQA/AudioCaps-QA-Test/state.json b/examples/2AQA/AudioCaps-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..267d6cbee3f52f7b8f77f20b959ca9ce159aed16 --- /dev/null +++ b/examples/2AQA/AudioCaps-QA-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "52bc1dfcaf2a0f4b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AQA/Clotho-AQA-Test/dataset_info.json b/examples/2AQA/Clotho-AQA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1b05abb4dce6b496c0a3c6043f27e4ca1f225320 --- /dev/null +++ b/examples/2AQA/Clotho-AQA-Test/dataset_info.json @@ -0,0 +1,147 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": {}, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AQA/Clotho-AQA-Test/sample_0.wav b/examples/2AQA/Clotho-AQA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..d6a07e6172778d85080c04531658efc7443ae03d Binary files /dev/null and b/examples/2AQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/2AQA/Clotho-AQA-Test/sample_1.wav b/examples/2AQA/Clotho-AQA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..139bd3226ca457718b3cdab6d1e7a99dd5e4bd01 Binary files /dev/null and b/examples/2AQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/2AQA/Clotho-AQA-Test/sample_2.wav b/examples/2AQA/Clotho-AQA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b62a8feb71883e7540f521d41ac6e3eefe3862a3 Binary files /dev/null and b/examples/2AQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/2AQA/Clotho-AQA-Test/state.json b/examples/2AQA/Clotho-AQA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c58228cce70f0b257254856751c37d68dd8cd64f --- /dev/null +++ b/examples/2AQA/Clotho-AQA-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e2e76326f448d7c4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AQA/WavCaps-QA-Test/dataset_info.json b/examples/2AQA/WavCaps-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..72ceb742ffcaf0f6ff67811fa628b1e1c7a1167e --- /dev/null +++ b/examples/2AQA/WavCaps-QA-Test/dataset_info.json @@ -0,0 +1,160 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_path": { + "dtype": "string", + "_type": "Value" + }, + "caption": { + "dtype": "string", + "_type": "Value" + }, + "duration": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AQA/WavCaps-QA-Test/sample_0.wav b/examples/2AQA/WavCaps-QA-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..7639cdca2866a648ce90b4f5e385e3e6dc56c04a Binary files /dev/null and b/examples/2AQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/2AQA/WavCaps-QA-Test/sample_1.wav b/examples/2AQA/WavCaps-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..d2cc1a6def6014328e02ea5ea25019414f8960b4 Binary files /dev/null and b/examples/2AQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/2AQA/WavCaps-QA-Test/sample_2.wav b/examples/2AQA/WavCaps-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..9629f69dd6d6b8a713b9122b03ee04ec4aae8857 Binary files /dev/null and b/examples/2AQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/2AQA/WavCaps-QA-Test/state.json b/examples/2AQA/WavCaps-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..a85966493650dfae84811c91b9f42d7c2811ad9c --- /dev/null +++ b/examples/2AQA/WavCaps-QA-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "40995a6cc1fe3dc7", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2AR/VoxCeleb-Accent-Test/dataset_info.json b/examples/2AR/VoxCeleb-Accent-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..913e85b36737f9004f81286043e7493d6f61b737 --- /dev/null +++ b/examples/2AR/VoxCeleb-Accent-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Gender": { + "dtype": "string", + "_type": "Value" + }, + "Nationality": { + "dtype": "string", + "_type": "Value" + }, + "VGGFace1 ID": { + "dtype": "string", + "_type": "Value" + }, + "VoxCeleb1 ID": { + "dtype": "string", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/2AR/VoxCeleb-Accent-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ae8061522976216887910263ba9414a7e60685eb Binary files /dev/null and b/examples/2AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/2AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/2AR/VoxCeleb-Accent-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..209cf78dcde2791b463e32f9a7245514655790aa Binary files /dev/null and b/examples/2AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/2AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/2AR/VoxCeleb-Accent-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..0d44634575f30bfe9c2fa2c2765ae34c192c9c90 Binary files /dev/null and b/examples/2AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/2AR/VoxCeleb-Accent-Test/state.json b/examples/2AR/VoxCeleb-Accent-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..5354f99a49cf31f0949fd2359846d46859efd5ff --- /dev/null +++ b/examples/2AR/VoxCeleb-Accent-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fa91a59f90c22c3c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/2ASR/Common-Voice-15-En-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..08f8bd6abcb7df02ab18d592990cc082baa8bfa3 --- /dev/null +++ b/examples/2ASR/Common-Voice-15-En-Test/dataset_info.json @@ -0,0 +1,188 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "accents": { + "dtype": "null", + "_type": "Value" + }, + "age": { + "dtype": "null", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "down_votes": { + "dtype": "int64", + "_type": "Value" + }, + "gender": { + "dtype": "null", + "_type": "Value" + }, + "language": { + "dtype": "string", + "_type": "Value" + }, + "locale": { + "dtype": "string", + "_type": "Value" + }, + "segment": { + "dtype": "null", + "_type": "Value" + }, + "up_votes": { + "dtype": "int64", + "_type": "Value" + }, + "variant": { + "dtype": "null", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/2ASR/Common-Voice-15-En-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..42753b756f05c733803356b486de2df1b1224de4 Binary files /dev/null and b/examples/2ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/2ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/2ASR/Common-Voice-15-En-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..643e1d9e9e461c2465856a18fbf89bb27f577a18 Binary files /dev/null and b/examples/2ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/2ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/2ASR/Common-Voice-15-En-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..eb0894ce127ebe7c2fadb5b11feea3e5b0ace14f Binary files /dev/null and b/examples/2ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/2ASR/Common-Voice-15-En-Test/state.json b/examples/2ASR/Common-Voice-15-En-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..1ff74abf94d1cbf9804c3911eac7edf199fb36a2 --- /dev/null +++ b/examples/2ASR/Common-Voice-15-En-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "468db91ad949e4d4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Earnings21-Test/dataset_info.json b/examples/2ASR/Earnings21-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf27e3aaa89f2fa43812252ac2377fab8ae1708 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/dataset_info.json @@ -0,0 +1,152 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Earnings21-Test/state.json b/examples/2ASR/Earnings21-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ce1987be7235db123fcdddfe4a75272abf7513d4 --- /dev/null +++ b/examples/2ASR/Earnings21-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8cc0ad99446f1aba", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Earnings22-Test/dataset_info.json b/examples/2ASR/Earnings22-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1bf27e3aaa89f2fa43812252ac2377fab8ae1708 --- /dev/null +++ b/examples/2ASR/Earnings22-Test/dataset_info.json @@ -0,0 +1,152 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Earnings22-Test/state.json b/examples/2ASR/Earnings22-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ac26bdba683e345445fdeb81908f03626f2ced73 --- /dev/null +++ b/examples/2ASR/Earnings22-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "331c061bce6e651c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/GigaSpeech-Test2/dataset_info.json b/examples/2ASR/GigaSpeech-Test2/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1dd0025578e934e74b979da9e81789eedd9a2f29 --- /dev/null +++ b/examples/2ASR/GigaSpeech-Test2/dataset_info.json @@ -0,0 +1,180 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_id": { + "dtype": "string", + "_type": "Value" + }, + "begin_time": { + "dtype": "float64", + "_type": "Value" + }, + "category": { + "dtype": "int64", + "_type": "Value" + }, + "end_time": { + "dtype": "float64", + "_type": "Value" + }, + "segment_id": { + "dtype": "string", + "_type": "Value" + }, + "source": { + "dtype": "int64", + "_type": "Value" + }, + "speaker": { + "dtype": "string", + "_type": "Value" + }, + "url": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/GigaSpeech-Test2/sample_0.wav b/examples/2ASR/GigaSpeech-Test2/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..0d232b1996ee07bc47f24d06fe8b860ee1b63b11 Binary files /dev/null and b/examples/2ASR/GigaSpeech-Test2/sample_0.wav differ diff --git a/examples/2ASR/GigaSpeech-Test2/sample_1.wav b/examples/2ASR/GigaSpeech-Test2/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..ee5fced84fd0691c2b4b288be9a5ad73ac67bc6e Binary files /dev/null and b/examples/2ASR/GigaSpeech-Test2/sample_1.wav differ diff --git a/examples/2ASR/GigaSpeech-Test2/sample_2.wav b/examples/2ASR/GigaSpeech-Test2/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2a9edb2ea3e8a96c7ab70b232249205e765f02d0 Binary files /dev/null and b/examples/2ASR/GigaSpeech-Test2/sample_2.wav differ diff --git a/examples/2ASR/GigaSpeech-Test2/state.json b/examples/2ASR/GigaSpeech-Test2/state.json new file mode 100644 index 0000000000000000000000000000000000000000..8bd5fd3d45201fc6807cb2364c48d5ba722bb4bf --- /dev/null +++ b/examples/2ASR/GigaSpeech-Test2/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "68d371cc267ff1d2", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/dataset_info.json b/examples/2ASR/IMDA-Part1-ASR-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ede02d74b595679e2a6f3b2256ab4c69e535f09e --- /dev/null +++ b/examples/2ASR/IMDA-Part1-ASR-Test/dataset_info.json @@ -0,0 +1,200 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "channel": { + "dtype": "string", + "_type": "Value" + }, + "session": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "device_c0": { + "dtype": "string", + "_type": "Value" + }, + "device_c1": { + "dtype": "string", + "_type": "Value" + }, + "device_c2": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "part1_id": { + "dtype": "string", + "_type": "Value" + }, + "part2_id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/2ASR/IMDA-Part1-ASR-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..b100f0ace853e729d509b43b5b9e00601dbbc4e8 Binary files /dev/null and b/examples/2ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/2ASR/IMDA-Part1-ASR-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..adbeb1917d8397ebb3ab9b6b216c607bfe62e881 Binary files /dev/null and b/examples/2ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/2ASR/IMDA-Part1-ASR-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..4f4800df16ae7a179504dcfe0c30468936287ae4 Binary files /dev/null and b/examples/2ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/2ASR/IMDA-Part1-ASR-Test/state.json b/examples/2ASR/IMDA-Part1-ASR-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..03121b2bd71d513ba2edeec803b648f34087ad6e --- /dev/null +++ b/examples/2ASR/IMDA-Part1-ASR-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ddfb47abed13c356", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/dataset_info.json b/examples/2ASR/IMDA-Part2-ASR-Test2/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..af0260da818c604e190e3b198aebef1a391419ec --- /dev/null +++ b/examples/2ASR/IMDA-Part2-ASR-Test2/dataset_info.json @@ -0,0 +1,92 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "conversation_id": { + "dtype": "string", + "_type": "Value" + }, + "partition": { + "dtype": "string", + "_type": "Value" + }, + "settings": { + "channel": { + "dtype": "string", + "_type": "Value" + }, + "session": { + "dtype": "string", + "_type": "Value" + } + }, + "speaker": { + "device_c0": { + "dtype": "string", + "_type": "Value" + }, + "device_c1": { + "dtype": "string", + "_type": "Value" + }, + "device_c2": { + "dtype": "string", + "_type": "Value" + }, + "ethnic_group": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "string", + "_type": "Value" + }, + "part1_id": { + "dtype": "string", + "_type": "Value" + }, + "part2_id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/sample_0.wav b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..d79fde19cec7b066b9f3f546d6ff6366dfa4daef Binary files /dev/null and b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_0.wav differ diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/sample_1.wav b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..9fac54a02b8b9ad1ee6502ef41ef1d50a7213de5 Binary files /dev/null and b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_1.wav differ diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/sample_2.wav b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..d866ad7a1ddfc3541ce3adbecf00f688489cdd3f Binary files /dev/null and b/examples/2ASR/IMDA-Part2-ASR-Test2/sample_2.wav differ diff --git a/examples/2ASR/IMDA-Part2-ASR-Test2/state.json b/examples/2ASR/IMDA-Part2-ASR-Test2/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3c6b051944e6842913bce863ca9c239da3f8e0be --- /dev/null +++ b/examples/2ASR/IMDA-Part2-ASR-Test2/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "9f3d440792a605d2", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Clean/dataset_info.json b/examples/2ASR/LibriSpeech-Test-Clean/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5a91f71cfb2044e6060c8f395ee4b798384d32d8 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Clean/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/2ASR/LibriSpeech-Test-Clean/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..051a14c05f7c270da3d842024c5936075cb5c2e6 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/2ASR/LibriSpeech-Test-Clean/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..30816d1c205dd136109c6abfca19abf249813c68 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/2ASR/LibriSpeech-Test-Clean/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..a228ce116181b6b19b741cd9ffc2e1853704adc5 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Clean/state.json b/examples/2ASR/LibriSpeech-Test-Clean/state.json new file mode 100644 index 0000000000000000000000000000000000000000..dda0f24f40bffbb5dbb1f236bc44f6e715655e67 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Clean/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "d9f5d173c305ae96", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Other/dataset_info.json b/examples/2ASR/LibriSpeech-Test-Other/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5a91f71cfb2044e6060c8f395ee4b798384d32d8 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Other/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/2ASR/LibriSpeech-Test-Other/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..c274d02bd3392db09ea1a95bb050a248627b91cc Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/2ASR/LibriSpeech-Test-Other/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..eea20cd08457ea3ae2d55e91c7240602ae30436f Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/2ASR/LibriSpeech-Test-Other/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b7cc7a7ab0cb2cb02a5253dcecede16c688acf86 Binary files /dev/null and b/examples/2ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/2ASR/LibriSpeech-Test-Other/state.json b/examples/2ASR/LibriSpeech-Test-Other/state.json new file mode 100644 index 0000000000000000000000000000000000000000..952838bee518f052b1de767f99c58c1282b17596 --- /dev/null +++ b/examples/2ASR/LibriSpeech-Test-Other/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "8bd0648dc412be04", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Peoples-Speech-Test/dataset_info.json b/examples/2ASR/Peoples-Speech-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..a8d496de62c861269b2a5c8bf9826fedc8abf807 --- /dev/null +++ b/examples/2ASR/Peoples-Speech-Test/dataset_info.json @@ -0,0 +1,156 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "duration_ms": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Peoples-Speech-Test/sample_0.wav b/examples/2ASR/Peoples-Speech-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ff25cea2d23ec61f9199873fc67227221c1bebca Binary files /dev/null and b/examples/2ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/2ASR/Peoples-Speech-Test/sample_1.wav b/examples/2ASR/Peoples-Speech-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..4e7ddfc1977129dfb02d6c4424b362bcd60c1a15 Binary files /dev/null and b/examples/2ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/2ASR/Peoples-Speech-Test/sample_2.wav b/examples/2ASR/Peoples-Speech-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..9f590a810351e28a60b5da87f6821f06f8916790 Binary files /dev/null and b/examples/2ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/2ASR/Peoples-Speech-Test/state.json b/examples/2ASR/Peoples-Speech-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..60db649d02fbab6497da719d1c536be91f9bda39 --- /dev/null +++ b/examples/2ASR/Peoples-Speech-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "de704174c1b2e1ea", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Long-form-Test/dataset_info.json b/examples/2ASR/Tedlium3-Long-form-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..55009f0312ac6d6605288017abbf50e0bafefdc3 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/dataset_info.json @@ -0,0 +1,56 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Long-form-Test/state.json b/examples/2ASR/Tedlium3-Long-form-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..3a56b9cee5a756794d32d1b5f03c01a501c41606 --- /dev/null +++ b/examples/2ASR/Tedlium3-Long-form-Test/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f95b9bf4e3dea7c1", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Test/dataset_info.json b/examples/2ASR/Tedlium3-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..55009f0312ac6d6605288017abbf50e0bafefdc3 --- /dev/null +++ b/examples/2ASR/Tedlium3-Test/dataset_info.json @@ -0,0 +1,56 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ASR/Tedlium3-Test/sample_0.wav b/examples/2ASR/Tedlium3-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a07fc005b1f77a01b066c0ef962b04e634f4c356 Binary files /dev/null and b/examples/2ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/2ASR/Tedlium3-Test/sample_1.wav b/examples/2ASR/Tedlium3-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..f864baa998ef015b529fc69d8bccca6f284233f1 Binary files /dev/null and b/examples/2ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/2ASR/Tedlium3-Test/sample_2.wav b/examples/2ASR/Tedlium3-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..66c6482678614fbd0c658553c4c727a50d80c57a Binary files /dev/null and b/examples/2ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/2ASR/Tedlium3-Test/state.json b/examples/2ASR/Tedlium3-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..ffb37e795661eaf0f656a4272372d0919a492fe0 --- /dev/null +++ b/examples/2ASR/Tedlium3-Test/state.json @@ -0,0 +1,18 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "fb20b90d5641df89", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/dataset_info.json b/examples/2CNASR/Aishell-ASR-ZH-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..051243e1e6b3046a83599b80eb901679ff2608d8 --- /dev/null +++ b/examples/2CNASR/Aishell-ASR-ZH-Test/dataset_info.json @@ -0,0 +1,144 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker": { + "gender": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a69d64c8284caa8ca7ef3f5ecaf6ebc0519020ef Binary files /dev/null and b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..a149a1ab68e19b0029225518d217608c573d37e2 Binary files /dev/null and b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b2816f1cb90418b611227d1e6d044e886712b426 Binary files /dev/null and b/examples/2CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/2CNASR/Aishell-ASR-ZH-Test/state.json b/examples/2CNASR/Aishell-ASR-ZH-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..4495f512dec2e1549747a3cd9e31137e0afb8081 --- /dev/null +++ b/examples/2CNASR/Aishell-ASR-ZH-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f9833c929864587b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ER/IEMOCAP-Emotion-Test/dataset_info.json b/examples/2ER/IEMOCAP-Emotion-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ae1585a301a57eef40e3c39259d56a7e70e2be43 --- /dev/null +++ b/examples/2ER/IEMOCAP-Emotion-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "End Time": { + "dtype": "float64", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Start Time": { + "dtype": "float64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/2ER/IEMOCAP-Emotion-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..3aea288a199cf828777f07051ce17bb65dd122b9 Binary files /dev/null and b/examples/2ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/2ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/2ER/IEMOCAP-Emotion-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..f20cf5efc4a86d62d733d80fc2cde556ea107245 Binary files /dev/null and b/examples/2ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/2ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/2ER/IEMOCAP-Emotion-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..6085d420403bb54190cde8d1cffef75b35f2fa88 Binary files /dev/null and b/examples/2ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/2ER/IEMOCAP-Emotion-Test/state.json b/examples/2ER/IEMOCAP-Emotion-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..f9e210ccb738232f4a7ce004649cc0811b5622e7 --- /dev/null +++ b/examples/2ER/IEMOCAP-Emotion-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "78bf80b897adbddb", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ER/MELD-Emotion-Test/dataset_info.json b/examples/2ER/MELD-Emotion-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..506cf34c0d7158134159a3234f9f98b8e6b74f28 --- /dev/null +++ b/examples/2ER/MELD-Emotion-Test/dataset_info.json @@ -0,0 +1,184 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "EndTime": { + "dtype": "string", + "_type": "Value" + }, + "Episode": { + "dtype": "int64", + "_type": "Value" + }, + "Gender": { + "dtype": "null", + "_type": "Value" + }, + "Season": { + "dtype": "int64", + "_type": "Value" + }, + "Sentiment": { + "dtype": "int64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + }, + "StartTime": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ER/MELD-Emotion-Test/sample_0.wav b/examples/2ER/MELD-Emotion-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..819fccfa77653af1d839db36a4d89d6c5073676d Binary files /dev/null and b/examples/2ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/2ER/MELD-Emotion-Test/sample_1.wav b/examples/2ER/MELD-Emotion-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..a21acc5a4a0831f75c28e76a93e0339f98a5dab9 Binary files /dev/null and b/examples/2ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/2ER/MELD-Emotion-Test/sample_2.wav b/examples/2ER/MELD-Emotion-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..98c1a4ec43768374663eb5cae1305480110d87e8 Binary files /dev/null and b/examples/2ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/2ER/MELD-Emotion-Test/state.json b/examples/2ER/MELD-Emotion-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..9c219f8e85ce34e78d9b81244ed1bd84435ebb4c --- /dev/null +++ b/examples/2ER/MELD-Emotion-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a5a596edab97a213", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ER/MELD-Sentiment-Test/dataset_info.json b/examples/2ER/MELD-Sentiment-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..506cf34c0d7158134159a3234f9f98b8e6b74f28 --- /dev/null +++ b/examples/2ER/MELD-Sentiment-Test/dataset_info.json @@ -0,0 +1,184 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "EndTime": { + "dtype": "string", + "_type": "Value" + }, + "Episode": { + "dtype": "int64", + "_type": "Value" + }, + "Gender": { + "dtype": "null", + "_type": "Value" + }, + "Season": { + "dtype": "int64", + "_type": "Value" + }, + "Sentiment": { + "dtype": "int64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + }, + "StartTime": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ER/MELD-Sentiment-Test/sample_0.wav b/examples/2ER/MELD-Sentiment-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..025684f18597120ad16569fb77cc1700b855f6c9 Binary files /dev/null and b/examples/2ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/2ER/MELD-Sentiment-Test/sample_1.wav b/examples/2ER/MELD-Sentiment-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8d64fff1bab98e316eec6c61842e48f7b8f333c4 Binary files /dev/null and b/examples/2ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/2ER/MELD-Sentiment-Test/sample_2.wav b/examples/2ER/MELD-Sentiment-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..f639c22e1b1e24a5d6db354da2e8cf424cdb45be Binary files /dev/null and b/examples/2ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/2ER/MELD-Sentiment-Test/state.json b/examples/2ER/MELD-Sentiment-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..917ccc849fc69d74df055b821dd46d31b29e2e8d --- /dev/null +++ b/examples/2ER/MELD-Sentiment-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "a052e830551840d2", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2GR/IEMOCAP-Gender-Test/dataset_info.json b/examples/2GR/IEMOCAP-Gender-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ae1585a301a57eef40e3c39259d56a7e70e2be43 --- /dev/null +++ b/examples/2GR/IEMOCAP-Gender-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Emotion": { + "dtype": "int64", + "_type": "Value" + }, + "End Time": { + "dtype": "float64", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Start Time": { + "dtype": "float64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/2GR/IEMOCAP-Gender-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..0f29a85f3b63eb74da8cd026aab5aa13498e0125 Binary files /dev/null and b/examples/2GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/2GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/2GR/IEMOCAP-Gender-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..56cec7eeb6836d092e76201787aa22b9436c13f2 Binary files /dev/null and b/examples/2GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/2GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/2GR/IEMOCAP-Gender-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..5c68747f7620a99d23ad13f8d2fd7386ed49332c Binary files /dev/null and b/examples/2GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/2GR/IEMOCAP-Gender-Test/state.json b/examples/2GR/IEMOCAP-Gender-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..909b8a4a39ef78e0e286d8b51244d38d68e3aa31 --- /dev/null +++ b/examples/2GR/IEMOCAP-Gender-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "7a4eb80e3f03a3f4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2GR/VoxCeleb-Gender-Test/dataset_info.json b/examples/2GR/VoxCeleb-Gender-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..913e85b36737f9004f81286043e7493d6f61b737 --- /dev/null +++ b/examples/2GR/VoxCeleb-Gender-Test/dataset_info.json @@ -0,0 +1,168 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Gender": { + "dtype": "string", + "_type": "Value" + }, + "Nationality": { + "dtype": "string", + "_type": "Value" + }, + "VGGFace1 ID": { + "dtype": "string", + "_type": "Value" + }, + "VoxCeleb1 ID": { + "dtype": "string", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/2GR/VoxCeleb-Gender-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..2f6e333859e59c14c99e08c0ed07ec34da06b7cc Binary files /dev/null and b/examples/2GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/2GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/2GR/VoxCeleb-Gender-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..cdc77dfd8a54374fbf21b32128129fde94d44c2c Binary files /dev/null and b/examples/2GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/2GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/2GR/VoxCeleb-Gender-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8b3634c13fc5cefe14475fc673f9458aa7c79815 Binary files /dev/null and b/examples/2GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/2GR/VoxCeleb-Gender-Test/state.json b/examples/2GR/VoxCeleb-Gender-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0591ec49e94a4c129e72d00c70af869214dafd8 --- /dev/null +++ b/examples/2GR/VoxCeleb-Gender-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "29964e6c779e5e22", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SI/ALPACA-Audio-Test/dataset_info.json b/examples/2SI/ALPACA-Audio-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..882789dac9ac95ae77467581f55b4f296746e7a9 --- /dev/null +++ b/examples/2SI/ALPACA-Audio-Test/dataset_info.json @@ -0,0 +1,176 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "index": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SI/ALPACA-Audio-Test/sample_0.wav b/examples/2SI/ALPACA-Audio-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..70248c9fd2ff3b5792099536311d05bed53fba47 Binary files /dev/null and b/examples/2SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/2SI/ALPACA-Audio-Test/sample_1.wav b/examples/2SI/ALPACA-Audio-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..0fa85b0e999023c1118dd960236ae84689438330 Binary files /dev/null and b/examples/2SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/2SI/ALPACA-Audio-Test/sample_2.wav b/examples/2SI/ALPACA-Audio-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..b5f187db3678a198b23df8b683f8492ae5ff60ac Binary files /dev/null and b/examples/2SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/2SI/ALPACA-Audio-Test/state.json b/examples/2SI/ALPACA-Audio-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ba2848fa1bbfd0cf127c67ce6315770b72d590d --- /dev/null +++ b/examples/2SI/ALPACA-Audio-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "f46fe3d489641513", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SI/OpenHermes-Audio-Test/dataset_info.json b/examples/2SI/OpenHermes-Audio-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..ac515de4ff190d0b7bd0ce417dfbe2b34d6c0bb8 --- /dev/null +++ b/examples/2SI/OpenHermes-Audio-Test/dataset_info.json @@ -0,0 +1,188 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "index": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SI/OpenHermes-Audio-Test/sample_0.wav b/examples/2SI/OpenHermes-Audio-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..add65880600b56384cc5350e64c30be65d0ec19d Binary files /dev/null and b/examples/2SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/2SI/OpenHermes-Audio-Test/sample_1.wav b/examples/2SI/OpenHermes-Audio-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8fa3cc5ffd5e8304b28e2c00f15edfda78c293b2 Binary files /dev/null and b/examples/2SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/2SI/OpenHermes-Audio-Test/sample_2.wav b/examples/2SI/OpenHermes-Audio-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..47dc6714c50f2650cb528c81edf0f18a8f3148e9 Binary files /dev/null and b/examples/2SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/2SI/OpenHermes-Audio-Test/state.json b/examples/2SI/OpenHermes-Audio-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..9a19e3c159f16d99ee14394ad0aab31b7594a7eb --- /dev/null +++ b/examples/2SI/OpenHermes-Audio-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "bec0fd435c621121", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/2SQA/CN-College-Listen-MCQ-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..623b82f8d7d18ff8870b56c298783f1b5f32dc57 --- /dev/null +++ b/examples/2SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -0,0 +1,160 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "audio_name": { + "dtype": "string", + "_type": "Value" + }, + "choices": { + "dtype": "string", + "_type": "Value" + }, + "mc_answer": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8aea2bff27874d78fbee581ad8849d0ab0ac9fc8 Binary files /dev/null and b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..750f1b8414060167651ac33128408b510b1545ca Binary files /dev/null and b/examples/2SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/2SQA/CN-College-Listen-MCQ-Test/state.json b/examples/2SQA/CN-College-Listen-MCQ-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..1afd2605e58a9e5304f9f324f51af5e7d535d636 --- /dev/null +++ b/examples/2SQA/CN-College-Listen-MCQ-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "b4fb19374756e22d", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/2SQA/DREAM-TTS-MCQ-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..b12863f3596549cd265ea212a5964a91c271a7cf --- /dev/null +++ b/examples/2SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -0,0 +1,156 @@ +{ + "citation": "", + "description": "", + "features": { + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "choices": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "dialogue": { + "feature": { + "dtype": "string", + "_type": "Value" + }, + "_type": "Sequence" + }, + "dialogue_id": { + "dtype": "string", + "_type": "Value" + }, + "id": { + "dtype": "int64", + "_type": "Value" + }, + "mc_answer": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/sample_0.wav b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ea5e271157879441098607537ffd8481426eca1f Binary files /dev/null and b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_0.wav differ diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..15f7e2c92aa5c8e199277474dfbb81d27c9ef002 Binary files /dev/null and b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/sample_2.wav b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..55ad57e609bfdbffb7be3006b19f1b5594996eb5 Binary files /dev/null and b/examples/2SQA/DREAM-TTS-MCQ-Test/sample_2.wav differ diff --git a/examples/2SQA/DREAM-TTS-MCQ-Test/state.json b/examples/2SQA/DREAM-TTS-MCQ-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..9429aff4cfdd4360de95ee6eaf6f11a40d6ceefa --- /dev/null +++ b/examples/2SQA/DREAM-TTS-MCQ-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "4ae1a389c9652fd2", + "_format_columns": [ + "answer", + "context", + "instruction", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/dataset_info.json b/examples/2SQA/Public-SG-Speech-QA-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5b865ccbdc90f460864fa658307b1fd6e12b5ac6 --- /dev/null +++ b/examples/2SQA/Public-SG-Speech-QA-Test/dataset_info.json @@ -0,0 +1,150 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "Answer Score": { + "dtype": "float64", + "_type": "Value" + }, + "Audio ID": { + "dtype": "string", + "_type": "Value" + }, + "Gender": { + "dtype": "int64", + "_type": "Value" + }, + "Question Score": { + "dtype": "float64", + "_type": "Value" + }, + "Speaker": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/sample_1.wav b/examples/2SQA/Public-SG-Speech-QA-Test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..8b87c520e6acc02af8e3ed71f1961be81e93f4ce Binary files /dev/null and b/examples/2SQA/Public-SG-Speech-QA-Test/sample_1.wav differ diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/sample_2.wav b/examples/2SQA/Public-SG-Speech-QA-Test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2b168de69ee8289ea666ea3d3a82849f085256d3 Binary files /dev/null and b/examples/2SQA/Public-SG-Speech-QA-Test/sample_2.wav differ diff --git a/examples/2SQA/Public-SG-Speech-QA-Test/state.json b/examples/2SQA/Public-SG-Speech-QA-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..0f2668023e6a567ada41c9ea082f99773f2e8f6e --- /dev/null +++ b/examples/2SQA/Public-SG-Speech-QA-Test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ede505a635b66631", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/dataset_info.json b/examples/2SQA/SLUE-P2-SQA5-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..6c681f17c89b019564bbd2d0509865a05ce4db3a --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/dataset_info.json @@ -0,0 +1,197 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "array": { + "feature": { + "dtype": "float64", + "_type": "Value" + }, + "_type": "Sequence" + }, + "path": { + "dtype": "null", + "_type": "Value" + }, + "sampling_rate": { + "dtype": "int64", + "_type": "Value" + } + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "document_id": { + "dtype": "string", + "_type": "Value" + }, + "document_speaker_id": { + "dtype": "string", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "normalized_document_text": { + "dtype": "string", + "_type": "Value" + }, + "normalized_question_text": { + "dtype": "string", + "_type": "Value" + }, + "question_speaker_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/SLUE-P2-SQA5-Test/state.json b/examples/2SQA/SLUE-P2-SQA5-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..64fcf1074f14e6cc130ef25406cd0718f61798a7 --- /dev/null +++ b/examples/2SQA/SLUE-P2-SQA5-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "400b504ce3034854", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2SQA/Spoken-Squad-Test/dataset_info.json b/examples/2SQA/Spoken-Squad-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5b50e517bf0760f04194c6b7c720d733151674aa --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/dataset_info.json @@ -0,0 +1,172 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "paragraph_id": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + }, + "topic_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2SQA/Spoken-Squad-Test/state.json b/examples/2SQA/Spoken-Squad-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..bad03ce21b0c5b24ac005094f5a25236804a54a7 --- /dev/null +++ b/examples/2SQA/Spoken-Squad-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "416952584cf805a4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ID-test/dataset_info.json b/examples/2ST/Covost2-EN-ID-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-EN-ID-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ID-test/sample_0.wav b/examples/2ST/Covost2-EN-ID-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..5e70047fc638745caac9c90c2539cc9f18168f8a Binary files /dev/null and b/examples/2ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-EN-ID-test/sample_1.wav b/examples/2ST/Covost2-EN-ID-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..832716522492f3cbeb066ed58c45eab2e9036a2f Binary files /dev/null and b/examples/2ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-EN-ID-test/sample_2.wav b/examples/2ST/Covost2-EN-ID-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..2ccb333e9339d1fda08f7c84fe4fe2cd0b22508c Binary files /dev/null and b/examples/2ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-EN-ID-test/state.json b/examples/2ST/Covost2-EN-ID-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..d460d81ba05ec305339ae4a50c65c2654e706950 --- /dev/null +++ b/examples/2ST/Covost2-EN-ID-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "17a5c97a84a7f33c", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-TA-test/dataset_info.json b/examples/2ST/Covost2-EN-TA-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-EN-TA-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-TA-test/sample_0.wav b/examples/2ST/Covost2-EN-TA-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..ad0f73bf6dbfe42e17635b063161d6154cfcf28a Binary files /dev/null and b/examples/2ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-EN-TA-test/sample_1.wav b/examples/2ST/Covost2-EN-TA-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e5d11995c4bd1e3484f0c15d828c678300d899b2 Binary files /dev/null and b/examples/2ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-EN-TA-test/sample_2.wav b/examples/2ST/Covost2-EN-TA-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..8910c6f24aebdc2aab0e2517f304448129282655 Binary files /dev/null and b/examples/2ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-EN-TA-test/state.json b/examples/2ST/Covost2-EN-TA-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..de82636ce42828a45d6c8c4ca1edde9874d87cc3 --- /dev/null +++ b/examples/2ST/Covost2-EN-TA-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "e9d273226522711f", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ZH-test/dataset_info.json b/examples/2ST/Covost2-EN-ZH-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-EN-ZH-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-EN-ZH-test/sample_0.wav b/examples/2ST/Covost2-EN-ZH-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..f428967146cecccb88b318b8388e3b897cd14fca Binary files /dev/null and b/examples/2ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-EN-ZH-test/sample_1.wav b/examples/2ST/Covost2-EN-ZH-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..e91b4565eb72ab11f760c9b276daf4f6998f9e5f Binary files /dev/null and b/examples/2ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-EN-ZH-test/sample_2.wav b/examples/2ST/Covost2-EN-ZH-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..38f60afc485334971149ebeb3985573093dd9a96 Binary files /dev/null and b/examples/2ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-EN-ZH-test/state.json b/examples/2ST/Covost2-EN-ZH-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..7dafba85d0dbd927e3c9aae50f0d1a3cf3e2c71c --- /dev/null +++ b/examples/2ST/Covost2-EN-ZH-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "86243bad639f0cb6", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ID-EN-test/dataset_info.json b/examples/2ST/Covost2-ID-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-ID-EN-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ID-EN-test/sample_0.wav b/examples/2ST/Covost2-ID-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..60cf3a26a30e046d87c72a4e4d4f15cf54732039 Binary files /dev/null and b/examples/2ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-ID-EN-test/sample_1.wav b/examples/2ST/Covost2-ID-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..0b37d62b0b132a59e5ce6d2c4551c1d701143efb Binary files /dev/null and b/examples/2ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-ID-EN-test/sample_2.wav b/examples/2ST/Covost2-ID-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..5eda0ef756def2b097ee2bef2a92b8b9e99ff5c3 Binary files /dev/null and b/examples/2ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-ID-EN-test/state.json b/examples/2ST/Covost2-ID-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..592b67b79c5171bdccfcf07d28855f756e7f4f23 --- /dev/null +++ b/examples/2ST/Covost2-ID-EN-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "69b492f6dd79179e", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-TA-EN-test/dataset_info.json b/examples/2ST/Covost2-TA-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..0a34b7bc1ac66816e090968b4de959503e2c4168 --- /dev/null +++ b/examples/2ST/Covost2-TA-EN-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-TA-EN-test/sample_0.wav b/examples/2ST/Covost2-TA-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..642a3386437533748a4df9cbf7c45ab6f5bd0b6f Binary files /dev/null and b/examples/2ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-TA-EN-test/sample_1.wav b/examples/2ST/Covost2-TA-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..a9c003707ecdee7e8918014b2f77a69adbe71d93 Binary files /dev/null and b/examples/2ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-TA-EN-test/sample_2.wav b/examples/2ST/Covost2-TA-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..cd0ce42ab1336cc1591caa5cd56acf6db9afbc29 Binary files /dev/null and b/examples/2ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-TA-EN-test/state.json b/examples/2ST/Covost2-TA-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..c7aef0119e13ade230e9d5c50ca9b66df7193400 --- /dev/null +++ b/examples/2ST/Covost2-TA-EN-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "6f095ca26fe268ab", + "_format_columns": [ + "answer", + "context", + "instruction", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ZH-EN-test/dataset_info.json b/examples/2ST/Covost2-ZH-EN-test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..75447219133e63a2e07347f4f15add29dc2f358f --- /dev/null +++ b/examples/2ST/Covost2-ZH-EN-test/dataset_info.json @@ -0,0 +1,154 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "client_id": { + "dtype": "string", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "src_sentence": { + "dtype": "string", + "_type": "Value" + }, + "tgt_sentence": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/2ST/Covost2-ZH-EN-test/sample_0.wav b/examples/2ST/Covost2-ZH-EN-test/sample_0.wav new file mode 100644 index 0000000000000000000000000000000000000000..a0add517f30ee8b82cef1be3aba2d471645bd648 Binary files /dev/null and b/examples/2ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/2ST/Covost2-ZH-EN-test/sample_1.wav b/examples/2ST/Covost2-ZH-EN-test/sample_1.wav new file mode 100644 index 0000000000000000000000000000000000000000..6fad51ce295d62bb0b68826f1d1f3c3f4a2756e9 Binary files /dev/null and b/examples/2ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/2ST/Covost2-ZH-EN-test/sample_2.wav b/examples/2ST/Covost2-ZH-EN-test/sample_2.wav new file mode 100644 index 0000000000000000000000000000000000000000..e7bebe13c8efed3bb9b8c9b9a46831c8152240b5 Binary files /dev/null and b/examples/2ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/2ST/Covost2-ZH-EN-test/state.json b/examples/2ST/Covost2-ZH-EN-test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..4199c3b1fc42c18396a4f54e001d032ad37e6d97 --- /dev/null +++ b/examples/2ST/Covost2-ZH-EN-test/state.json @@ -0,0 +1,23 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "977bd2807131826b", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/AC/AudioCaps-Test/dataset_info.json b/examples/AC/AudioCaps-Test/dataset_info.json index 82148686a795bb258e6676260855fb8cf9ef19e4..c5138402a850f4b4605862059b4c7ab8debdb92b 100644 --- a/examples/AC/AudioCaps-Test/dataset_info.json +++ b/examples/AC/AudioCaps-Test/dataset_info.json @@ -100,6 +100,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AC/AudioCaps-Test/sample_0.wav b/examples/AC/AudioCaps-Test/sample_0.wav index 4b2f8047fa38f9ba3acef7485b26ea02f4ada359..4d69d901b51460ad829bd5c3b96bd16b4a62909e 100644 Binary files a/examples/AC/AudioCaps-Test/sample_0.wav and b/examples/AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_1.wav b/examples/AC/AudioCaps-Test/sample_1.wav index 8724df3f01ccd22778f84a7a851871f1d73434fe..fb2163d74f884d02d085d1680e467f5fcfdb91d3 100644 Binary files a/examples/AC/AudioCaps-Test/sample_1.wav and b/examples/AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_2.wav b/examples/AC/AudioCaps-Test/sample_2.wav index 48d08b19be5f0904ca976c35fbe4ae4d6c19435f..27962998a41716585567178d244d99ad6f8684e7 100644 Binary files a/examples/AC/AudioCaps-Test/sample_2.wav and b/examples/AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/AC/AudioCaps-Test/state.json b/examples/AC/AudioCaps-Test/state.json index fcc438d7c2db870c1b636d8299a9d843d607fbc6..014ade4c15956eca02b3b36e38274d20e16d0618 100644 --- a/examples/AC/AudioCaps-Test/state.json +++ b/examples/AC/AudioCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e736bf1821a473f3", + "_fingerprint": "7dd956b95601f713", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AC/WavCaps-Test/dataset_info.json b/examples/AC/WavCaps-Test/dataset_info.json index acf11db0c4cc1869f7763270ddadbfe4c30f73d4..8ac6e1be5f6fbdee81efb2e5d107213ff13d5377 100644 --- a/examples/AC/WavCaps-Test/dataset_info.json +++ b/examples/AC/WavCaps-Test/dataset_info.json @@ -96,6 +96,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AC/WavCaps-Test/sample_0.wav b/examples/AC/WavCaps-Test/sample_0.wav index ad8d45455c35860d7309e0554c6610ba6ddccb68..3ae6c0b454d470c5565fc6770051d08ca2bf693f 100644 Binary files a/examples/AC/WavCaps-Test/sample_0.wav and b/examples/AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/AC/WavCaps-Test/sample_1.wav b/examples/AC/WavCaps-Test/sample_1.wav index ea10461620e829d47fb78bf4d827b95322791340..0579abdb9ca1dc82c841c8024cdbd4fc5dbd0f9e 100644 Binary files a/examples/AC/WavCaps-Test/sample_1.wav and b/examples/AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/AC/WavCaps-Test/sample_2.wav b/examples/AC/WavCaps-Test/sample_2.wav index b7fef91fbedf60a5d58f4f9fb93d95c1e205bf67..4c647b74ede7e40740775fd68323fb57229d1383 100644 Binary files a/examples/AC/WavCaps-Test/sample_2.wav and b/examples/AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/AC/WavCaps-Test/state.json b/examples/AC/WavCaps-Test/state.json index 0d52b23c8e5b93506a8af809adf9680c9cc7bf86..77521aca2021217bce1aaaeb9ffca5a48285c78d 100644 --- a/examples/AC/WavCaps-Test/state.json +++ b/examples/AC/WavCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "742ab313af054565", + "_fingerprint": "22a6dfe54867e49c", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AQA/AudioCaps-QA-Test/dataset_info.json b/examples/AQA/AudioCaps-QA-Test/dataset_info.json index c6d61c8e72325cf36fabc952fbec1ca42e49e5e8..a271f8f69652bbad9be548c5545f57ef7d351f0e 100644 --- a/examples/AQA/AudioCaps-QA-Test/dataset_info.json +++ b/examples/AQA/AudioCaps-QA-Test/dataset_info.json @@ -104,6 +104,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AQA/AudioCaps-QA-Test/sample_0.wav b/examples/AQA/AudioCaps-QA-Test/sample_0.wav index 2eec3251fe8dc9acf17f43f66f187a277cf6c6b0..21e1b511fd264d1a7659a9e513407961bf087cdb 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_0.wav and b/examples/AQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_1.wav b/examples/AQA/AudioCaps-QA-Test/sample_1.wav index f7e101c5918451111738962b722e47041dd59227..4c39f41dc29ac5cb08966dfdd8a73f904ddb1823 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_1.wav and b/examples/AQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_2.wav b/examples/AQA/AudioCaps-QA-Test/sample_2.wav index cb15b2ffff83c6ec5541c8b54a8205d58292a2d3..35e1e28a00d6c890d2a92f65fd54f6dc6b071e97 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_2.wav and b/examples/AQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/state.json b/examples/AQA/AudioCaps-QA-Test/state.json index 267d6cbee3f52f7b8f77f20b959ca9ce159aed16..d8870c294493bc158c42378391238a63e792d3b6 100644 --- a/examples/AQA/AudioCaps-QA-Test/state.json +++ b/examples/AQA/AudioCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "52bc1dfcaf2a0f4b", + "_fingerprint": "60b01046f3ad5343", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AQA/Clotho-AQA-Test/dataset_info.json b/examples/AQA/Clotho-AQA-Test/dataset_info.json index 1b05abb4dce6b496c0a3c6043f27e4ca1f225320..6c56b97992cd0d890362752b2624919a9de2a1ee 100644 --- a/examples/AQA/Clotho-AQA-Test/dataset_info.json +++ b/examples/AQA/Clotho-AQA-Test/dataset_info.json @@ -87,6 +87,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AQA/Clotho-AQA-Test/sample_0.wav b/examples/AQA/Clotho-AQA-Test/sample_0.wav index d6a07e6172778d85080c04531658efc7443ae03d..f5f0a048306163ed4345c00aadeae8426b9d797a 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_0.wav and b/examples/AQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_1.wav b/examples/AQA/Clotho-AQA-Test/sample_1.wav index 139bd3226ca457718b3cdab6d1e7a99dd5e4bd01..e1d918453e30499c4e72944444dcbc7b91919383 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_1.wav and b/examples/AQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_2.wav b/examples/AQA/Clotho-AQA-Test/sample_2.wav index b62a8feb71883e7540f521d41ac6e3eefe3862a3..a62fd4ca863efed677bdde085e94389031fcdbf4 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_2.wav and b/examples/AQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/state.json b/examples/AQA/Clotho-AQA-Test/state.json index c58228cce70f0b257254856751c37d68dd8cd64f..4283473fd35325b09ea63487bc79660b9bfd8083 100644 --- a/examples/AQA/Clotho-AQA-Test/state.json +++ b/examples/AQA/Clotho-AQA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e2e76326f448d7c4", + "_fingerprint": "9728812a68aca05b", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AQA/WavCaps-QA-Test/dataset_info.json b/examples/AQA/WavCaps-QA-Test/dataset_info.json index 72ceb742ffcaf0f6ff67811fa628b1e1c7a1167e..aa59daad62b54e79ee3ad8bb220dcef211f82c84 100644 --- a/examples/AQA/WavCaps-QA-Test/dataset_info.json +++ b/examples/AQA/WavCaps-QA-Test/dataset_info.json @@ -100,6 +100,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AQA/WavCaps-QA-Test/sample_0.wav b/examples/AQA/WavCaps-QA-Test/sample_0.wav index 7639cdca2866a648ce90b4f5e385e3e6dc56c04a..ed90d471475217726db780c39603232512bf3785 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_0.wav and b/examples/AQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_1.wav b/examples/AQA/WavCaps-QA-Test/sample_1.wav index d2cc1a6def6014328e02ea5ea25019414f8960b4..9882a2cca41a0736468480868d4a2a41de7ededb 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_1.wav and b/examples/AQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_2.wav b/examples/AQA/WavCaps-QA-Test/sample_2.wav index 9629f69dd6d6b8a713b9122b03ee04ec4aae8857..36983956624f0737ac2cb3da85677cfd5d530e54 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_2.wav and b/examples/AQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/state.json b/examples/AQA/WavCaps-QA-Test/state.json index a85966493650dfae84811c91b9f42d7c2811ad9c..caa4e1e8d47964acfec8a4601e6cc62cfa0cd9e0 100644 --- a/examples/AQA/WavCaps-QA-Test/state.json +++ b/examples/AQA/WavCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "40995a6cc1fe3dc7", + "_fingerprint": "2b00ba42b5d66bed", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json index 913e85b36737f9004f81286043e7493d6f61b737..91b217e4844573d2e3a2d0072b2276b37723047c 100644 --- a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json +++ b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav index ae8061522976216887910263ba9414a7e60685eb..430d0d31ed6af0e3d219b84402902587aacfd716 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav index 209cf78dcde2791b463e32f9a7245514655790aa..c13b7c8d4d9189f4a6ce7b106e9c5419f0e06352 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav index 0d44634575f30bfe9c2fa2c2765ae34c192c9c90..cb33b1aabb60d6150a60e2f3c296c184bba786ac 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/state.json b/examples/AR/VoxCeleb-Accent-Test/state.json index 5354f99a49cf31f0949fd2359846d46859efd5ff..1a799ec15938c1058eb0d2806c8d2125c6d64cd6 100644 --- a/examples/AR/VoxCeleb-Accent-Test/state.json +++ b/examples/AR/VoxCeleb-Accent-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fa91a59f90c22c3c", + "_fingerprint": "8e8e0515e988a016", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json index 08f8bd6abcb7df02ab18d592990cc082baa8bfa3..532307de6238db4f6c7e9d7084dc690a975d8920 100644 --- a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json +++ b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json @@ -128,6 +128,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav index 42753b756f05c733803356b486de2df1b1224de4..d1259db1843cdd79bc9a3bb4778067fd209a65f3 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav index 643e1d9e9e461c2465856a18fbf89bb27f577a18..2b2a7d92a7ec2749ba9ef870edc34c5b5fc99ed0 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav index eb0894ce127ebe7c2fadb5b11feea3e5b0ace14f..7f5ceed8701b8ba50e59431c5e9f7b95ed7c1727 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/state.json b/examples/ASR/Common-Voice-15-En-Test/state.json index 1ff74abf94d1cbf9804c3911eac7edf199fb36a2..22446de8f160a7d55b4ac6835a39fd0c0fffb62c 100644 --- a/examples/ASR/Common-Voice-15-En-Test/state.json +++ b/examples/ASR/Common-Voice-15-En-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "468db91ad949e4d4", + "_fingerprint": "23bec5037b5ce6a4", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Earnings21-Test/dataset_info.json b/examples/ASR/Earnings21-Test/dataset_info.json index 1bf27e3aaa89f2fa43812252ac2377fab8ae1708..83b8d9f868f8dda4f2f055e065c0e23ebbdc4734 100644 --- a/examples/ASR/Earnings21-Test/dataset_info.json +++ b/examples/ASR/Earnings21-Test/dataset_info.json @@ -92,6 +92,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/Earnings21-Test/state.json b/examples/ASR/Earnings21-Test/state.json index ce1987be7235db123fcdddfe4a75272abf7513d4..cf8c9f552b4bb777dc628a2cf84b39eb0a86a964 100644 --- a/examples/ASR/Earnings21-Test/state.json +++ b/examples/ASR/Earnings21-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8cc0ad99446f1aba", + "_fingerprint": "6d8e11ac5a63a2d2", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Earnings22-Test/state.json b/examples/ASR/Earnings22-Test/state.json index ac26bdba683e345445fdeb81908f03626f2ced73..ae076d2547c5302d7bfe3408a69af25738b0730e 100644 --- a/examples/ASR/Earnings22-Test/state.json +++ b/examples/ASR/Earnings22-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "331c061bce6e651c", + "_fingerprint": "f71b90ac0caefff8", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/GigaSpeech-Test/dataset_info.json b/examples/ASR/GigaSpeech-Test/dataset_info.json index 1dd0025578e934e74b979da9e81789eedd9a2f29..4a71f4f719892bbe1f5c71332420787fe5dc92fe 100644 --- a/examples/ASR/GigaSpeech-Test/dataset_info.json +++ b/examples/ASR/GigaSpeech-Test/dataset_info.json @@ -120,6 +120,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/GigaSpeech-Test/sample_0.wav b/examples/ASR/GigaSpeech-Test/sample_0.wav index 0d232b1996ee07bc47f24d06fe8b860ee1b63b11..f08bbdf0a0af40f77b8c73c44976c63a78d2fc39 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_0.wav and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_1.wav b/examples/ASR/GigaSpeech-Test/sample_1.wav index ee5fced84fd0691c2b4b288be9a5ad73ac67bc6e..62d149d28202bbf67bc52cf39c3c87da9e934a4c 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_1.wav and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_2.wav b/examples/ASR/GigaSpeech-Test/sample_2.wav index 2a9edb2ea3e8a96c7ab70b232249205e765f02d0..3ce3d6a5694f07d04ab17fde1e29f308312e5517 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_2.wav and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ diff --git a/examples/ASR/GigaSpeech-Test/state.json b/examples/ASR/GigaSpeech-Test/state.json index 8bd5fd3d45201fc6807cb2364c48d5ba722bb4bf..8fca1d869d122159c4afa7b0214a240abe2f382d 100644 --- a/examples/ASR/GigaSpeech-Test/state.json +++ b/examples/ASR/GigaSpeech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "68d371cc267ff1d2", + "_fingerprint": "67d1ab1b99556a9f", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json index af0260da818c604e190e3b198aebef1a391419ec..ede02d74b595679e2a6f3b2256ab4c69e535f09e 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json @@ -85,6 +85,114 @@ "_type": "Value" } } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav index 2d696049b68e6f64944055f3bf5b7a01c8bcdad6..37142cc04ab5d70e5c1a3f00c48c5555c1b44b31 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav index 6f926f6a97132a8df675f05d2f14f1dec232a704..d66fd5602e0d455844807ef1f2a176aaaee1610b 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav index f03f65739284f1757c34e88313998578f6dd67d5..dc6701f3abb29f0fc5957697385ecc4a7d6c740b 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/state.json b/examples/ASR/IMDA-Part1-ASR-Test/state.json index f1cdd75238904ec8645cf1ef5a1a7e20981862a2..7c287183cce740341697c42ff3ca2cb4344caddf 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9c67b4f2f347692a", + "_fingerprint": "6de71e0f4c76af43", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json index af0260da818c604e190e3b198aebef1a391419ec..ede02d74b595679e2a6f3b2256ab4c69e535f09e 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json @@ -85,6 +85,114 @@ "_type": "Value" } } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav index d79fde19cec7b066b9f3f546d6ff6366dfa4daef..e49eb3e417896071b646badc40adc4b92f6b99da 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav index 9fac54a02b8b9ad1ee6502ef41ef1d50a7213de5..56cd23b4ba9c56fd053ae80c292ea8aa93e92293 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav index d866ad7a1ddfc3541ce3adbecf00f688489cdd3f..e1d594ef2f027e10f0bc02ffb197a5129fc3fbe7 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/state.json b/examples/ASR/IMDA-Part2-ASR-Test/state.json index 3c6b051944e6842913bce863ca9c239da3f8e0be..9e63a4d63533a0a5fd894966479ccc22f2528377 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9f3d440792a605d2", + "_fingerprint": "58564e4bc21961b9", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json index 5a91f71cfb2044e6060c8f395ee4b798384d32d8..43a7d2a02a5e9b58fc92641d1fa33f66bbb3ffb8 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json +++ b/examples/ASR/LibriSpeech-Test-Clean/dataset_info.json @@ -104,6 +104,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav index 051a14c05f7c270da3d842024c5936075cb5c2e6..07a99e97bf2b54b1c4028cf9280ec6cae995adb8 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav index 30816d1c205dd136109c6abfca19abf249813c68..9bc645b1dd32cc7e4be13999d3bb3190d6559376 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav index a228ce116181b6b19b741cd9ffc2e1853704adc5..b6d879dd4e7dc07638f71ca0808f7c9395efb420 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/state.json b/examples/ASR/LibriSpeech-Test-Clean/state.json index dda0f24f40bffbb5dbb1f236bc44f6e715655e67..3f21062dca2ac77c037dd729833f9e181bcffd92 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/state.json +++ b/examples/ASR/LibriSpeech-Test-Clean/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d9f5d173c305ae96", + "_fingerprint": "5f41ed9e62814ad1", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json index 5a91f71cfb2044e6060c8f395ee4b798384d32d8..43a7d2a02a5e9b58fc92641d1fa33f66bbb3ffb8 100644 --- a/examples/ASR/LibriSpeech-Test-Other/dataset_info.json +++ b/examples/ASR/LibriSpeech-Test-Other/dataset_info.json @@ -104,6 +104,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav index c274d02bd3392db09ea1a95bb050a248627b91cc..f0d20ff28c7910013946cc22d27bc14b642a397c 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav index eea20cd08457ea3ae2d55e91c7240602ae30436f..6faf2b05a1da557f1039edf9b67f714ab51c4bed 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav index b7cc7a7ab0cb2cb02a5253dcecede16c688acf86..20fb0bcad6556cfca16b1bf5466d1755061c631a 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/state.json b/examples/ASR/LibriSpeech-Test-Other/state.json index 952838bee518f052b1de767f99c58c1282b17596..bd657759ba4398444807dfcc3134ccd9d1bb79f8 100644 --- a/examples/ASR/LibriSpeech-Test-Other/state.json +++ b/examples/ASR/LibriSpeech-Test-Other/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8bd0648dc412be04", + "_fingerprint": "cdae4114b2fdba28", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Peoples-Speech-Test/dataset_info.json b/examples/ASR/Peoples-Speech-Test/dataset_info.json index a8d496de62c861269b2a5c8bf9826fedc8abf807..726c69526174b236cc38180a6b27905c9b182714 100644 --- a/examples/ASR/Peoples-Speech-Test/dataset_info.json +++ b/examples/ASR/Peoples-Speech-Test/dataset_info.json @@ -96,6 +96,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ASR/Peoples-Speech-Test/sample_0.wav b/examples/ASR/Peoples-Speech-Test/sample_0.wav index ff25cea2d23ec61f9199873fc67227221c1bebca..272fccbc0970764c68d7927c25d39ce307b484c1 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_0.wav and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_1.wav b/examples/ASR/Peoples-Speech-Test/sample_1.wav index 4e7ddfc1977129dfb02d6c4424b362bcd60c1a15..99f1c35821539e7cf65c0ece46d29ac8f137fbd7 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_1.wav and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_2.wav b/examples/ASR/Peoples-Speech-Test/sample_2.wav index 9f590a810351e28a60b5da87f6821f06f8916790..d9572909c9ffdc4d725b589d351ade4b0aa2998b 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_2.wav and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/state.json b/examples/ASR/Peoples-Speech-Test/state.json index 60db649d02fbab6497da719d1c536be91f9bda39..61144ef80bc11b38566cc904feca7995f1ab162c 100644 --- a/examples/ASR/Peoples-Speech-Test/state.json +++ b/examples/ASR/Peoples-Speech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "de704174c1b2e1ea", + "_fingerprint": "a31e8115b04802d9", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json index 55009f0312ac6d6605288017abbf50e0bafefdc3..10047f79d2f1a1b8a3f0185fa89ecb0170a4d82c 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json @@ -49,6 +49,132 @@ "dtype": "string", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/Tedlium3-Long-form-Test/state.json b/examples/ASR/Tedlium3-Long-form-Test/state.json index 3a56b9cee5a756794d32d1b5f03c01a501c41606..802648bca0c3237cf834eb90ae139e55941d4e34 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/state.json +++ b/examples/ASR/Tedlium3-Long-form-Test/state.json @@ -4,12 +4,19 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f95b9bf4e3dea7c1", + "_fingerprint": "58eff5b352a6c4af", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/Tedlium3-Test/dataset_info.json b/examples/ASR/Tedlium3-Test/dataset_info.json index 55009f0312ac6d6605288017abbf50e0bafefdc3..10047f79d2f1a1b8a3f0185fa89ecb0170a4d82c 100644 --- a/examples/ASR/Tedlium3-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Test/dataset_info.json @@ -49,6 +49,132 @@ "dtype": "string", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/ASR/Tedlium3-Test/sample_0.wav b/examples/ASR/Tedlium3-Test/sample_0.wav index a07fc005b1f77a01b066c0ef962b04e634f4c356..b8aea73a6c3619a9b9044110a1f8a6d98613724a 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_0.wav and b/examples/ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_1.wav b/examples/ASR/Tedlium3-Test/sample_1.wav index f864baa998ef015b529fc69d8bccca6f284233f1..5d0764a8f20943f4bb99690206c17dfff7985307 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_1.wav and b/examples/ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_2.wav b/examples/ASR/Tedlium3-Test/sample_2.wav index 66c6482678614fbd0c658553c4c727a50d80c57a..1a4418fb82d58a63b8d6658a096b619913bb614d 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_2.wav and b/examples/ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/ASR/Tedlium3-Test/state.json b/examples/ASR/Tedlium3-Test/state.json index ffb37e795661eaf0f656a4272372d0919a492fe0..05687cd786f8e6e1ceb43737d38165b6602b7dc9 100644 --- a/examples/ASR/Tedlium3-Test/state.json +++ b/examples/ASR/Tedlium3-Test/state.json @@ -4,12 +4,19 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fb20b90d5641df89", + "_fingerprint": "564760102352a6d3", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json index 051243e1e6b3046a83599b80eb901679ff2608d8..c42d75c27c529687386dbb50124a3b199bd3b176 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json @@ -102,6 +102,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav index a69d64c8284caa8ca7ef3f5ecaf6ebc0519020ef..0631f9745cbc39d30899c534252b902cb0c33ba0 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav index a149a1ab68e19b0029225518d217608c573d37e2..23da00fdc805d70ec90066b602512d110280554d 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav index b2816f1cb90418b611227d1e6d044e886712b426..7d4f68158ad3c22144160b44d2b94dd2489a4f4d 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/state.json b/examples/CNASR/Aishell-ASR-ZH-Test/state.json index 4495f512dec2e1549747a3cd9e31137e0afb8081..72520f06a2dfd1a4c01b59451d2c6ebb5cbf7868 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/state.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f9833c929864587b", + "_fingerprint": "c55bbfbc80134880", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json index ae1585a301a57eef40e3c39259d56a7e70e2be43..ec9982ce23f02e4a30ed69b54c9da27522206ad7 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json +++ b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav index 3aea288a199cf828777f07051ce17bb65dd122b9..69f31212a1bf5d0220e889032a34835cc4f8414b 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav index f20cf5efc4a86d62d733d80fc2cde556ea107245..f8c1d3734b3687c8a2205aad61368bffa54cadf2 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav index 6085d420403bb54190cde8d1cffef75b35f2fa88..12e93e96005e4c422ed6f789bf1ac0273e6fc483 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/state.json b/examples/ER/IEMOCAP-Emotion-Test/state.json index f9e210ccb738232f4a7ce004649cc0811b5622e7..47aac6246491fd600c3ee9f77d9ece88074be1e1 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/state.json +++ b/examples/ER/IEMOCAP-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "78bf80b897adbddb", + "_fingerprint": "e76359f2b84e8913", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ER/MELD-Emotion-Test/dataset_info.json b/examples/ER/MELD-Emotion-Test/dataset_info.json index 506cf34c0d7158134159a3234f9f98b8e6b74f28..4452de7db0b48a7e233a40373a253c8d4cca6984 100644 --- a/examples/ER/MELD-Emotion-Test/dataset_info.json +++ b/examples/ER/MELD-Emotion-Test/dataset_info.json @@ -124,6 +124,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ER/MELD-Emotion-Test/sample_0.wav b/examples/ER/MELD-Emotion-Test/sample_0.wav index 819fccfa77653af1d839db36a4d89d6c5073676d..ae16f804b90a7eeabb027d788c1b7e291a50405f 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_0.wav and b/examples/ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_1.wav b/examples/ER/MELD-Emotion-Test/sample_1.wav index a21acc5a4a0831f75c28e76a93e0339f98a5dab9..621748fea89f2ae2cb00ccf4c5bc60722757966c 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_1.wav and b/examples/ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_2.wav b/examples/ER/MELD-Emotion-Test/sample_2.wav index 98c1a4ec43768374663eb5cae1305480110d87e8..389dfb97dbb804a09de0bf8f007f59be149eb2c8 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_2.wav and b/examples/ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Emotion-Test/state.json b/examples/ER/MELD-Emotion-Test/state.json index 9c219f8e85ce34e78d9b81244ed1bd84435ebb4c..176649fbdd530cb7834be5416a7c4810f251679a 100644 --- a/examples/ER/MELD-Emotion-Test/state.json +++ b/examples/ER/MELD-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "a5a596edab97a213", + "_fingerprint": "af1e1756291ebf0e", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ER/MELD-Sentiment-Test/dataset_info.json b/examples/ER/MELD-Sentiment-Test/dataset_info.json index 506cf34c0d7158134159a3234f9f98b8e6b74f28..4452de7db0b48a7e233a40373a253c8d4cca6984 100644 --- a/examples/ER/MELD-Sentiment-Test/dataset_info.json +++ b/examples/ER/MELD-Sentiment-Test/dataset_info.json @@ -124,6 +124,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/ER/MELD-Sentiment-Test/sample_0.wav b/examples/ER/MELD-Sentiment-Test/sample_0.wav index 025684f18597120ad16569fb77cc1700b855f6c9..34c3f4a45a3e8ed05f716b36d0770ec367449f4d 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_0.wav and b/examples/ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_1.wav b/examples/ER/MELD-Sentiment-Test/sample_1.wav index 8d64fff1bab98e316eec6c61842e48f7b8f333c4..aa2cd17de168f8dc8c16081cfb5cf2567c7a8701 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_1.wav and b/examples/ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_2.wav b/examples/ER/MELD-Sentiment-Test/sample_2.wav index f639c22e1b1e24a5d6db354da2e8cf424cdb45be..99a6b96db3614af9fc6b3fc25800ceed3df78bb3 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_2.wav and b/examples/ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/state.json b/examples/ER/MELD-Sentiment-Test/state.json index 917ccc849fc69d74df055b821dd46d31b29e2e8d..803fbcc33947edac767d6061118da9cb6318c792 100644 --- a/examples/ER/MELD-Sentiment-Test/state.json +++ b/examples/ER/MELD-Sentiment-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "a052e830551840d2", + "_fingerprint": "7785c7413a306461", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json index ae1585a301a57eef40e3c39259d56a7e70e2be43..ec9982ce23f02e4a30ed69b54c9da27522206ad7 100644 --- a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json +++ b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav index 0f29a85f3b63eb74da8cd026aab5aa13498e0125..f052a2e401793a6a2cad7fd12ec9e7dc305ea5b2 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav index 56cec7eeb6836d092e76201787aa22b9436c13f2..fa42b5bdbf1f708fcb039e99845040df41d84da4 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav index 5c68747f7620a99d23ad13f8d2fd7386ed49332c..5f81af300bee9be5ef0de8ab0c7e120a4d0b917b 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/state.json b/examples/GR/IEMOCAP-Gender-Test/state.json index 909b8a4a39ef78e0e286d8b51244d38d68e3aa31..ed84f0cd8521cebcc00f6456b76bc9d78d767f38 100644 --- a/examples/GR/IEMOCAP-Gender-Test/state.json +++ b/examples/GR/IEMOCAP-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "7a4eb80e3f03a3f4", + "_fingerprint": "339f506943f7e884", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json index 913e85b36737f9004f81286043e7493d6f61b737..91b217e4844573d2e3a2d0072b2276b37723047c 100644 --- a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json +++ b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav index 2f6e333859e59c14c99e08c0ed07ec34da06b7cc..1108051bf59741d780dd1895721cef626655ad4d 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav index cdc77dfd8a54374fbf21b32128129fde94d44c2c..170de49b786e11c8d6f9ea105f30cef9eceb5862 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav index 8b3634c13fc5cefe14475fc673f9458aa7c79815..b520f722b1880dc2c15fb2090a174016982e9eff 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/state.json b/examples/GR/VoxCeleb-Gender-Test/state.json index d0591ec49e94a4c129e72d00c70af869214dafd8..59877853a608c9f52a3b29cf3f9db4524b1addce 100644 --- a/examples/GR/VoxCeleb-Gender-Test/state.json +++ b/examples/GR/VoxCeleb-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "29964e6c779e5e22", + "_fingerprint": "d16ef5cdce6dd4c2", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SI/ALPACA-Audio-Test/dataset_info.json b/examples/SI/ALPACA-Audio-Test/dataset_info.json index 882789dac9ac95ae77467581f55b4f296746e7a9..408aaad5f7e22888fd0fa1ac9c64f759cf6d77bd 100644 --- a/examples/SI/ALPACA-Audio-Test/dataset_info.json +++ b/examples/SI/ALPACA-Audio-Test/dataset_info.json @@ -104,6 +104,28 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SI/ALPACA-Audio-Test/sample_0.wav b/examples/SI/ALPACA-Audio-Test/sample_0.wav index 70248c9fd2ff3b5792099536311d05bed53fba47..39211d1c195a9613197680da766e2f46e1b7deb5 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_0.wav and b/examples/SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_1.wav b/examples/SI/ALPACA-Audio-Test/sample_1.wav index 0fa85b0e999023c1118dd960236ae84689438330..e22fc1b4899053c55b2c93f61a17ae04290603dd 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_1.wav and b/examples/SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_2.wav b/examples/SI/ALPACA-Audio-Test/sample_2.wav index b5f187db3678a198b23df8b683f8492ae5ff60ac..f61f6006c97c647388826d1ecc9dde7f2e8a8c2f 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_2.wav and b/examples/SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/state.json b/examples/SI/ALPACA-Audio-Test/state.json index 6ba2848fa1bbfd0cf127c67ce6315770b72d590d..3bd9c038b001c84bbd46e26df7c121a7ea8e1315 100644 --- a/examples/SI/ALPACA-Audio-Test/state.json +++ b/examples/SI/ALPACA-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f46fe3d489641513", + "_fingerprint": "41ebac7aa5808e92", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SI/OpenHermes-Audio-Test/dataset_info.json b/examples/SI/OpenHermes-Audio-Test/dataset_info.json index ac515de4ff190d0b7bd0ce417dfbe2b34d6c0bb8..c047f4a40521a5ca6dd80262091713a20934eee1 100644 --- a/examples/SI/OpenHermes-Audio-Test/dataset_info.json +++ b/examples/SI/OpenHermes-Audio-Test/dataset_info.json @@ -116,6 +116,28 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SI/OpenHermes-Audio-Test/sample_0.wav b/examples/SI/OpenHermes-Audio-Test/sample_0.wav index add65880600b56384cc5350e64c30be65d0ec19d..fc6251969fc752f60c52056c83739e8b206578ea 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_0.wav and b/examples/SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_1.wav b/examples/SI/OpenHermes-Audio-Test/sample_1.wav index 8fa3cc5ffd5e8304b28e2c00f15edfda78c293b2..c64f49698924ca6c035902b254cabfd6015cf6f7 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_1.wav and b/examples/SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_2.wav b/examples/SI/OpenHermes-Audio-Test/sample_2.wav index 47dc6714c50f2650cb528c81edf0f18a8f3148e9..8086c0e0a6b16c63cc8fa165089a3cc45e65e6e3 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_2.wav and b/examples/SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/state.json b/examples/SI/OpenHermes-Audio-Test/state.json index 9a19e3c159f16d99ee14394ad0aab31b7594a7eb..fe7dccc553f7fd6116624ea9dd30fc50e026c50a 100644 --- a/examples/SI/OpenHermes-Audio-Test/state.json +++ b/examples/SI/OpenHermes-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "bec0fd435c621121", + "_fingerprint": "ec1d0f482d452867", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json index 623b82f8d7d18ff8870b56c298783f1b5f32dc57..4771b4afa178372d926d8585373b25efe7c24093 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -100,6 +100,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav index 8aea2bff27874d78fbee581ad8849d0ab0ac9fc8..2950fb2efc80385a38eafaf8e5323e235dc6ef5a 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav index 750f1b8414060167651ac33128408b510b1545ca..8e38a92819daf71488f83afff75446fafa16653f 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/state.json b/examples/SQA/CN-College-Listen-MCQ-Test/state.json index 1afd2605e58a9e5304f9f324f51af5e7d535d636..209d66d20de4ced39cf612551d829563dd12715c 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/state.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b4fb19374756e22d", + "_fingerprint": "d6665c93899c985b", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json index b12863f3596549cd265ea212a5964a91c271a7cf..1e091c2cd9b8341f9d611e06e1ae03d1ce1f1524 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -114,6 +114,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/state.json b/examples/SQA/DREAM-TTS-MCQ-Test/state.json index 9429aff4cfdd4360de95ee6eaf6f11a40d6ceefa..93713a14d71ecaaf7a461735519e8172057aaa74 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/state.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "4ae1a389c9652fd2", + "_fingerprint": "aa689dcb170b0cb8", "_format_columns": [ "answer", "context", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "qwen_audio_chat" ], diff --git a/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json b/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json index 5b865ccbdc90f460864fa658307b1fd6e12b5ac6..203817254af5fa6bd621c9a43dfaaf6d61ad4922 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/dataset_info.json @@ -108,6 +108,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/Public-SG-Speech-QA-Test/state.json b/examples/SQA/Public-SG-Speech-QA-Test/state.json index 0f2668023e6a567ada41c9ea082f99773f2e8f6e..3a16cade1f2154b599ff0ee007e06680bcb75c72 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/state.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "ede505a635b66631", + "_fingerprint": "caccddb9c34b3f21", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "qwen_audio_chat" ], diff --git a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json index 6c681f17c89b019564bbd2d0509865a05ce4db3a..e1666dfd561139c081bd34bc111a1ef188c97c19 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/dataset_info.json @@ -137,6 +137,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/SLUE-P2-SQA5-Test/state.json b/examples/SQA/SLUE-P2-SQA5-Test/state.json index 64fcf1074f14e6cc130ef25406cd0718f61798a7..9269d6748a4b83722969b0997a02f1a9ebf3807c 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/state.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "400b504ce3034854", + "_fingerprint": "e3c5a96704e595fe", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/SQA/Spoken-Squad-Test/dataset_info.json b/examples/SQA/Spoken-Squad-Test/dataset_info.json index 5b50e517bf0760f04194c6b7c720d733151674aa..59461f89e6a39255aabddc04d03a28a8b503d28f 100644 --- a/examples/SQA/Spoken-Squad-Test/dataset_info.json +++ b/examples/SQA/Spoken-Squad-Test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "whisper_large_v3_with_llama_3_8b_instruct": { "answer": { "dtype": "string", diff --git a/examples/SQA/Spoken-Squad-Test/state.json b/examples/SQA/Spoken-Squad-Test/state.json index bad03ce21b0c5b24ac005094f5a25236804a54a7..3657f1896e75f83413b7bb4253530da9c1b1c624 100644 --- a/examples/SQA/Spoken-Squad-Test/state.json +++ b/examples/SQA/Spoken-Squad-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "416952584cf805a4", + "_fingerprint": "8482b9acafa077ac", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "whisper_large_v3_with_llama_3_8b_instruct", "mowe_audio", "qwen_audio_chat" diff --git a/examples/ST/Covost2-EN-ID-test/dataset_info.json b/examples/ST/Covost2-EN-ID-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-EN-ID-test/dataset_info.json +++ b/examples/ST/Covost2-EN-ID-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-ID-test/sample_0.wav b/examples/ST/Covost2-EN-ID-test/sample_0.wav index 5e70047fc638745caac9c90c2539cc9f18168f8a..25c2b5a12d15e235eb9aed64a3c872d32edba496 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_0.wav and b/examples/ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_1.wav b/examples/ST/Covost2-EN-ID-test/sample_1.wav index 832716522492f3cbeb066ed58c45eab2e9036a2f..2b89ba1a9ffe0b5d8c0659c9410edc725f6b2dd7 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_1.wav and b/examples/ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_2.wav b/examples/ST/Covost2-EN-ID-test/sample_2.wav index 2ccb333e9339d1fda08f7c84fe4fe2cd0b22508c..f3431117fb2b3a42a63c3138a0605ca0b0046ca5 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_2.wav and b/examples/ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/state.json b/examples/ST/Covost2-EN-ID-test/state.json index d460d81ba05ec305339ae4a50c65c2654e706950..9b4dc3f7b4960563f749f82c2c02bcc2666ea115 100644 --- a/examples/ST/Covost2-EN-ID-test/state.json +++ b/examples/ST/Covost2-EN-ID-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "17a5c97a84a7f33c", + "_fingerprint": "45aac62476189dab", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-EN-TA-test/dataset_info.json b/examples/ST/Covost2-EN-TA-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-EN-TA-test/dataset_info.json +++ b/examples/ST/Covost2-EN-TA-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-TA-test/sample_0.wav b/examples/ST/Covost2-EN-TA-test/sample_0.wav index ad0f73bf6dbfe42e17635b063161d6154cfcf28a..f5cc0d779f3accaa6dff1dcc3f76c6c9b7370446 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_0.wav and b/examples/ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_1.wav b/examples/ST/Covost2-EN-TA-test/sample_1.wav index e5d11995c4bd1e3484f0c15d828c678300d899b2..3aff66d4d7e2772192d6b37dc0e0142fc57adb0f 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_1.wav and b/examples/ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_2.wav b/examples/ST/Covost2-EN-TA-test/sample_2.wav index 8910c6f24aebdc2aab0e2517f304448129282655..188a2b20fa8c2612762b09e774ace9f070b8aff0 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_2.wav and b/examples/ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/state.json b/examples/ST/Covost2-EN-TA-test/state.json index de82636ce42828a45d6c8c4ca1edde9874d87cc3..5e91574d7cd30ff6ea2a8f7fb95ce8d2f0890a83 100644 --- a/examples/ST/Covost2-EN-TA-test/state.json +++ b/examples/ST/Covost2-EN-TA-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e9d273226522711f", + "_fingerprint": "e30a809aaa184c6f", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-EN-ZH-test/dataset_info.json b/examples/ST/Covost2-EN-ZH-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-EN-ZH-test/dataset_info.json +++ b/examples/ST/Covost2-EN-ZH-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-ZH-test/sample_0.wav b/examples/ST/Covost2-EN-ZH-test/sample_0.wav index f428967146cecccb88b318b8388e3b897cd14fca..ebe2b681a9e476cf91aa9f2a2541a57988454221 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_0.wav and b/examples/ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_1.wav b/examples/ST/Covost2-EN-ZH-test/sample_1.wav index e91b4565eb72ab11f760c9b276daf4f6998f9e5f..dfac27d0e29080bd490e51ed86a09a4804eb5ff1 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_1.wav and b/examples/ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_2.wav b/examples/ST/Covost2-EN-ZH-test/sample_2.wav index 38f60afc485334971149ebeb3985573093dd9a96..1b3be50610325f271c28947bd1fa44c6586d6fe2 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_2.wav and b/examples/ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/state.json b/examples/ST/Covost2-EN-ZH-test/state.json index 7dafba85d0dbd927e3c9aae50f0d1a3cf3e2c71c..531e57e55ef2ebf6d0e2336fb125c4db42b0f9c8 100644 --- a/examples/ST/Covost2-EN-ZH-test/state.json +++ b/examples/ST/Covost2-EN-ZH-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "86243bad639f0cb6", + "_fingerprint": "8089a8574e5ffd7a", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-ID-EN-test/dataset_info.json b/examples/ST/Covost2-ID-EN-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-ID-EN-test/dataset_info.json +++ b/examples/ST/Covost2-ID-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-ID-EN-test/sample_0.wav b/examples/ST/Covost2-ID-EN-test/sample_0.wav index 60cf3a26a30e046d87c72a4e4d4f15cf54732039..953755836980bd4b6cd2968d39e255ed0f199bfe 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_0.wav and b/examples/ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_1.wav b/examples/ST/Covost2-ID-EN-test/sample_1.wav index 0b37d62b0b132a59e5ce6d2c4551c1d701143efb..5302d7d0ff51cdce307b5fbb5ec7c3cc61bd8ee8 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_1.wav and b/examples/ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_2.wav b/examples/ST/Covost2-ID-EN-test/sample_2.wav index 5eda0ef756def2b097ee2bef2a92b8b9e99ff5c3..d01013320d93fa16a15ee38383b01dce205c6989 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_2.wav and b/examples/ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/state.json b/examples/ST/Covost2-ID-EN-test/state.json index 592b67b79c5171bdccfcf07d28855f756e7f4f23..ccf75843fd30ff7f8653f7a9820c53ddece757b4 100644 --- a/examples/ST/Covost2-ID-EN-test/state.json +++ b/examples/ST/Covost2-ID-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "69b492f6dd79179e", + "_fingerprint": "86eef937bbaf81f4", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-TA-EN-test/dataset_info.json b/examples/ST/Covost2-TA-EN-test/dataset_info.json index 0a34b7bc1ac66816e090968b4de959503e2c4168..63cdba3ce5662d2c70078e2343b090c3f42aa100 100644 --- a/examples/ST/Covost2-TA-EN-test/dataset_info.json +++ b/examples/ST/Covost2-TA-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-TA-EN-test/sample_0.wav b/examples/ST/Covost2-TA-EN-test/sample_0.wav index 642a3386437533748a4df9cbf7c45ab6f5bd0b6f..a930e9d6ecdf71d1a5c482512d317fc0d774d231 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_0.wav and b/examples/ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_1.wav b/examples/ST/Covost2-TA-EN-test/sample_1.wav index a9c003707ecdee7e8918014b2f77a69adbe71d93..20c17bae309b829001b09cf6376675f9210ff751 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_1.wav and b/examples/ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_2.wav b/examples/ST/Covost2-TA-EN-test/sample_2.wav index cd0ce42ab1336cc1591caa5cd56acf6db9afbc29..eaa8c8c0cfa770f7ebc137f70c07bceea40046d9 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_2.wav and b/examples/ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/state.json b/examples/ST/Covost2-TA-EN-test/state.json index c7aef0119e13ade230e9d5c50ca9b66df7193400..1fa35e273c3e8ee0d8bb7c397deadf9509f8b56f 100644 --- a/examples/ST/Covost2-TA-EN-test/state.json +++ b/examples/ST/Covost2-TA-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "6f095ca26fe268ab", + "_fingerprint": "93608e86f8b7524b", "_format_columns": [ "answer", "context", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ], diff --git a/examples/ST/Covost2-ZH-EN-test/dataset_info.json b/examples/ST/Covost2-ZH-EN-test/dataset_info.json index 75447219133e63a2e07347f4f15add29dc2f358f..b25db6989f0e56b64fa01d76c2fb75832a886b5e 100644 --- a/examples/ST/Covost2-ZH-EN-test/dataset_info.json +++ b/examples/ST/Covost2-ZH-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "meralion_audiollm_v1_lora": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "mowe_audio": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-ZH-EN-test/sample_0.wav b/examples/ST/Covost2-ZH-EN-test/sample_0.wav index a0add517f30ee8b82cef1be3aba2d471645bd648..b287e87b0a79fc0c8b8c25cbbe54a201e0201592 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_0.wav and b/examples/ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_1.wav b/examples/ST/Covost2-ZH-EN-test/sample_1.wav index 6fad51ce295d62bb0b68826f1d1f3c3f4a2756e9..25cec91737fb23243bb76c92b6077da6d3acc357 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_1.wav and b/examples/ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_2.wav b/examples/ST/Covost2-ZH-EN-test/sample_2.wav index e7bebe13c8efed3bb9b8c9b9a46831c8152240b5..78b1a729ea55854f60d3d1c91b40704ed23bd42e 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_2.wav and b/examples/ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/state.json b/examples/ST/Covost2-ZH-EN-test/state.json index 4199c3b1fc42c18396a4f54e001d032ad37e6d97..3c449d398d13af1bb2ffddab3dd7f8a8fd373a41 100644 --- a/examples/ST/Covost2-ZH-EN-test/state.json +++ b/examples/ST/Covost2-ZH-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "977bd2807131826b", + "_fingerprint": "98d00264fe4b6901", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "meralion_audiollm_v1_lora", "mowe_audio", "qwen_audio_chat" ],