diff --git a/.gitattributes b/.gitattributes index 720577f6a623cf375ee10b6c1bb7576fecd96b96..a9259ff0147e4ff3cc1677a41c974d0babde90f3 100644 --- a/.gitattributes +++ b/.gitattributes @@ -56,3 +56,7 @@ examples/ASR/Tedlium3-Long-form-Test/sample_0.wav filter=lfs diff=lfs merge=lfs examples/ASR/Tedlium3-Long-form-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text examples/ASR/Tedlium3-Long-form-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text diff --git a/examples/AC/AudioCaps-Test/sample_0.wav b/examples/AC/AudioCaps-Test/sample_0.wav index 509a39869ae9101b674f191a5887448af94d2664..4b2f8047fa38f9ba3acef7485b26ea02f4ada359 100644 Binary files a/examples/AC/AudioCaps-Test/sample_0.wav and b/examples/AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_1.wav b/examples/AC/AudioCaps-Test/sample_1.wav index 1c964ebcc0ac6e615f72aa92421b336272c5e5c2..8724df3f01ccd22778f84a7a851871f1d73434fe 100644 Binary files a/examples/AC/AudioCaps-Test/sample_1.wav and b/examples/AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_2.wav b/examples/AC/AudioCaps-Test/sample_2.wav index 1b6418b2410e278e532d31ea339e74f6a29585dc..48d08b19be5f0904ca976c35fbe4ae4d6c19435f 100644 Binary files a/examples/AC/AudioCaps-Test/sample_2.wav and b/examples/AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/AC/AudioCaps-Test/state.json b/examples/AC/AudioCaps-Test/state.json index 0cd0d4b3978e10a5f85d569ffc028dacc1886b74..fcc438d7c2db870c1b636d8299a9d843d607fbc6 100644 --- a/examples/AC/AudioCaps-Test/state.json +++ b/examples/AC/AudioCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "3729f6c56764c342", + "_fingerprint": "e736bf1821a473f3", "_format_columns": [ "context", "instruction", diff --git a/examples/AC/WavCaps-Test/sample_0.wav b/examples/AC/WavCaps-Test/sample_0.wav index 08a85f269beade4c541e3f49a9b2518f31a95ed9..ad8d45455c35860d7309e0554c6610ba6ddccb68 100644 Binary files a/examples/AC/WavCaps-Test/sample_0.wav and b/examples/AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/AC/WavCaps-Test/sample_1.wav b/examples/AC/WavCaps-Test/sample_1.wav index 462d0c2d0189352555dd97182326599b27096f43..ea10461620e829d47fb78bf4d827b95322791340 100644 Binary files a/examples/AC/WavCaps-Test/sample_1.wav and b/examples/AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/AC/WavCaps-Test/sample_2.wav b/examples/AC/WavCaps-Test/sample_2.wav index 62717178e93e79c98c5b81bc4122f65bc6b52efe..b7fef91fbedf60a5d58f4f9fb93d95c1e205bf67 100644 Binary files a/examples/AC/WavCaps-Test/sample_2.wav and b/examples/AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/AC/WavCaps-Test/state.json b/examples/AC/WavCaps-Test/state.json index a3eb658121b9663c5f4d92aa705da048df185326..0d52b23c8e5b93506a8af809adf9680c9cc7bf86 100644 --- a/examples/AC/WavCaps-Test/state.json +++ b/examples/AC/WavCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "6c3c2a5f2db349d8", + "_fingerprint": "742ab313af054565", "_format_columns": [ "context", "instruction", diff --git a/examples/AQA/AudioCaps-QA-Test/sample_0.wav b/examples/AQA/AudioCaps-QA-Test/sample_0.wav index 59fcd56a181073f56c1f0fba45f0ccda8a6337e2..2eec3251fe8dc9acf17f43f66f187a277cf6c6b0 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_0.wav and b/examples/AQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_1.wav b/examples/AQA/AudioCaps-QA-Test/sample_1.wav index 7ca320beb2ea16f32257519116b1f17d456f9bc4..f7e101c5918451111738962b722e47041dd59227 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_1.wav and b/examples/AQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_2.wav b/examples/AQA/AudioCaps-QA-Test/sample_2.wav index 6d062d0a489a8b0334deee75ebe530dbae953dd2..cb15b2ffff83c6ec5541c8b54a8205d58292a2d3 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_2.wav and b/examples/AQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/state.json b/examples/AQA/AudioCaps-QA-Test/state.json index 35ad9ea556daeee418b93d6a77ad56c5cf801dec..267d6cbee3f52f7b8f77f20b959ca9ce159aed16 100644 --- a/examples/AQA/AudioCaps-QA-Test/state.json +++ b/examples/AQA/AudioCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "026dfac674d9ef77", + "_fingerprint": "52bc1dfcaf2a0f4b", "_format_columns": [ "context", "instruction", diff --git a/examples/AQA/Clotho-AQA-Test/dataset_info.json b/examples/AQA/Clotho-AQA-Test/dataset_info.json index e584e27299bec6d9aa2eee45871c216c67ac17b0..1b05abb4dce6b496c0a3c6043f27e4ca1f225320 100644 --- a/examples/AQA/Clotho-AQA-Test/dataset_info.json +++ b/examples/AQA/Clotho-AQA-Test/dataset_info.json @@ -32,7 +32,115 @@ "_type": "Value" } }, - "other_attributes": {} + "other_attributes": {}, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } }, "homepage": "", "license": "" diff --git a/examples/AQA/Clotho-AQA-Test/sample_0.wav b/examples/AQA/Clotho-AQA-Test/sample_0.wav index c48c65eea211ef53e5929d3744cbd0e73fd166a0..d6a07e6172778d85080c04531658efc7443ae03d 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_0.wav and b/examples/AQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_1.wav b/examples/AQA/Clotho-AQA-Test/sample_1.wav index b2a9524bb08edd5e5ab700fec3d49b610338efd2..139bd3226ca457718b3cdab6d1e7a99dd5e4bd01 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_1.wav and b/examples/AQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_2.wav b/examples/AQA/Clotho-AQA-Test/sample_2.wav index ed8314310c19afb4235badd32f937feeb25387b5..b62a8feb71883e7540f521d41ac6e3eefe3862a3 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_2.wav and b/examples/AQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/state.json b/examples/AQA/Clotho-AQA-Test/state.json index 9cdc1447ea2f55e517f57bddb7f7fefb175a45b3..c58228cce70f0b257254856751c37d68dd8cd64f 100644 --- a/examples/AQA/Clotho-AQA-Test/state.json +++ b/examples/AQA/Clotho-AQA-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "515a1722077187bd", + "_fingerprint": "e2e76326f448d7c4", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/AQA/WavCaps-QA-Test/sample_0.wav b/examples/AQA/WavCaps-QA-Test/sample_0.wav index a7483f6a72f398b0b35db48322f6bcadb048867d..7639cdca2866a648ce90b4f5e385e3e6dc56c04a 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_0.wav and b/examples/AQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_1.wav b/examples/AQA/WavCaps-QA-Test/sample_1.wav index 41cf6d0cc9f8990b3599985156b44550dcbc9dfb..d2cc1a6def6014328e02ea5ea25019414f8960b4 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_1.wav and b/examples/AQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_2.wav b/examples/AQA/WavCaps-QA-Test/sample_2.wav index 13f24a2d1bfc7e2be0519def176d968885691e74..9629f69dd6d6b8a713b9122b03ee04ec4aae8857 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_2.wav and b/examples/AQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/state.json b/examples/AQA/WavCaps-QA-Test/state.json index 89917a40550177f4b54b4e3fe0885df6d78d0aab..a85966493650dfae84811c91b9f42d7c2811ad9c 100644 --- a/examples/AQA/WavCaps-QA-Test/state.json +++ b/examples/AQA/WavCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "46b38bc22103a7cd", + "_fingerprint": "40995a6cc1fe3dc7", "_format_columns": [ "context", "instruction", diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav index 637255b8ce4a76eab5145234259d4d9e27a7449d..ae8061522976216887910263ba9414a7e60685eb 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav index 81f38dc724603587c4c75c351b90f236b23a77d7..209cf78dcde2791b463e32f9a7245514655790aa 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav index f34db19cc48eb83b64600413f61e2e4eff07d2cd..0d44634575f30bfe9c2fa2c2765ae34c192c9c90 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/state.json b/examples/AR/VoxCeleb-Accent-Test/state.json index a0e4beb3d20ddc496c9863a6d3809657a69c09ea..5354f99a49cf31f0949fd2359846d46859efd5ff 100644 --- a/examples/AR/VoxCeleb-Accent-Test/state.json +++ b/examples/AR/VoxCeleb-Accent-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f1df87f5b3ca8c97", + "_fingerprint": "fa91a59f90c22c3c", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav index 8e9932f1a577bd50d53c17860ba30589866112d8..42753b756f05c733803356b486de2df1b1224de4 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav index d17339bbdb7e343da7e8514f03bdbfe8d9ed399f..643e1d9e9e461c2465856a18fbf89bb27f577a18 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav index b760edbde1d6bfe80fcf77aecf7ac87aa21ecd4d..eb0894ce127ebe7c2fadb5b11feea3e5b0ace14f 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/state.json b/examples/ASR/Common-Voice-15-En-Test/state.json index 80d695eaf0d326aba5c1c42c08f6620b7711f3fc..1ff74abf94d1cbf9804c3911eac7edf199fb36a2 100644 --- a/examples/ASR/Common-Voice-15-En-Test/state.json +++ b/examples/ASR/Common-Voice-15-En-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "5a02a12eee6eb15a", + "_fingerprint": "468db91ad949e4d4", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Earnings21-Test/state.json b/examples/ASR/Earnings21-Test/state.json index 03ec809e2ddf9f2fe68b495f0237cb499f70be44..ce1987be7235db123fcdddfe4a75272abf7513d4 100644 --- a/examples/ASR/Earnings21-Test/state.json +++ b/examples/ASR/Earnings21-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "0d42a0f2cebd16d8", + "_fingerprint": "8cc0ad99446f1aba", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Earnings22-Test/state.json b/examples/ASR/Earnings22-Test/state.json index fb5db0657dff508744e282d20abdbfcf783797eb..ac26bdba683e345445fdeb81908f03626f2ced73 100644 --- a/examples/ASR/Earnings22-Test/state.json +++ b/examples/ASR/Earnings22-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1427a3866fe2cb1a", + "_fingerprint": "331c061bce6e651c", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/GigaSpeech-Test/sample_0.wav b/examples/ASR/GigaSpeech-Test/sample_0.wav index d5b1a0190ab705e242079ac6552d760aa86442e3..0d232b1996ee07bc47f24d06fe8b860ee1b63b11 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_0.wav and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_1.wav b/examples/ASR/GigaSpeech-Test/sample_1.wav index 8d4715d3ee2255ff632248568ef394a3cdf11417..ee5fced84fd0691c2b4b288be9a5ad73ac67bc6e 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_1.wav and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_2.wav b/examples/ASR/GigaSpeech-Test/sample_2.wav index ce79e6d621393311dc585ed610534909eca05aa8..2a9edb2ea3e8a96c7ab70b232249205e765f02d0 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_2.wav and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ diff --git a/examples/ASR/GigaSpeech-Test/state.json b/examples/ASR/GigaSpeech-Test/state.json index 34243fb62eea510a1aba5545b6b66d326f73dc0a..8bd5fd3d45201fc6807cb2364c48d5ba722bb4bf 100644 --- a/examples/ASR/GigaSpeech-Test/state.json +++ b/examples/ASR/GigaSpeech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9527d9b9b39b34c3", + "_fingerprint": "68d371cc267ff1d2", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json index 5649fa6ac42fc9645c7aea4be9d03cc30d774647..af0260da818c604e190e3b198aebef1a391419ec 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/dataset_info.json @@ -85,78 +85,6 @@ "_type": "Value" } } - }, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } } }, "homepage": "", diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav index bc8a46dd654a6ba711d10f8c3d4eda7c84bb20ce..2d696049b68e6f64944055f3bf5b7a01c8bcdad6 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav index 12fed4125c25fc3c4377b72a77b37fdd7f4fc4a4..6f926f6a97132a8df675f05d2f14f1dec232a704 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav index c4e2909a95a0a3090cab61cd24e9f534075fc8aa..f03f65739284f1757c34e88313998578f6dd67d5 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/state.json b/examples/ASR/IMDA-Part1-ASR-Test/state.json index 6105b023680c0feaa933a66b55356fd3dd70a71b..f1cdd75238904ec8645cf1ef5a1a7e20981862a2 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/state.json @@ -4,16 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e4d91fe35602a9e0", + "_fingerprint": "9c67b4f2f347692a", "_format_columns": [ "context", "instruction", "answer", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "whisper_large_v3_with_llama_3_8b_instruct", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json index 5649fa6ac42fc9645c7aea4be9d03cc30d774647..af0260da818c604e190e3b198aebef1a391419ec 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/dataset_info.json @@ -85,78 +85,6 @@ "_type": "Value" } } - }, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } } }, "homepage": "", diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav index d5c33dee5bf21565cf381e88d3c4a1ca6dbebe78..d79fde19cec7b066b9f3f546d6ff6366dfa4daef 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav index 164c99dcf823a95655abe8aed5a11745da3d55d7..9fac54a02b8b9ad1ee6502ef41ef1d50a7213de5 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav index f945ea7be22586a84f70fc5fe138402953d608ce..d866ad7a1ddfc3541ce3adbecf00f688489cdd3f 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/state.json b/examples/ASR/IMDA-Part2-ASR-Test/state.json index 8583a6d8b377ebe373993a561db34452f3d28444..3c6b051944e6842913bce863ca9c239da3f8e0be 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/state.json @@ -4,16 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "0ed051d84878e4e9", + "_fingerprint": "9f3d440792a605d2", "_format_columns": [ "context", "instruction", "answer", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "whisper_large_v3_with_llama_3_8b_instruct", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav index 7f30ab438a700b7e52038787753e5e93d1b4de39..051a14c05f7c270da3d842024c5936075cb5c2e6 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav index e040ea7d8261366c44b0e9a7ddaca8f8c8044621..30816d1c205dd136109c6abfca19abf249813c68 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav index 23229886de2e3f6fb8823fe797326027011f3f09..a228ce116181b6b19b741cd9ffc2e1853704adc5 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/state.json b/examples/ASR/LibriSpeech-Test-Clean/state.json index e31a1168b4cff86848674d4197e432b9899e0739..dda0f24f40bffbb5dbb1f236bc44f6e715655e67 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/state.json +++ b/examples/ASR/LibriSpeech-Test-Clean/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "db499491d573fb1e", + "_fingerprint": "d9f5d173c305ae96", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav index 1343a8bf6091855e6ca7569ef96e2064b3cb69c8..c274d02bd3392db09ea1a95bb050a248627b91cc 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav index 108ec3d17493f7321e6ba6553c94dc62fa96d24b..eea20cd08457ea3ae2d55e91c7240602ae30436f 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav index c5737246f32031af2551b79359c9e5704e49df1f..b7cc7a7ab0cb2cb02a5253dcecede16c688acf86 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/state.json b/examples/ASR/LibriSpeech-Test-Other/state.json index c73a8c0f165fb6c789d2533084a09b301b9e0d1a..952838bee518f052b1de767f99c58c1282b17596 100644 --- a/examples/ASR/LibriSpeech-Test-Other/state.json +++ b/examples/ASR/LibriSpeech-Test-Other/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e751a89ce4227535", + "_fingerprint": "8bd0648dc412be04", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Peoples-Speech-Test/sample_0.wav b/examples/ASR/Peoples-Speech-Test/sample_0.wav index 0534e049487d4a5e44133719598cecc5fe9fc23a..ff25cea2d23ec61f9199873fc67227221c1bebca 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_0.wav and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_1.wav b/examples/ASR/Peoples-Speech-Test/sample_1.wav index 5b34837973bb7e663e05d6f623e1957baebdc905..4e7ddfc1977129dfb02d6c4424b362bcd60c1a15 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_1.wav and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_2.wav b/examples/ASR/Peoples-Speech-Test/sample_2.wav index beca31e7418576a9fc43d747be077a95e2353450..9f590a810351e28a60b5da87f6821f06f8916790 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_2.wav and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/state.json b/examples/ASR/Peoples-Speech-Test/state.json index 632eca89f3c4fea70a453e57a3349323c699797f..60db649d02fbab6497da719d1c536be91f9bda39 100644 --- a/examples/ASR/Peoples-Speech-Test/state.json +++ b/examples/ASR/Peoples-Speech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d07f97bfff42b092", + "_fingerprint": "de704174c1b2e1ea", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json index cf9965f85a972806830fe14f38cc250366f94118..55009f0312ac6d6605288017abbf50e0bafefdc3 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json @@ -49,114 +49,6 @@ "dtype": "string", "_type": "Value" } - }, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "Qwen2-Audio-7B-Instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "mowe_audio": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } } }, "homepage": "", diff --git a/examples/ASR/Tedlium3-Long-form-Test/state.json b/examples/ASR/Tedlium3-Long-form-Test/state.json index 30eac41b1222be656086486a4bb4030c8b8e7c9a..3a56b9cee5a756794d32d1b5f03c01a501c41606 100644 --- a/examples/ASR/Tedlium3-Long-form-Test/state.json +++ b/examples/ASR/Tedlium3-Long-form-Test/state.json @@ -4,18 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "ffcb019ec304c5cd", + "_fingerprint": "f95b9bf4e3dea7c1", "_format_columns": [ "context", "instruction", "answer", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "Qwen2-Audio-7B-Instruct", - "whisper_large_v3_with_llama_3_8b_instruct", - "mowe_audio", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/ASR/Tedlium3-Test/dataset_info.json b/examples/ASR/Tedlium3-Test/dataset_info.json index cf9965f85a972806830fe14f38cc250366f94118..55009f0312ac6d6605288017abbf50e0bafefdc3 100644 --- a/examples/ASR/Tedlium3-Test/dataset_info.json +++ b/examples/ASR/Tedlium3-Test/dataset_info.json @@ -49,114 +49,6 @@ "dtype": "string", "_type": "Value" } - }, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "Qwen2-Audio-7B-Instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "mowe_audio": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } } }, "homepage": "", diff --git a/examples/ASR/Tedlium3-Test/sample_0.wav b/examples/ASR/Tedlium3-Test/sample_0.wav index 7f9c12706b47e8985cfae1d8e3a33bbb2b93351e..a07fc005b1f77a01b066c0ef962b04e634f4c356 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_0.wav and b/examples/ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_1.wav b/examples/ASR/Tedlium3-Test/sample_1.wav index f5bdad33f96732f6068f5c0ae1ccfd191e658f14..f864baa998ef015b529fc69d8bccca6f284233f1 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_1.wav and b/examples/ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_2.wav b/examples/ASR/Tedlium3-Test/sample_2.wav index 650c17709acccb1026eb3f3926caddac87575039..66c6482678614fbd0c658553c4c727a50d80c57a 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_2.wav and b/examples/ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/ASR/Tedlium3-Test/state.json b/examples/ASR/Tedlium3-Test/state.json index c31686ed45f77e866be12eb356f308537d01d8c5..ffb37e795661eaf0f656a4272372d0919a492fe0 100644 --- a/examples/ASR/Tedlium3-Test/state.json +++ b/examples/ASR/Tedlium3-Test/state.json @@ -4,18 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "6fde2d47e7fcba36", + "_fingerprint": "fb20b90d5641df89", "_format_columns": [ "context", "instruction", "answer", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "Qwen2-Audio-7B-Instruct", - "whisper_large_v3_with_llama_3_8b_instruct", - "mowe_audio", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav index a9896d7452058a68f5fb36a098fecb80a61f0179..a69d64c8284caa8ca7ef3f5ecaf6ebc0519020ef 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav index 2b463dad5a36bdd326d83dde600f460252e97213..a149a1ab68e19b0029225518d217608c573d37e2 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav index 1e797f2f358af46173549971074f7b393c6ea266..b2816f1cb90418b611227d1e6d044e886712b426 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/state.json b/examples/CNASR/Aishell-ASR-ZH-Test/state.json index 928046ee4e58edb3f83df389b509f6c73998aa2f..4495f512dec2e1549747a3cd9e31137e0afb8081 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/state.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "2f95a38020869f6f", + "_fingerprint": "f9833c929864587b", "_format_columns": [ "context", "instruction", diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav index f7fbcec204ce9cad676607ba5d3ecc1164ff7cd7..3aea288a199cf828777f07051ce17bb65dd122b9 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav index 1a821ca8f4d90e16681d927d1bad127bbc131232..f20cf5efc4a86d62d733d80fc2cde556ea107245 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav index 9c332fd914253d28f3a7b4851e34ce672c7fb4da..6085d420403bb54190cde8d1cffef75b35f2fa88 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/state.json b/examples/ER/IEMOCAP-Emotion-Test/state.json index 4cd3ac71d16a1ccd9aefa29c3c207cb5e00e0dec..f9e210ccb738232f4a7ce004649cc0811b5622e7 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/state.json +++ b/examples/ER/IEMOCAP-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f660f47d60092a28", + "_fingerprint": "78bf80b897adbddb", "_format_columns": [ "context", "instruction", diff --git a/examples/ER/MELD-Emotion-Test/sample_0.wav b/examples/ER/MELD-Emotion-Test/sample_0.wav index 66b28d69b04ddcdbf6649355a50b7431e068b6c1..819fccfa77653af1d839db36a4d89d6c5073676d 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_0.wav and b/examples/ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_1.wav b/examples/ER/MELD-Emotion-Test/sample_1.wav index 8b65e8917f9c613b9ace25b5cdf2213b1cc5885c..a21acc5a4a0831f75c28e76a93e0339f98a5dab9 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_1.wav and b/examples/ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_2.wav b/examples/ER/MELD-Emotion-Test/sample_2.wav index b5212fef08bd6083b353d3de4ca02e557b0895ff..98c1a4ec43768374663eb5cae1305480110d87e8 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_2.wav and b/examples/ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Emotion-Test/state.json b/examples/ER/MELD-Emotion-Test/state.json index 4cd00e9f41fc5acfa195cacd42f4b6d869bcfdba..9c219f8e85ce34e78d9b81244ed1bd84435ebb4c 100644 --- a/examples/ER/MELD-Emotion-Test/state.json +++ b/examples/ER/MELD-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8ed652b090e9b45d", + "_fingerprint": "a5a596edab97a213", "_format_columns": [ "context", "instruction", diff --git a/examples/ER/MELD-Sentiment-Test/sample_0.wav b/examples/ER/MELD-Sentiment-Test/sample_0.wav index 70c2320d56f8b078431ee6b90da30c838aa0a3b5..025684f18597120ad16569fb77cc1700b855f6c9 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_0.wav and b/examples/ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_1.wav b/examples/ER/MELD-Sentiment-Test/sample_1.wav index 9030fb4ce73e38966b2ec587b110d811c1cf4adc..8d64fff1bab98e316eec6c61842e48f7b8f333c4 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_1.wav and b/examples/ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_2.wav b/examples/ER/MELD-Sentiment-Test/sample_2.wav index 0e25c2ff0f76cf0fbd9a24f091b7317d8f577fa2..f639c22e1b1e24a5d6db354da2e8cf424cdb45be 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_2.wav and b/examples/ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/state.json b/examples/ER/MELD-Sentiment-Test/state.json index 0732040b40ce0a76abeca37ca83769b790adaebd..917ccc849fc69d74df055b821dd46d31b29e2e8d 100644 --- a/examples/ER/MELD-Sentiment-Test/state.json +++ b/examples/ER/MELD-Sentiment-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "12b7f85ef427fcec", + "_fingerprint": "a052e830551840d2", "_format_columns": [ "context", "instruction", diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav index b14c47968a74f9c8bfda57605b321677db3a951b..0f29a85f3b63eb74da8cd026aab5aa13498e0125 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav index f14fe7b4e7bebf1ba0ea4671bd54061c9aa690af..56cec7eeb6836d092e76201787aa22b9436c13f2 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav index d4e718fbd94d944b8dedc542c4032539ebcd5bf7..5c68747f7620a99d23ad13f8d2fd7386ed49332c 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/state.json b/examples/GR/IEMOCAP-Gender-Test/state.json index b556f362aef4e6bfad3e7f0959553733a304f250..909b8a4a39ef78e0e286d8b51244d38d68e3aa31 100644 --- a/examples/GR/IEMOCAP-Gender-Test/state.json +++ b/examples/GR/IEMOCAP-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "42f5f6e863c92512", + "_fingerprint": "7a4eb80e3f03a3f4", "_format_columns": [ "context", "instruction", diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav index f834b4c7fa0bbc42f9ae00985df35f41be1eba1e..2f6e333859e59c14c99e08c0ed07ec34da06b7cc 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav index 318509e9dd39ef9205be46c24307cffb31a89523..cdc77dfd8a54374fbf21b32128129fde94d44c2c 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav index 887e042ebd793961f567457f41b6a9ed19107ba1..8b3634c13fc5cefe14475fc673f9458aa7c79815 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/state.json b/examples/GR/VoxCeleb-Gender-Test/state.json index 3fd1c757fa6496421e73ae77b5da8ed09efa38f7..d0591ec49e94a4c129e72d00c70af869214dafd8 100644 --- a/examples/GR/VoxCeleb-Gender-Test/state.json +++ b/examples/GR/VoxCeleb-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "7951265b5c594ce6", + "_fingerprint": "29964e6c779e5e22", "_format_columns": [ "context", "instruction", diff --git a/examples/SI/ALPACA-Audio-Test/sample_0.wav b/examples/SI/ALPACA-Audio-Test/sample_0.wav index 1d3e06ea09dc44a7e851372270ae926ca09161ca..70248c9fd2ff3b5792099536311d05bed53fba47 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_0.wav and b/examples/SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_1.wav b/examples/SI/ALPACA-Audio-Test/sample_1.wav index 63423226a7fea5c7d3cd5cdf708415493411a2ad..0fa85b0e999023c1118dd960236ae84689438330 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_1.wav and b/examples/SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_2.wav b/examples/SI/ALPACA-Audio-Test/sample_2.wav index aee526d870dcfea114cbe77a58f2b906db3a1846..b5f187db3678a198b23df8b683f8492ae5ff60ac 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_2.wav and b/examples/SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/state.json b/examples/SI/ALPACA-Audio-Test/state.json index ec1bcb6f73e7f696b90634ca1416212deb70497c..6ba2848fa1bbfd0cf127c67ce6315770b72d590d 100644 --- a/examples/SI/ALPACA-Audio-Test/state.json +++ b/examples/SI/ALPACA-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "60ec3d739f1f264e", + "_fingerprint": "f46fe3d489641513", "_format_columns": [ "context", "instruction", diff --git a/examples/SI/OpenHermes-Audio-Test/dataset_info.json b/examples/SI/OpenHermes-Audio-Test/dataset_info.json index 454bcaf8f0186d26601a0107af54dc2e906e4091..ac515de4ff190d0b7bd0ce417dfbe2b34d6c0bb8 100644 --- a/examples/SI/OpenHermes-Audio-Test/dataset_info.json +++ b/examples/SI/OpenHermes-Audio-Test/dataset_info.json @@ -49,6 +49,138 @@ "dtype": "int64", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "audio_text_instruction": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/SI/OpenHermes-Audio-Test/sample_0.wav b/examples/SI/OpenHermes-Audio-Test/sample_0.wav index b848eaa0197df53f0c66a042dd2cf0907a18adb6..add65880600b56384cc5350e64c30be65d0ec19d 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_0.wav and b/examples/SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_1.wav b/examples/SI/OpenHermes-Audio-Test/sample_1.wav index 8990c2638a876f50eabb711a3c65e07809a7aa48..8fa3cc5ffd5e8304b28e2c00f15edfda78c293b2 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_1.wav and b/examples/SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_2.wav b/examples/SI/OpenHermes-Audio-Test/sample_2.wav index f2d8572e54eeabe52f6e37c3d285d5b1a39060e6..47dc6714c50f2650cb528c81edf0f18a8f3148e9 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_2.wav and b/examples/SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/state.json b/examples/SI/OpenHermes-Audio-Test/state.json index 3e62882afeb4c4672ca7b438517834bcc61c2771..9a19e3c159f16d99ee14394ad0aab31b7594a7eb 100644 --- a/examples/SI/OpenHermes-Audio-Test/state.json +++ b/examples/SI/OpenHermes-Audio-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "19108060d5e74733", + "_fingerprint": "bec0fd435c621121", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json index 81903da2dc3bf5b849827427d5a61f932fe485bb..623b82f8d7d18ff8870b56c298783f1b5f32dc57 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/dataset_info.json @@ -45,6 +45,114 @@ "dtype": "string", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav index 02faddd4fba70d832d7b726a181b922e974bd1ac..8aea2bff27874d78fbee581ad8849d0ab0ac9fc8 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav index 86ba0d6fedd50d874900f973f86967cdf9c54ab4..750f1b8414060167651ac33128408b510b1545ca 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/state.json b/examples/SQA/CN-College-Listen-MCQ-Test/state.json index 023c37fec316be4e2d15a4c442c4e4ec31bc090a..1afd2605e58a9e5304f9f324f51af5e7d535d636 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/state.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/state.json @@ -4,12 +4,18 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "ea9d9086266315b3", + "_fingerprint": "b4fb19374756e22d", "_format_columns": [ "context", "instruction", "answer", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json index a16938258fabe5c46b36019db75cd53e5132c21f..b12863f3596549cd265ea212a5964a91c271a7cf 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/dataset_info.json @@ -59,6 +59,96 @@ "dtype": "string", "_type": "Value" } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } } }, "homepage": "", diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav index 701eaadff036dbe4397ac3d3e5cf953cdafe2492..15f7e2c92aa5c8e199277474dfbb81d27c9ef002 100644 Binary files a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/state.json b/examples/SQA/DREAM-TTS-MCQ-Test/state.json index 60f24b2e2e0fc0f619402aedaba80fe1a6102aa6..9429aff4cfdd4360de95ee6eaf6f11a40d6ceefa 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/state.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/state.json @@ -4,12 +4,17 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8d4dbaf0bbc46c99", + "_fingerprint": "4ae1a389c9652fd2", "_format_columns": [ "answer", "context", "instruction", - "other_attributes" + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "qwen_audio_chat" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/SQA/Public-SG-Speech-QA-Test/state.json b/examples/SQA/Public-SG-Speech-QA-Test/state.json index 966e1f2dfe5fba436d6ddb0873b91c39ee0f9530..0f2668023e6a567ada41c9ea082f99773f2e8f6e 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/state.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "4d4e665c9f359042", + "_fingerprint": "ede505a635b66631", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/SLUE-P2-SQA5-Test/state.json b/examples/SQA/SLUE-P2-SQA5-Test/state.json index e79576fcdc5892cb2e18d1920422d90537058f9b..64fcf1074f14e6cc130ef25406cd0718f61798a7 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/state.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "0bc180e1898c34b5", + "_fingerprint": "400b504ce3034854", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/Spoken-Squad-Test/dataset_info.json b/examples/SQA/Spoken-Squad-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..5b50e517bf0760f04194c6b7c720d733151674aa --- /dev/null +++ b/examples/SQA/Spoken-Squad-Test/dataset_info.json @@ -0,0 +1,172 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "answer_length": { + "dtype": "int64", + "_type": "Value" + }, + "context_length": { + "dtype": "int64", + "_type": "Value" + }, + "instruction_length": { + "dtype": "int64", + "_type": "Value" + }, + "paragraph_id": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + }, + "topic_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/SQA/Spoken-Squad-Test/state.json b/examples/SQA/Spoken-Squad-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..bad03ce21b0c5b24ac005094f5a25236804a54a7 --- /dev/null +++ b/examples/SQA/Spoken-Squad-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "416952584cf805a4", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ST/Covost2-EN-ID-test/sample_0.wav b/examples/ST/Covost2-EN-ID-test/sample_0.wav index aaa73891ca80e6daf935a96362b99724bf30e5f2..5e70047fc638745caac9c90c2539cc9f18168f8a 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_0.wav and b/examples/ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_1.wav b/examples/ST/Covost2-EN-ID-test/sample_1.wav index 8bb713755533c9c9c40fd7bc6fb97ceccc71b5c1..832716522492f3cbeb066ed58c45eab2e9036a2f 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_1.wav and b/examples/ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_2.wav b/examples/ST/Covost2-EN-ID-test/sample_2.wav index e82baf335e4809fb09b4fd7447b6c96cad4e289f..2ccb333e9339d1fda08f7c84fe4fe2cd0b22508c 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_2.wav and b/examples/ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/state.json b/examples/ST/Covost2-EN-ID-test/state.json index d35329d7cd163f8aeae7496f3cfa3ebdc423220f..d460d81ba05ec305339ae4a50c65c2654e706950 100644 --- a/examples/ST/Covost2-EN-ID-test/state.json +++ b/examples/ST/Covost2-EN-ID-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b3bfe1ac88ff72a0", + "_fingerprint": "17a5c97a84a7f33c", "_format_columns": [ "context", "instruction", diff --git a/examples/ST/Covost2-EN-TA-test/sample_0.wav b/examples/ST/Covost2-EN-TA-test/sample_0.wav index 0c62dd1aab9919d9bcd776efbe484e001b592598..ad0f73bf6dbfe42e17635b063161d6154cfcf28a 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_0.wav and b/examples/ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_1.wav b/examples/ST/Covost2-EN-TA-test/sample_1.wav index 4246c85fb21814573dbb736d188584a42e63f29f..e5d11995c4bd1e3484f0c15d828c678300d899b2 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_1.wav and b/examples/ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_2.wav b/examples/ST/Covost2-EN-TA-test/sample_2.wav index d339cd3dc96f925f24a1f7501e4645c8844fb369..8910c6f24aebdc2aab0e2517f304448129282655 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_2.wav and b/examples/ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/state.json b/examples/ST/Covost2-EN-TA-test/state.json index 8ca1326d120d940e69fbea2e66a9174a5d88f864..de82636ce42828a45d6c8c4ca1edde9874d87cc3 100644 --- a/examples/ST/Covost2-EN-TA-test/state.json +++ b/examples/ST/Covost2-EN-TA-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "76e8db43e251f03a", + "_fingerprint": "e9d273226522711f", "_format_columns": [ "context", "instruction", diff --git a/examples/ST/Covost2-EN-ZH-test/sample_0.wav b/examples/ST/Covost2-EN-ZH-test/sample_0.wav index a5ace1d21eb77c249ac8bf9f268b8f1bfec5519a..f428967146cecccb88b318b8388e3b897cd14fca 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_0.wav and b/examples/ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_1.wav b/examples/ST/Covost2-EN-ZH-test/sample_1.wav index 8c40d7e137c6f005a0b3fd087f5b68aff125adc7..e91b4565eb72ab11f760c9b276daf4f6998f9e5f 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_1.wav and b/examples/ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_2.wav b/examples/ST/Covost2-EN-ZH-test/sample_2.wav index b1ce2588bbd2cd289ea7b7d3996c95a904eeb06d..38f60afc485334971149ebeb3985573093dd9a96 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_2.wav and b/examples/ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/state.json b/examples/ST/Covost2-EN-ZH-test/state.json index cf87654cc0e72ceb65b8e71d7428d648a2f1549b..7dafba85d0dbd927e3c9aae50f0d1a3cf3e2c71c 100644 --- a/examples/ST/Covost2-EN-ZH-test/state.json +++ b/examples/ST/Covost2-EN-ZH-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d27fe19e989510df", + "_fingerprint": "86243bad639f0cb6", "_format_columns": [ "context", "instruction", diff --git a/examples/ST/Covost2-ID-EN-test/sample_0.wav b/examples/ST/Covost2-ID-EN-test/sample_0.wav index c5fc387f794ec6644cd2c2a04cbced7d49950f5f..60cf3a26a30e046d87c72a4e4d4f15cf54732039 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_0.wav and b/examples/ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_1.wav b/examples/ST/Covost2-ID-EN-test/sample_1.wav index 89cea5e89bc92eb93b885643c1e250e7a987a73c..0b37d62b0b132a59e5ce6d2c4551c1d701143efb 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_1.wav and b/examples/ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_2.wav b/examples/ST/Covost2-ID-EN-test/sample_2.wav index bbcdcfd796d393d9d227a57fe543b8e442b1d94f..5eda0ef756def2b097ee2bef2a92b8b9e99ff5c3 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_2.wav and b/examples/ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/state.json b/examples/ST/Covost2-ID-EN-test/state.json index 060cb29ce89ace31ec246bf22af9ae83459382e3..592b67b79c5171bdccfcf07d28855f756e7f4f23 100644 --- a/examples/ST/Covost2-ID-EN-test/state.json +++ b/examples/ST/Covost2-ID-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1ad122cc3b0e20fe", + "_fingerprint": "69b492f6dd79179e", "_format_columns": [ "context", "instruction", diff --git a/examples/ST/Covost2-TA-EN-test/sample_0.wav b/examples/ST/Covost2-TA-EN-test/sample_0.wav index 070bd162c47080814b4afe6d0ff0c1f629926ff7..642a3386437533748a4df9cbf7c45ab6f5bd0b6f 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_0.wav and b/examples/ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_1.wav b/examples/ST/Covost2-TA-EN-test/sample_1.wav index c5ad72bcc3bdcbdc2b2b9a13b92c0e78ea2f6e95..a9c003707ecdee7e8918014b2f77a69adbe71d93 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_1.wav and b/examples/ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_2.wav b/examples/ST/Covost2-TA-EN-test/sample_2.wav index ccf5eda02b57d943a995a0504dde21135376ff21..cd0ce42ab1336cc1591caa5cd56acf6db9afbc29 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_2.wav and b/examples/ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/state.json b/examples/ST/Covost2-TA-EN-test/state.json index 047e39432ccad934ad34d2ebdf861a7031e4ff96..c7aef0119e13ade230e9d5c50ca9b66df7193400 100644 --- a/examples/ST/Covost2-TA-EN-test/state.json +++ b/examples/ST/Covost2-TA-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "ec8fc1af5b770f60", + "_fingerprint": "6f095ca26fe268ab", "_format_columns": [ "answer", "context", diff --git a/examples/ST/Covost2-ZH-EN-test/sample_0.wav b/examples/ST/Covost2-ZH-EN-test/sample_0.wav index 261eb86cee105db49f22654b63ce14980709edec..a0add517f30ee8b82cef1be3aba2d471645bd648 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_0.wav and b/examples/ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_1.wav b/examples/ST/Covost2-ZH-EN-test/sample_1.wav index 4bfd72806def045cf386666dcd2ca307040049ec..6fad51ce295d62bb0b68826f1d1f3c3f4a2756e9 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_1.wav and b/examples/ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_2.wav b/examples/ST/Covost2-ZH-EN-test/sample_2.wav index 1fd592e9eccd9f45016e64157bed001a02da4421..e7bebe13c8efed3bb9b8c9b9a46831c8152240b5 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_2.wav and b/examples/ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/state.json b/examples/ST/Covost2-ZH-EN-test/state.json index e230ae52dd845127cf654fc006737e7883a1dcc9..4199c3b1fc42c18396a4f54e001d032ad37e6d97 100644 --- a/examples/ST/Covost2-ZH-EN-test/state.json +++ b/examples/ST/Covost2-ZH-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f049563334d9978e", + "_fingerprint": "977bd2807131826b", "_format_columns": [ "context", "instruction",