diff --git a/.gitattributes b/.gitattributes index 1d106ec848dda05696abd065e59c1d09fe637cc0..720577f6a623cf375ee10b6c1bb7576fecd96b96 100644 --- a/.gitattributes +++ b/.gitattributes @@ -52,3 +52,7 @@ examples/SQA/Spoken-Squad-v1/sample_0.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-v1/sample_1.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/Spoken-Squad-v1/sample_2.wav filter=lfs diff=lfs merge=lfs -text examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Tedlium3-Long-form-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Tedlium3-Long-form-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text +examples/ASR/Tedlium3-Long-form-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text +examples/SQA/Public-SG-Speech-QA-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text diff --git a/examples/AC/AudioCaps-Test/sample_0.wav b/examples/AC/AudioCaps-Test/sample_0.wav index e37f34b9f07a7b6266957992824bab914ce15fe3..509a39869ae9101b674f191a5887448af94d2664 100644 Binary files a/examples/AC/AudioCaps-Test/sample_0.wav and b/examples/AC/AudioCaps-Test/sample_0.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_1.wav b/examples/AC/AudioCaps-Test/sample_1.wav index 9f16faee9805c2cbf530d36958c99f82952f42f3..1c964ebcc0ac6e615f72aa92421b336272c5e5c2 100644 Binary files a/examples/AC/AudioCaps-Test/sample_1.wav and b/examples/AC/AudioCaps-Test/sample_1.wav differ diff --git a/examples/AC/AudioCaps-Test/sample_2.wav b/examples/AC/AudioCaps-Test/sample_2.wav index 95c13a1a0c7e8fce564fba1e7ecd506797ef83ed..1b6418b2410e278e532d31ea339e74f6a29585dc 100644 Binary files a/examples/AC/AudioCaps-Test/sample_2.wav and b/examples/AC/AudioCaps-Test/sample_2.wav differ diff --git a/examples/AC/AudioCaps-Test/state.json b/examples/AC/AudioCaps-Test/state.json index b334a469898f2333372cff0d2c4e4def008d271e..0cd0d4b3978e10a5f85d569ffc028dacc1886b74 100644 --- a/examples/AC/AudioCaps-Test/state.json +++ b/examples/AC/AudioCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "e654a4081bc1365b", + "_fingerprint": "3729f6c56764c342", "_format_columns": [ "context", "instruction", diff --git a/examples/AC/WavCaps-Test/sample_0.wav b/examples/AC/WavCaps-Test/sample_0.wav index 86803de3706292dd68f1a49c6dc5b66662eabae8..08a85f269beade4c541e3f49a9b2518f31a95ed9 100644 Binary files a/examples/AC/WavCaps-Test/sample_0.wav and b/examples/AC/WavCaps-Test/sample_0.wav differ diff --git a/examples/AC/WavCaps-Test/sample_1.wav b/examples/AC/WavCaps-Test/sample_1.wav index 00182dc3b08c94349036bf2f6da3df783e7358dc..462d0c2d0189352555dd97182326599b27096f43 100644 Binary files a/examples/AC/WavCaps-Test/sample_1.wav and b/examples/AC/WavCaps-Test/sample_1.wav differ diff --git a/examples/AC/WavCaps-Test/sample_2.wav b/examples/AC/WavCaps-Test/sample_2.wav index a9c4f74921215f29b6e4e01b3ec87b98d4c64a04..62717178e93e79c98c5b81bc4122f65bc6b52efe 100644 Binary files a/examples/AC/WavCaps-Test/sample_2.wav and b/examples/AC/WavCaps-Test/sample_2.wav differ diff --git a/examples/AC/WavCaps-Test/state.json b/examples/AC/WavCaps-Test/state.json index 18c89274b45b312d6a38f3f6e978483c100b9db1..a3eb658121b9663c5f4d92aa705da048df185326 100644 --- a/examples/AC/WavCaps-Test/state.json +++ b/examples/AC/WavCaps-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "ce408e4cfa3eec8a", + "_fingerprint": "6c3c2a5f2db349d8", "_format_columns": [ "context", "instruction", diff --git a/examples/AQA/AudioCaps-QA-Test/dataset_info.json b/examples/AQA/AudioCaps-QA-Test/dataset_info.json index 927e9073f4fbf7e4c1b46bca4fd2f1dc1cb18fa6..c6d61c8e72325cf36fabc952fbec1ca42e49e5e8 100644 --- a/examples/AQA/AudioCaps-QA-Test/dataset_info.json +++ b/examples/AQA/AudioCaps-QA-Test/dataset_info.json @@ -122,6 +122,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/AQA/AudioCaps-QA-Test/sample_0.wav b/examples/AQA/AudioCaps-QA-Test/sample_0.wav index 773c98035b78950ae26994d76316a4a602446e17..59fcd56a181073f56c1f0fba45f0ccda8a6337e2 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_0.wav and b/examples/AQA/AudioCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_1.wav b/examples/AQA/AudioCaps-QA-Test/sample_1.wav index 6963878bb2d524f1967934a9c615126ff93481b3..7ca320beb2ea16f32257519116b1f17d456f9bc4 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_1.wav and b/examples/AQA/AudioCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/sample_2.wav b/examples/AQA/AudioCaps-QA-Test/sample_2.wav index 40a5d2bd7b527602fccad9f3ff5e9844f0da45e4..6d062d0a489a8b0334deee75ebe530dbae953dd2 100644 Binary files a/examples/AQA/AudioCaps-QA-Test/sample_2.wav and b/examples/AQA/AudioCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/AudioCaps-QA-Test/state.json b/examples/AQA/AudioCaps-QA-Test/state.json index 19d00ee6e5f98d5468330edb676d8a2f9f4159f0..35ad9ea556daeee418b93d6a77ad56c5cf801dec 100644 --- a/examples/AQA/AudioCaps-QA-Test/state.json +++ b/examples/AQA/AudioCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b4d0bc420173574a", + "_fingerprint": "026dfac674d9ef77", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/AQA/Clotho-AQA-Test/dataset_info.json b/examples/AQA/Clotho-AQA-Test/dataset_info.json index 45210944633428553291fd44b5717fceb5809e5c..e584e27299bec6d9aa2eee45871c216c67ac17b0 100644 --- a/examples/AQA/Clotho-AQA-Test/dataset_info.json +++ b/examples/AQA/Clotho-AQA-Test/dataset_info.json @@ -32,97 +32,7 @@ "_type": "Value" } }, - "other_attributes": {}, - "salmonn_7b": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "wavllm_fairseq": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "Qwen2-Audio-7B-Instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "whisper_large_v3_with_llama_3_8b_instruct": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - }, - "qwen_audio_chat": { - "answer": { - "dtype": "string", - "_type": "Value" - }, - "model_prediction": { - "dtype": "string", - "_type": "Value" - }, - "task_type": { - "dtype": "string", - "_type": "Value" - }, - "text": { - "dtype": "string", - "_type": "Value" - } - } + "other_attributes": {} }, "homepage": "", "license": "" diff --git a/examples/AQA/Clotho-AQA-Test/sample_0.wav b/examples/AQA/Clotho-AQA-Test/sample_0.wav index 71db83c4fd13beb21fc12baebabc3115bb12772f..c48c65eea211ef53e5929d3744cbd0e73fd166a0 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_0.wav and b/examples/AQA/Clotho-AQA-Test/sample_0.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_1.wav b/examples/AQA/Clotho-AQA-Test/sample_1.wav index e840f36885be4f6991710701ff18840e62d6932e..b2a9524bb08edd5e5ab700fec3d49b610338efd2 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_1.wav and b/examples/AQA/Clotho-AQA-Test/sample_1.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/sample_2.wav b/examples/AQA/Clotho-AQA-Test/sample_2.wav index 32101df39657d72e131d224a23f61dde7aa6012f..ed8314310c19afb4235badd32f937feeb25387b5 100644 Binary files a/examples/AQA/Clotho-AQA-Test/sample_2.wav and b/examples/AQA/Clotho-AQA-Test/sample_2.wav differ diff --git a/examples/AQA/Clotho-AQA-Test/state.json b/examples/AQA/Clotho-AQA-Test/state.json index f914ee70c395c738a3b3d929a297026a62b87526..9cdc1447ea2f55e517f57bddb7f7fefb175a45b3 100644 --- a/examples/AQA/Clotho-AQA-Test/state.json +++ b/examples/AQA/Clotho-AQA-Test/state.json @@ -4,17 +4,12 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "3f05c76553bf311d", + "_fingerprint": "515a1722077187bd", "_format_columns": [ "context", "instruction", "answer", - "other_attributes", - "salmonn_7b", - "wavllm_fairseq", - "Qwen2-Audio-7B-Instruct", - "whisper_large_v3_with_llama_3_8b_instruct", - "qwen_audio_chat" + "other_attributes" ], "_format_kwargs": {}, "_format_type": null, diff --git a/examples/AQA/WavCaps-QA-Test/dataset_info.json b/examples/AQA/WavCaps-QA-Test/dataset_info.json index 8bf3ee660b297ef30a37b4ab559ea865bf89c9cf..72ceb742ffcaf0f6ff67811fa628b1e1c7a1167e 100644 --- a/examples/AQA/WavCaps-QA-Test/dataset_info.json +++ b/examples/AQA/WavCaps-QA-Test/dataset_info.json @@ -118,6 +118,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/AQA/WavCaps-QA-Test/sample_0.wav b/examples/AQA/WavCaps-QA-Test/sample_0.wav index c525931f97fb9fffd107ba1f10e02ff348ee733a..a7483f6a72f398b0b35db48322f6bcadb048867d 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_0.wav and b/examples/AQA/WavCaps-QA-Test/sample_0.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_1.wav b/examples/AQA/WavCaps-QA-Test/sample_1.wav index cc63fae1bd2bda6151ab60da2b2f7903191a03f3..41cf6d0cc9f8990b3599985156b44550dcbc9dfb 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_1.wav and b/examples/AQA/WavCaps-QA-Test/sample_1.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/sample_2.wav b/examples/AQA/WavCaps-QA-Test/sample_2.wav index 27706824fec806662482ae7903c969db8a96c7ba..13f24a2d1bfc7e2be0519def176d968885691e74 100644 Binary files a/examples/AQA/WavCaps-QA-Test/sample_2.wav and b/examples/AQA/WavCaps-QA-Test/sample_2.wav differ diff --git a/examples/AQA/WavCaps-QA-Test/state.json b/examples/AQA/WavCaps-QA-Test/state.json index 30c3a60665bed0ad7c6140b5086efce6fdd486ec..89917a40550177f4b54b4e3fe0885df6d78d0aab 100644 --- a/examples/AQA/WavCaps-QA-Test/state.json +++ b/examples/AQA/WavCaps-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "f88396310248e252", + "_fingerprint": "46b38bc22103a7cd", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json index 50bebce9f3ed95a5e075f901d4038bf769c46d49..913e85b36737f9004f81286043e7493d6f61b737 100644 --- a/examples/AR/VoxCeleb-Accent-Test/dataset_info.json +++ b/examples/AR/VoxCeleb-Accent-Test/dataset_info.json @@ -126,6 +126,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav index 4108fd6ea1fac52d76f936d237a7199833266c0b..637255b8ce4a76eab5145234259d4d9e27a7449d 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_0.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_0.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav index aa2f6e1d60f18910793c3ec63d2107616a88ecc4..81f38dc724603587c4c75c351b90f236b23a77d7 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_1.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_1.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav index f903c6c5ae2939f8f523c3113d25df8b1bb74d7d..f34db19cc48eb83b64600413f61e2e4eff07d2cd 100644 Binary files a/examples/AR/VoxCeleb-Accent-Test/sample_2.wav and b/examples/AR/VoxCeleb-Accent-Test/sample_2.wav differ diff --git a/examples/AR/VoxCeleb-Accent-Test/state.json b/examples/AR/VoxCeleb-Accent-Test/state.json index c69c32faf3ae084e16287c436f540a16578faf04..a0e4beb3d20ddc496c9863a6d3809657a69c09ea 100644 --- a/examples/AR/VoxCeleb-Accent-Test/state.json +++ b/examples/AR/VoxCeleb-Accent-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "7017504c8eeb5d71", + "_fingerprint": "f1df87f5b3ca8c97", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json index 61c4c1050499fd3220dbfe6013858b008c8d9810..08f8bd6abcb7df02ab18d592990cc082baa8bfa3 100644 --- a/examples/ASR/Common-Voice-15-En-Test/dataset_info.json +++ b/examples/ASR/Common-Voice-15-En-Test/dataset_info.json @@ -34,11 +34,11 @@ }, "other_attributes": { "accents": { - "dtype": "string", + "dtype": "null", "_type": "Value" }, "age": { - "dtype": "string", + "dtype": "null", "_type": "Value" }, "client_id": { @@ -50,7 +50,7 @@ "_type": "Value" }, "gender": { - "dtype": "string", + "dtype": "null", "_type": "Value" }, "language": { diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav index 98ad00ab81a597db93fb42634a7b173662c5402e..8e9932f1a577bd50d53c17860ba30589866112d8 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav index 2ff5b62f10f2614553c20a23a94c67dd8813f7c7..d17339bbdb7e343da7e8514f03bdbfe8d9ed399f 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav index 98eff0b7069351279a995d5e33aa12b83a109dd0..b760edbde1d6bfe80fcf77aecf7ac87aa21ecd4d 100644 Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ diff --git a/examples/ASR/Common-Voice-15-En-Test/state.json b/examples/ASR/Common-Voice-15-En-Test/state.json index 3064aa54130945bc783ab898011d0e84385e02fa..80d695eaf0d326aba5c1c42c08f6620b7711f3fc 100644 --- a/examples/ASR/Common-Voice-15-En-Test/state.json +++ b/examples/ASR/Common-Voice-15-En-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "6342d438049fbc7e", + "_fingerprint": "5a02a12eee6eb15a", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Earnings21-Test/state.json b/examples/ASR/Earnings21-Test/state.json index 37a0a357127feb8eb1497306a502edb53a1e4eee..03ec809e2ddf9f2fe68b495f0237cb499f70be44 100644 --- a/examples/ASR/Earnings21-Test/state.json +++ b/examples/ASR/Earnings21-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d0ad1703cbc51418", + "_fingerprint": "0d42a0f2cebd16d8", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Earnings22-Test/state.json b/examples/ASR/Earnings22-Test/state.json index ad7966db6aa47d1e1dbff1f49d710fa0e2050484..fb5db0657dff508744e282d20abdbfcf783797eb 100644 --- a/examples/ASR/Earnings22-Test/state.json +++ b/examples/ASR/Earnings22-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fb047ff90ed3a443", + "_fingerprint": "1427a3866fe2cb1a", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/GigaSpeech-Test/sample_0.wav b/examples/ASR/GigaSpeech-Test/sample_0.wav index 073483a0e4604323d0369ff2216b2a5765780823..d5b1a0190ab705e242079ac6552d760aa86442e3 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_0.wav and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_1.wav b/examples/ASR/GigaSpeech-Test/sample_1.wav index ca1b14f3ed3ea483c331f9acbaabde5b03c0b35b..8d4715d3ee2255ff632248568ef394a3cdf11417 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_1.wav and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ diff --git a/examples/ASR/GigaSpeech-Test/sample_2.wav b/examples/ASR/GigaSpeech-Test/sample_2.wav index 11094ecca41233498c7b38e51b9b70971f113ff6..ce79e6d621393311dc585ed610534909eca05aa8 100644 Binary files a/examples/ASR/GigaSpeech-Test/sample_2.wav and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ diff --git a/examples/ASR/GigaSpeech-Test/state.json b/examples/ASR/GigaSpeech-Test/state.json index dd846b8228bc8c6a5d6d88f7300ff35a6b51e5cb..34243fb62eea510a1aba5545b6b66d326f73dc0a 100644 --- a/examples/ASR/GigaSpeech-Test/state.json +++ b/examples/ASR/GigaSpeech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "84a02614da440215", + "_fingerprint": "9527d9b9b39b34c3", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav index ea36e7a02a6771ea96e210107b81ef3a5e5cd791..bc8a46dd654a6ba711d10f8c3d4eda7c84bb20ce 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav index 553aa3569726cd67a40baf3f50c7b5e18f32cd74..12fed4125c25fc3c4377b72a77b37fdd7f4fc4a4 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav index e4a9780515675dbb05b95cff26fb5aea26fe9aa7..c4e2909a95a0a3090cab61cd24e9f534075fc8aa 100644 Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part1-ASR-Test/state.json b/examples/ASR/IMDA-Part1-ASR-Test/state.json index d8919f5bfc220aed30d07a397179f30478ac3176..6105b023680c0feaa933a66b55356fd3dd70a71b 100644 --- a/examples/ASR/IMDA-Part1-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part1-ASR-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1514e693988caee7", + "_fingerprint": "e4d91fe35602a9e0", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav index 1784a0b78a165892ba01586b04974a15dda4eea3..d5c33dee5bf21565cf381e88d3c4a1ca6dbebe78 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav index 1fde75aea28f1557cd2fe3d80434d8b394f116ac..164c99dcf823a95655abe8aed5a11745da3d55d7 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav index 260cd5f87122a500ebf3b014bb2c323cc1200abe..f945ea7be22586a84f70fc5fe138402953d608ce 100644 Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ diff --git a/examples/ASR/IMDA-Part2-ASR-Test/state.json b/examples/ASR/IMDA-Part2-ASR-Test/state.json index 21299bb3dfc3a9f29854e5a39c5e2130dfca6bae..8583a6d8b377ebe373993a561db34452f3d28444 100644 --- a/examples/ASR/IMDA-Part2-ASR-Test/state.json +++ b/examples/ASR/IMDA-Part2-ASR-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8a8e117080f24a8b", + "_fingerprint": "0ed051d84878e4e9", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav index 2a103bec37460090a764f280ebcf4791ccd17d4e..7f30ab438a700b7e52038787753e5e93d1b4de39 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav index 60af90099e82c9115285de4897e38412a9440cac..e040ea7d8261366c44b0e9a7ddaca8f8c8044621 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav index d44035f1bacdce219e44056683944fd9cfecf7eb..23229886de2e3f6fb8823fe797326027011f3f09 100644 Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Clean/state.json b/examples/ASR/LibriSpeech-Test-Clean/state.json index 103a2443a4c9e5355798ee842efb63b6bf286af8..e31a1168b4cff86848674d4197e432b9899e0739 100644 --- a/examples/ASR/LibriSpeech-Test-Clean/state.json +++ b/examples/ASR/LibriSpeech-Test-Clean/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "cb0a09e53f0cc5db", + "_fingerprint": "db499491d573fb1e", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav index 595f7ba137749fd1578688365357e230351f6660..1343a8bf6091855e6ca7569ef96e2064b3cb69c8 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav index 948927ee05b269f31a635b2682751b4f0d50a8c5..108ec3d17493f7321e6ba6553c94dc62fa96d24b 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav index 82b4b2afd731d0c2a1501e34c5689cff431955a0..c5737246f32031af2551b79359c9e5704e49df1f 100644 Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ diff --git a/examples/ASR/LibriSpeech-Test-Other/state.json b/examples/ASR/LibriSpeech-Test-Other/state.json index f296f217907538df40fe8adfabb15d59e5f8d326..c73a8c0f165fb6c789d2533084a09b301b9e0d1a 100644 --- a/examples/ASR/LibriSpeech-Test-Other/state.json +++ b/examples/ASR/LibriSpeech-Test-Other/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "0ed80d8ca27350ce", + "_fingerprint": "e751a89ce4227535", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Peoples-Speech-Test/sample_0.wav b/examples/ASR/Peoples-Speech-Test/sample_0.wav index d18ce818679d916f7d285ab41c4eb0b4c64ab80a..0534e049487d4a5e44133719598cecc5fe9fc23a 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_0.wav and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_1.wav b/examples/ASR/Peoples-Speech-Test/sample_1.wav index 7b6d8264d363621884f39814ea63380cb64434a4..5b34837973bb7e663e05d6f623e1957baebdc905 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_1.wav and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/sample_2.wav b/examples/ASR/Peoples-Speech-Test/sample_2.wav index ec498dbdb1c5d0bac49a03779da1f995bd621c07..beca31e7418576a9fc43d747be077a95e2353450 100644 Binary files a/examples/ASR/Peoples-Speech-Test/sample_2.wav and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ diff --git a/examples/ASR/Peoples-Speech-Test/state.json b/examples/ASR/Peoples-Speech-Test/state.json index 41de0c42892f63c7ec1efb30e3d62a0be9f877a8..632eca89f3c4fea70a453e57a3349323c699797f 100644 --- a/examples/ASR/Peoples-Speech-Test/state.json +++ b/examples/ASR/Peoples-Speech-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "bf71989dac1baa0c", + "_fingerprint": "d07f97bfff42b092", "_format_columns": [ "context", "instruction", diff --git a/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json new file mode 100644 index 0000000000000000000000000000000000000000..cf9965f85a972806830fe14f38cc250366f94118 --- /dev/null +++ b/examples/ASR/Tedlium3-Long-form-Test/dataset_info.json @@ -0,0 +1,164 @@ +{ + "citation": "", + "description": "", + "features": { + "context": { + "text": { + "dtype": "string", + "_type": "Value" + }, + "audio": { + "sampling_rate": 16000, + "_type": "Audio" + } + }, + "instruction": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "answer": { + "audio": { + "dtype": "null", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "other_attributes": { + "file": { + "dtype": "string", + "_type": "Value" + }, + "gender": { + "dtype": "int64", + "_type": "Value" + }, + "id": { + "dtype": "string", + "_type": "Value" + }, + "speaker_id": { + "dtype": "string", + "_type": "Value" + } + }, + "salmonn_7b": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "wavllm_fairseq": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "Qwen2-Audio-7B-Instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "whisper_large_v3_with_llama_3_8b_instruct": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, + "qwen_audio_chat": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + } + }, + "homepage": "", + "license": "" +} \ No newline at end of file diff --git a/examples/ASR/Tedlium3-Long-form-Test/state.json b/examples/ASR/Tedlium3-Long-form-Test/state.json new file mode 100644 index 0000000000000000000000000000000000000000..30eac41b1222be656086486a4bb4030c8b8e7c9a --- /dev/null +++ b/examples/ASR/Tedlium3-Long-form-Test/state.json @@ -0,0 +1,24 @@ +{ + "_data_files": [ + { + "filename": "data-00000-of-00001.arrow" + } + ], + "_fingerprint": "ffcb019ec304c5cd", + "_format_columns": [ + "context", + "instruction", + "answer", + "other_attributes", + "salmonn_7b", + "wavllm_fairseq", + "Qwen2-Audio-7B-Instruct", + "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", + "qwen_audio_chat" + ], + "_format_kwargs": {}, + "_format_type": null, + "_output_all_columns": false, + "_split": null +} \ No newline at end of file diff --git a/examples/ASR/Tedlium3-Test/sample_0.wav b/examples/ASR/Tedlium3-Test/sample_0.wav index 94362f231d21545289d87680f8580d7dd2de71e8..7f9c12706b47e8985cfae1d8e3a33bbb2b93351e 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_0.wav and b/examples/ASR/Tedlium3-Test/sample_0.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_1.wav b/examples/ASR/Tedlium3-Test/sample_1.wav index 8a73e7a2a9ec7b50f172450c75d9d16a2b7c0987..f5bdad33f96732f6068f5c0ae1ccfd191e658f14 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_1.wav and b/examples/ASR/Tedlium3-Test/sample_1.wav differ diff --git a/examples/ASR/Tedlium3-Test/sample_2.wav b/examples/ASR/Tedlium3-Test/sample_2.wav index 79460a0b295f5b0ee22515fcbb7fcdee5ce83816..650c17709acccb1026eb3f3926caddac87575039 100644 Binary files a/examples/ASR/Tedlium3-Test/sample_2.wav and b/examples/ASR/Tedlium3-Test/sample_2.wav differ diff --git a/examples/ASR/Tedlium3-Test/state.json b/examples/ASR/Tedlium3-Test/state.json index 5db8543827fbeb809394b23b9163137b6d9f7b71..c31686ed45f77e866be12eb356f308537d01d8c5 100644 --- a/examples/ASR/Tedlium3-Test/state.json +++ b/examples/ASR/Tedlium3-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "33116bb8d5c8b7bf", + "_fingerprint": "6fde2d47e7fcba36", "_format_columns": [ "context", "instruction", diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json index 53a69797f0e56551f11dd7fc754525541592260d..051243e1e6b3046a83599b80eb901679ff2608d8 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/dataset_info.json @@ -102,6 +102,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav index e60df4c6f8b76165aa68bc4690c4890d7bb06ca0..a9896d7452058a68f5fb36a098fecb80a61f0179 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_0.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav index b7e3953ebefb7d00f2f53a128da4132ebb400f17..2b463dad5a36bdd326d83dde600f460252e97213 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_1.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav index 9e7ace99cf7ee186c5ff75811dd2825faf401818..1e797f2f358af46173549971074f7b393c6ea266 100644 Binary files a/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav and b/examples/CNASR/Aishell-ASR-ZH-Test/sample_2.wav differ diff --git a/examples/CNASR/Aishell-ASR-ZH-Test/state.json b/examples/CNASR/Aishell-ASR-ZH-Test/state.json index 362c37b973a7be7bfabe4775b6a93b2f3e038e13..928046ee4e58edb3f83df389b509f6c73998aa2f 100644 --- a/examples/CNASR/Aishell-ASR-ZH-Test/state.json +++ b/examples/CNASR/Aishell-ASR-ZH-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "9df39c289a58da05", + "_fingerprint": "2f95a38020869f6f", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json index 6ee0bdbe32fc2abb53eb378278667ae7c65706ee..ae1585a301a57eef40e3c39259d56a7e70e2be43 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json +++ b/examples/ER/IEMOCAP-Emotion-Test/dataset_info.json @@ -126,6 +126,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav index 9663058cec25e36c80c34ec1b437201c0ab36405..f7fbcec204ce9cad676607ba5d3ecc1164ff7cd7 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav index d0f66c928e3eec7d9344ae8c1a1b1e953128ded8..1a821ca8f4d90e16681d927d1bad127bbc131232 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav index 28a9ee62e794164d0e553f55ad644ff6d93439f4..9c332fd914253d28f3a7b4851e34ce672c7fb4da 100644 Binary files a/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav and b/examples/ER/IEMOCAP-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/IEMOCAP-Emotion-Test/state.json b/examples/ER/IEMOCAP-Emotion-Test/state.json index b9b947f65b2b046e7afd6aff693633ea6b20392a..4cd3ac71d16a1ccd9aefa29c3c207cb5e00e0dec 100644 --- a/examples/ER/IEMOCAP-Emotion-Test/state.json +++ b/examples/ER/IEMOCAP-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "11c1cad506e32e3a", + "_fingerprint": "f660f47d60092a28", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ER/MELD-Emotion-Test/dataset_info.json b/examples/ER/MELD-Emotion-Test/dataset_info.json index d1510e9c9bcd20bf4791aa110420e7c00f194d06..506cf34c0d7158134159a3234f9f98b8e6b74f28 100644 --- a/examples/ER/MELD-Emotion-Test/dataset_info.json +++ b/examples/ER/MELD-Emotion-Test/dataset_info.json @@ -142,6 +142,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ER/MELD-Emotion-Test/sample_0.wav b/examples/ER/MELD-Emotion-Test/sample_0.wav index 1290f04beb0655f325c70a66d139e91a327d44d6..66b28d69b04ddcdbf6649355a50b7431e068b6c1 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_0.wav and b/examples/ER/MELD-Emotion-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_1.wav b/examples/ER/MELD-Emotion-Test/sample_1.wav index 6102c50ca250d0aea84867bff8a8447030794c74..8b65e8917f9c613b9ace25b5cdf2213b1cc5885c 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_1.wav and b/examples/ER/MELD-Emotion-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Emotion-Test/sample_2.wav b/examples/ER/MELD-Emotion-Test/sample_2.wav index a14d5451f401e7479f3b88d3d98d3c7542aaf060..b5212fef08bd6083b353d3de4ca02e557b0895ff 100644 Binary files a/examples/ER/MELD-Emotion-Test/sample_2.wav and b/examples/ER/MELD-Emotion-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Emotion-Test/state.json b/examples/ER/MELD-Emotion-Test/state.json index 5fea2bae49831f0cb5cb4c00d5d87f97de7c6781..4cd00e9f41fc5acfa195cacd42f4b6d869bcfdba 100644 --- a/examples/ER/MELD-Emotion-Test/state.json +++ b/examples/ER/MELD-Emotion-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b26eb8a553b6391d", + "_fingerprint": "8ed652b090e9b45d", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ER/MELD-Sentiment-Test/dataset_info.json b/examples/ER/MELD-Sentiment-Test/dataset_info.json index d1510e9c9bcd20bf4791aa110420e7c00f194d06..506cf34c0d7158134159a3234f9f98b8e6b74f28 100644 --- a/examples/ER/MELD-Sentiment-Test/dataset_info.json +++ b/examples/ER/MELD-Sentiment-Test/dataset_info.json @@ -142,6 +142,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ER/MELD-Sentiment-Test/sample_0.wav b/examples/ER/MELD-Sentiment-Test/sample_0.wav index a41e977ceb90459a322dd63128ffbfbd8bef57d1..70c2320d56f8b078431ee6b90da30c838aa0a3b5 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_0.wav and b/examples/ER/MELD-Sentiment-Test/sample_0.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_1.wav b/examples/ER/MELD-Sentiment-Test/sample_1.wav index 2231159f9ca4b08102222cfe0aa216f816bf682d..9030fb4ce73e38966b2ec587b110d811c1cf4adc 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_1.wav and b/examples/ER/MELD-Sentiment-Test/sample_1.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/sample_2.wav b/examples/ER/MELD-Sentiment-Test/sample_2.wav index c01afd9a33375108c80c071096b918a45e3f0023..0e25c2ff0f76cf0fbd9a24f091b7317d8f577fa2 100644 Binary files a/examples/ER/MELD-Sentiment-Test/sample_2.wav and b/examples/ER/MELD-Sentiment-Test/sample_2.wav differ diff --git a/examples/ER/MELD-Sentiment-Test/state.json b/examples/ER/MELD-Sentiment-Test/state.json index c151f23c31d06eaed079d3301e8b0dd23727cb93..0732040b40ce0a76abeca37ca83769b790adaebd 100644 --- a/examples/ER/MELD-Sentiment-Test/state.json +++ b/examples/ER/MELD-Sentiment-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "b95b9f7545a82910", + "_fingerprint": "12b7f85ef427fcec", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json index 6ee0bdbe32fc2abb53eb378278667ae7c65706ee..ae1585a301a57eef40e3c39259d56a7e70e2be43 100644 --- a/examples/GR/IEMOCAP-Gender-Test/dataset_info.json +++ b/examples/GR/IEMOCAP-Gender-Test/dataset_info.json @@ -126,6 +126,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav index b2fdff74b94ec6049256083752dec82c55fdd73f..b14c47968a74f9c8bfda57605b321677db3a951b 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_0.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_0.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav index 0bc8ed44e57f6996d20cd15880fcdc8e1b6e80c6..f14fe7b4e7bebf1ba0ea4671bd54061c9aa690af 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_1.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_1.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav index 900b477cbf7e25f5ec19c3ec6150fe577d25e600..d4e718fbd94d944b8dedc542c4032539ebcd5bf7 100644 Binary files a/examples/GR/IEMOCAP-Gender-Test/sample_2.wav and b/examples/GR/IEMOCAP-Gender-Test/sample_2.wav differ diff --git a/examples/GR/IEMOCAP-Gender-Test/state.json b/examples/GR/IEMOCAP-Gender-Test/state.json index 56e93d6ac7909fbb8ec066090da0fd4eea1cbaa6..b556f362aef4e6bfad3e7f0959553733a304f250 100644 --- a/examples/GR/IEMOCAP-Gender-Test/state.json +++ b/examples/GR/IEMOCAP-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "fd5ddfda82c4aaa9", + "_fingerprint": "42f5f6e863c92512", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json index 50bebce9f3ed95a5e075f901d4038bf769c46d49..913e85b36737f9004f81286043e7493d6f61b737 100644 --- a/examples/GR/VoxCeleb-Gender-Test/dataset_info.json +++ b/examples/GR/VoxCeleb-Gender-Test/dataset_info.json @@ -126,6 +126,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav index c011ce7fcde186b0e52ac1a4646a86924f2614dc..f834b4c7fa0bbc42f9ae00985df35f41be1eba1e 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_0.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_0.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav index b7862b32a295e4d9716f99115f35df0b9188b931..318509e9dd39ef9205be46c24307cffb31a89523 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_1.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_1.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav index c4018fc0c75db7e6b96e7ae41af0e1e605d53630..887e042ebd793961f567457f41b6a9ed19107ba1 100644 Binary files a/examples/GR/VoxCeleb-Gender-Test/sample_2.wav and b/examples/GR/VoxCeleb-Gender-Test/sample_2.wav differ diff --git a/examples/GR/VoxCeleb-Gender-Test/state.json b/examples/GR/VoxCeleb-Gender-Test/state.json index 3c601cf05613a921e17d2000e8c76f38f319136f..3fd1c757fa6496421e73ae77b5da8ed09efa38f7 100644 --- a/examples/GR/VoxCeleb-Gender-Test/state.json +++ b/examples/GR/VoxCeleb-Gender-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "38fda24672a2ee02", + "_fingerprint": "7951265b5c594ce6", "_format_columns": [ "context", "instruction", @@ -14,6 +14,7 @@ "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", "whisper_large_v3_with_llama_3_8b_instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/SI/ALPACA-Audio-Test/sample_0.wav b/examples/SI/ALPACA-Audio-Test/sample_0.wav index 9d59769e693ba8186a468c920bf33aed6415b9a0..1d3e06ea09dc44a7e851372270ae926ca09161ca 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_0.wav and b/examples/SI/ALPACA-Audio-Test/sample_0.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_1.wav b/examples/SI/ALPACA-Audio-Test/sample_1.wav index 9fa77dc3a558295c617e6ba5f9c77d4a744f18fe..63423226a7fea5c7d3cd5cdf708415493411a2ad 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_1.wav and b/examples/SI/ALPACA-Audio-Test/sample_1.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/sample_2.wav b/examples/SI/ALPACA-Audio-Test/sample_2.wav index e7446eb105a62c5223dc799407a78a303ffa3273..aee526d870dcfea114cbe77a58f2b906db3a1846 100644 Binary files a/examples/SI/ALPACA-Audio-Test/sample_2.wav and b/examples/SI/ALPACA-Audio-Test/sample_2.wav differ diff --git a/examples/SI/ALPACA-Audio-Test/state.json b/examples/SI/ALPACA-Audio-Test/state.json index 3b1309df321667faea7ee972e3d92df95efd575f..ec1bcb6f73e7f696b90634ca1416212deb70497c 100644 --- a/examples/SI/ALPACA-Audio-Test/state.json +++ b/examples/SI/ALPACA-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "c6a29ee9e25361a7", + "_fingerprint": "60ec3d739f1f264e", "_format_columns": [ "context", "instruction", diff --git a/examples/SI/OpenHermes-Audio-Test/sample_0.wav b/examples/SI/OpenHermes-Audio-Test/sample_0.wav index 58c30ae68669cf5651f7dcd57e28e73b9347f677..b848eaa0197df53f0c66a042dd2cf0907a18adb6 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_0.wav and b/examples/SI/OpenHermes-Audio-Test/sample_0.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_1.wav b/examples/SI/OpenHermes-Audio-Test/sample_1.wav index 58ffc5f5c796a545f6ec10417f286ad13acc64cd..8990c2638a876f50eabb711a3c65e07809a7aa48 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_1.wav and b/examples/SI/OpenHermes-Audio-Test/sample_1.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/sample_2.wav b/examples/SI/OpenHermes-Audio-Test/sample_2.wav index 7d013e06ccb1a7b66fa1371cd7dd25a0067c6424..f2d8572e54eeabe52f6e37c3d285d5b1a39060e6 100644 Binary files a/examples/SI/OpenHermes-Audio-Test/sample_2.wav and b/examples/SI/OpenHermes-Audio-Test/sample_2.wav differ diff --git a/examples/SI/OpenHermes-Audio-Test/state.json b/examples/SI/OpenHermes-Audio-Test/state.json index 83993faa18ef244ce7cd82d1a262742737e1b751..3e62882afeb4c4672ca7b438517834bcc61c2771 100644 --- a/examples/SI/OpenHermes-Audio-Test/state.json +++ b/examples/SI/OpenHermes-Audio-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "44d9e5a3612e0df7", + "_fingerprint": "19108060d5e74733", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav index b383286458adcd302192c1a8840bf8adf43b1c41..2c71842e0c47f529cde47eadbdc21dac098eca8d 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav index fcdf3095b8c9b36fd8d1140fabc9c22e506ce290..02faddd4fba70d832d7b726a181b922e974bd1ac 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav index 3674d26ff67ffee9fbf6cd64bd00af74906a37d0..86ba0d6fedd50d874900f973f86967cdf9c54ab4 100644 Binary files a/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav and b/examples/SQA/CN-College-Listen-MCQ-Test/sample_2.wav differ diff --git a/examples/SQA/CN-College-Listen-MCQ-Test/state.json b/examples/SQA/CN-College-Listen-MCQ-Test/state.json index c6cc629f7c25521767ac482dbd1e71307ff9c443..023c37fec316be4e2d15a4c442c4e4ec31bc090a 100644 --- a/examples/SQA/CN-College-Listen-MCQ-Test/state.json +++ b/examples/SQA/CN-College-Listen-MCQ-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "efbbc5b180ee96e0", + "_fingerprint": "ea9d9086266315b3", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav index 8f6275ca50ba72f0800b5392777d63d5d1655a2f..701eaadff036dbe4397ac3d3e5cf953cdafe2492 100644 Binary files a/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav and b/examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav differ diff --git a/examples/SQA/DREAM-TTS-MCQ-Test/state.json b/examples/SQA/DREAM-TTS-MCQ-Test/state.json index 9839f8b445e8be16c0eacbf29b36a184b7e09cf0..60f24b2e2e0fc0f619402aedaba80fe1a6102aa6 100644 --- a/examples/SQA/DREAM-TTS-MCQ-Test/state.json +++ b/examples/SQA/DREAM-TTS-MCQ-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "1414881d04766772", + "_fingerprint": "8d4dbaf0bbc46c99", "_format_columns": [ "answer", "context", diff --git a/examples/SQA/Public-SG-Speech-QA-Test/state.json b/examples/SQA/Public-SG-Speech-QA-Test/state.json index cb60981a9b1ca32983da76e04178d039a7507568..966e1f2dfe5fba436d6ddb0873b91c39ee0f9530 100644 --- a/examples/SQA/Public-SG-Speech-QA-Test/state.json +++ b/examples/SQA/Public-SG-Speech-QA-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "794fdda22fd93c7f", + "_fingerprint": "4d4e665c9f359042", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/SLUE-P2-SQA5-Test/state.json b/examples/SQA/SLUE-P2-SQA5-Test/state.json index 2c3aad4c97f82ceb8147cd280eaa55e3d858b29d..e79576fcdc5892cb2e18d1920422d90537058f9b 100644 --- a/examples/SQA/SLUE-P2-SQA5-Test/state.json +++ b/examples/SQA/SLUE-P2-SQA5-Test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "8a250cf20de7599a", + "_fingerprint": "0bc180e1898c34b5", "_format_columns": [ "context", "instruction", diff --git a/examples/SQA/Spoken-Squad-v1/state.json b/examples/SQA/Spoken-Squad-v1/state.json index 64ffce20f87758120e69473703b377a002af8178..42b62cba113239f33738787c3c7f730b0a75d8a8 100644 --- a/examples/SQA/Spoken-Squad-v1/state.json +++ b/examples/SQA/Spoken-Squad-v1/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d63972b6ec648239", + "_fingerprint": "035eac078f4d4679", "_format_columns": [ "context", "instruction", diff --git a/examples/ST/Covost2-EN-ID-test/dataset_info.json b/examples/ST/Covost2-EN-ID-test/dataset_info.json index ef861e441fd29533e55b11f801c64226c20967ab..75447219133e63a2e07347f4f15add29dc2f358f 100644 --- a/examples/ST/Covost2-EN-ID-test/dataset_info.json +++ b/examples/ST/Covost2-EN-ID-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-ID-test/sample_0.wav b/examples/ST/Covost2-EN-ID-test/sample_0.wav index d5e0e0fde0f3a0f727836b4600f298dcce5f5498..aaa73891ca80e6daf935a96362b99724bf30e5f2 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_0.wav and b/examples/ST/Covost2-EN-ID-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_1.wav b/examples/ST/Covost2-EN-ID-test/sample_1.wav index 61aa557a0cdcfce43ee5f8372561e10281165065..8bb713755533c9c9c40fd7bc6fb97ceccc71b5c1 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_1.wav and b/examples/ST/Covost2-EN-ID-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/sample_2.wav b/examples/ST/Covost2-EN-ID-test/sample_2.wav index 11ccbae6576ed4e45dc7474750cefeafcdb868e8..e82baf335e4809fb09b4fd7447b6c96cad4e289f 100644 Binary files a/examples/ST/Covost2-EN-ID-test/sample_2.wav and b/examples/ST/Covost2-EN-ID-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ID-test/state.json b/examples/ST/Covost2-EN-ID-test/state.json index d47a7c80857a3221d45df3297070e256739fb389..d35329d7cd163f8aeae7496f3cfa3ebdc423220f 100644 --- a/examples/ST/Covost2-EN-ID-test/state.json +++ b/examples/ST/Covost2-EN-ID-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "5a70ead6af05628d", + "_fingerprint": "b3bfe1ac88ff72a0", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ST/Covost2-EN-TA-test/dataset_info.json b/examples/ST/Covost2-EN-TA-test/dataset_info.json index ef861e441fd29533e55b11f801c64226c20967ab..75447219133e63a2e07347f4f15add29dc2f358f 100644 --- a/examples/ST/Covost2-EN-TA-test/dataset_info.json +++ b/examples/ST/Covost2-EN-TA-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-TA-test/sample_0.wav b/examples/ST/Covost2-EN-TA-test/sample_0.wav index a513de5aebf55fa19b2e0d001fe3ce20ca911915..0c62dd1aab9919d9bcd776efbe484e001b592598 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_0.wav and b/examples/ST/Covost2-EN-TA-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_1.wav b/examples/ST/Covost2-EN-TA-test/sample_1.wav index 57b916b8b61a81249911a0350a97f73cc6ec8eec..4246c85fb21814573dbb736d188584a42e63f29f 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_1.wav and b/examples/ST/Covost2-EN-TA-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/sample_2.wav b/examples/ST/Covost2-EN-TA-test/sample_2.wav index 044e3082961777fbb0dc86467636b15484ada4ed..d339cd3dc96f925f24a1f7501e4645c8844fb369 100644 Binary files a/examples/ST/Covost2-EN-TA-test/sample_2.wav and b/examples/ST/Covost2-EN-TA-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-TA-test/state.json b/examples/ST/Covost2-EN-TA-test/state.json index 4d4c647d8ae69a5c7dba811c69508b500e4ba97f..8ca1326d120d940e69fbea2e66a9174a5d88f864 100644 --- a/examples/ST/Covost2-EN-TA-test/state.json +++ b/examples/ST/Covost2-EN-TA-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "963b050ae0788de8", + "_fingerprint": "76e8db43e251f03a", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ST/Covost2-EN-ZH-test/dataset_info.json b/examples/ST/Covost2-EN-ZH-test/dataset_info.json index ef861e441fd29533e55b11f801c64226c20967ab..75447219133e63a2e07347f4f15add29dc2f358f 100644 --- a/examples/ST/Covost2-EN-ZH-test/dataset_info.json +++ b/examples/ST/Covost2-EN-ZH-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-EN-ZH-test/sample_0.wav b/examples/ST/Covost2-EN-ZH-test/sample_0.wav index a8b3faa66b5b5db50c9ce92155ba3f8a032531e0..a5ace1d21eb77c249ac8bf9f268b8f1bfec5519a 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_0.wav and b/examples/ST/Covost2-EN-ZH-test/sample_0.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_1.wav b/examples/ST/Covost2-EN-ZH-test/sample_1.wav index 38b0b2d51c6283ae60b36b713dc5196a7c72ce91..8c40d7e137c6f005a0b3fd087f5b68aff125adc7 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_1.wav and b/examples/ST/Covost2-EN-ZH-test/sample_1.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/sample_2.wav b/examples/ST/Covost2-EN-ZH-test/sample_2.wav index 7cd489f895ab73e1307c1f5773bcd21ae9d41b9a..b1ce2588bbd2cd289ea7b7d3996c95a904eeb06d 100644 Binary files a/examples/ST/Covost2-EN-ZH-test/sample_2.wav and b/examples/ST/Covost2-EN-ZH-test/sample_2.wav differ diff --git a/examples/ST/Covost2-EN-ZH-test/state.json b/examples/ST/Covost2-EN-ZH-test/state.json index 7495f64755a1258d0257220bff2dbfd7a5c45d03..cf87654cc0e72ceb65b8e71d7428d648a2f1549b 100644 --- a/examples/ST/Covost2-EN-ZH-test/state.json +++ b/examples/ST/Covost2-EN-ZH-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "d72058f978d82043", + "_fingerprint": "d27fe19e989510df", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ST/Covost2-ID-EN-test/dataset_info.json b/examples/ST/Covost2-ID-EN-test/dataset_info.json index ef861e441fd29533e55b11f801c64226c20967ab..75447219133e63a2e07347f4f15add29dc2f358f 100644 --- a/examples/ST/Covost2-ID-EN-test/dataset_info.json +++ b/examples/ST/Covost2-ID-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-ID-EN-test/sample_0.wav b/examples/ST/Covost2-ID-EN-test/sample_0.wav index 047d6f5b5944901143e9688985a729d939c86726..c5fc387f794ec6644cd2c2a04cbced7d49950f5f 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_0.wav and b/examples/ST/Covost2-ID-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_1.wav b/examples/ST/Covost2-ID-EN-test/sample_1.wav index eb062e116031609643e0a8552418642bfb9ce897..89cea5e89bc92eb93b885643c1e250e7a987a73c 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_1.wav and b/examples/ST/Covost2-ID-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/sample_2.wav b/examples/ST/Covost2-ID-EN-test/sample_2.wav index 7359a1c0c3a1bbf05a89d5c9185476f2358525bc..bbcdcfd796d393d9d227a57fe543b8e442b1d94f 100644 Binary files a/examples/ST/Covost2-ID-EN-test/sample_2.wav and b/examples/ST/Covost2-ID-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ID-EN-test/state.json b/examples/ST/Covost2-ID-EN-test/state.json index 844be100a289ab53d963dadc96479f514727d41f..060cb29ce89ace31ec246bf22af9ae83459382e3 100644 --- a/examples/ST/Covost2-ID-EN-test/state.json +++ b/examples/ST/Covost2-ID-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "19ed09e0714aabbd", + "_fingerprint": "1ad122cc3b0e20fe", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ST/Covost2-TA-EN-test/dataset_info.json b/examples/ST/Covost2-TA-EN-test/dataset_info.json index cc82e8b905c06d273e25883741d47917c177bd34..0a34b7bc1ac66816e090968b4de959503e2c4168 100644 --- a/examples/ST/Covost2-TA-EN-test/dataset_info.json +++ b/examples/ST/Covost2-TA-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-TA-EN-test/sample_0.wav b/examples/ST/Covost2-TA-EN-test/sample_0.wav index 15edf0463a09e091fc40e61fca7310e7bb939466..070bd162c47080814b4afe6d0ff0c1f629926ff7 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_0.wav and b/examples/ST/Covost2-TA-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_1.wav b/examples/ST/Covost2-TA-EN-test/sample_1.wav index 72c56721fa7293f283d4b063dbf2422001398dad..c5ad72bcc3bdcbdc2b2b9a13b92c0e78ea2f6e95 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_1.wav and b/examples/ST/Covost2-TA-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/sample_2.wav b/examples/ST/Covost2-TA-EN-test/sample_2.wav index 8beb82265b0970569af53ba0ffd245a32d8c7bdd..ccf5eda02b57d943a995a0504dde21135376ff21 100644 Binary files a/examples/ST/Covost2-TA-EN-test/sample_2.wav and b/examples/ST/Covost2-TA-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-TA-EN-test/state.json b/examples/ST/Covost2-TA-EN-test/state.json index 34a773ffdcc01254866b2bf3a2a70e8666ff271f..047e39432ccad934ad34d2ebdf861a7031e4ff96 100644 --- a/examples/ST/Covost2-TA-EN-test/state.json +++ b/examples/ST/Covost2-TA-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "92c0c02259ae086c", + "_fingerprint": "ec8fc1af5b770f60", "_format_columns": [ "answer", "context", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {}, diff --git a/examples/ST/Covost2-ZH-EN-test/dataset_info.json b/examples/ST/Covost2-ZH-EN-test/dataset_info.json index ef861e441fd29533e55b11f801c64226c20967ab..75447219133e63a2e07347f4f15add29dc2f358f 100644 --- a/examples/ST/Covost2-ZH-EN-test/dataset_info.json +++ b/examples/ST/Covost2-ZH-EN-test/dataset_info.json @@ -112,6 +112,24 @@ "_type": "Value" } }, + "mowe_audio": { + "answer": { + "dtype": "string", + "_type": "Value" + }, + "model_prediction": { + "dtype": "string", + "_type": "Value" + }, + "task_type": { + "dtype": "string", + "_type": "Value" + }, + "text": { + "dtype": "string", + "_type": "Value" + } + }, "qwen_audio_chat": { "answer": { "dtype": "string", diff --git a/examples/ST/Covost2-ZH-EN-test/sample_0.wav b/examples/ST/Covost2-ZH-EN-test/sample_0.wav index 6772be89c460f498224f8144709671e29394097a..261eb86cee105db49f22654b63ce14980709edec 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_0.wav and b/examples/ST/Covost2-ZH-EN-test/sample_0.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_1.wav b/examples/ST/Covost2-ZH-EN-test/sample_1.wav index 618d4f4466396098c6b8682092affe1bd70393db..4bfd72806def045cf386666dcd2ca307040049ec 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_1.wav and b/examples/ST/Covost2-ZH-EN-test/sample_1.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/sample_2.wav b/examples/ST/Covost2-ZH-EN-test/sample_2.wav index 97ddc1a4d27a100758de33c67691b498c292dd0f..1fd592e9eccd9f45016e64157bed001a02da4421 100644 Binary files a/examples/ST/Covost2-ZH-EN-test/sample_2.wav and b/examples/ST/Covost2-ZH-EN-test/sample_2.wav differ diff --git a/examples/ST/Covost2-ZH-EN-test/state.json b/examples/ST/Covost2-ZH-EN-test/state.json index a6bf27b40c1391c887b487fcc106fe5eda0ed77a..e230ae52dd845127cf654fc006737e7883a1dcc9 100644 --- a/examples/ST/Covost2-ZH-EN-test/state.json +++ b/examples/ST/Covost2-ZH-EN-test/state.json @@ -4,7 +4,7 @@ "filename": "data-00000-of-00001.arrow" } ], - "_fingerprint": "24de82e05fd4827e", + "_fingerprint": "f049563334d9978e", "_format_columns": [ "context", "instruction", @@ -13,6 +13,7 @@ "salmonn_7b", "wavllm_fairseq", "Qwen2-Audio-7B-Instruct", + "mowe_audio", "qwen_audio_chat" ], "_format_kwargs": {},