zhuohan-7 committed on
Commit
e80994e
·
1 Parent(s): 3348106

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. examples/AC/AudioCaps-Test/dataset_info.json +18 -0
  3. examples/AC/AudioCaps-Test/sample_0.wav +0 -0
  4. examples/AC/AudioCaps-Test/sample_1.wav +0 -0
  5. examples/AC/AudioCaps-Test/sample_2.wav +0 -0
  6. examples/AC/AudioCaps-Test/state.json +2 -1
  7. examples/AC/WavCaps-Test/dataset_info.json +18 -0
  8. examples/AC/WavCaps-Test/sample_0.wav +0 -0
  9. examples/AC/WavCaps-Test/sample_1.wav +0 -0
  10. examples/AC/WavCaps-Test/sample_2.wav +0 -0
  11. examples/AC/WavCaps-Test/state.json +2 -1
  12. examples/ASR/Common-Voice-15-En-Test/dataset_info.json +21 -3
  13. examples/ASR/Common-Voice-15-En-Test/sample_0.wav +0 -0
  14. examples/ASR/Common-Voice-15-En-Test/sample_1.wav +0 -0
  15. examples/ASR/Common-Voice-15-En-Test/sample_2.wav +0 -0
  16. examples/ASR/Common-Voice-15-En-Test/state.json +2 -1
  17. examples/ASR/Earnings21-Test/dataset_info.json +18 -0
  18. examples/ASR/Earnings21-Test/state.json +2 -1
  19. examples/ASR/Earnings22-Test/dataset_info.json +18 -0
  20. examples/ASR/Earnings22-Test/state.json +2 -1
  21. examples/ASR/GigaSpeech-Test/dataset_info.json +18 -0
  22. examples/ASR/GigaSpeech-Test/sample_0.wav +0 -0
  23. examples/ASR/GigaSpeech-Test/sample_1.wav +0 -0
  24. examples/ASR/GigaSpeech-Test/sample_2.wav +0 -0
  25. examples/ASR/GigaSpeech-Test/state.json +2 -1
  26. examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav +0 -0
  27. examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav +0 -0
  28. examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav +0 -0
  29. examples/ASR/IMDA-Part1-ASR-Test/state.json +1 -1
  30. examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav +0 -0
  31. examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav +0 -0
  32. examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav +0 -0
  33. examples/ASR/IMDA-Part2-ASR-Test/state.json +1 -1
  34. examples/ASR/LibriSpeech-Test-Clean/dataset_info.json +18 -0
  35. examples/ASR/LibriSpeech-Test-Clean/sample_0.wav +0 -0
  36. examples/ASR/LibriSpeech-Test-Clean/sample_1.wav +0 -0
  37. examples/ASR/LibriSpeech-Test-Clean/sample_2.wav +0 -0
  38. examples/ASR/LibriSpeech-Test-Clean/state.json +2 -1
  39. examples/ASR/LibriSpeech-Test-Other/dataset_info.json +18 -0
  40. examples/ASR/LibriSpeech-Test-Other/sample_0.wav +0 -0
  41. examples/ASR/LibriSpeech-Test-Other/sample_1.wav +0 -0
  42. examples/ASR/LibriSpeech-Test-Other/sample_2.wav +0 -0
  43. examples/ASR/LibriSpeech-Test-Other/state.json +2 -1
  44. examples/ASR/Peoples-Speech-Test/dataset_info.json +18 -0
  45. examples/ASR/Peoples-Speech-Test/sample_0.wav +0 -0
  46. examples/ASR/Peoples-Speech-Test/sample_1.wav +0 -0
  47. examples/ASR/Peoples-Speech-Test/sample_2.wav +0 -0
  48. examples/ASR/Peoples-Speech-Test/state.json +2 -1
  49. examples/ASR/Tedlium3-Longform-Test/dataset_info.json +18 -0
  50. examples/ASR/Tedlium3-Longform-Test/state.json +2 -1
.gitattributes CHANGED
@@ -51,3 +51,4 @@ examples/SQA/SLUE-P2-SQA5-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
51
  examples/SQA/Spoken-Squad-v1/sample_0.wav filter=lfs diff=lfs merge=lfs -text
52
  examples/SQA/Spoken-Squad-v1/sample_1.wav filter=lfs diff=lfs merge=lfs -text
53
  examples/SQA/Spoken-Squad-v1/sample_2.wav filter=lfs diff=lfs merge=lfs -text
 
 
51
  examples/SQA/Spoken-Squad-v1/sample_0.wav filter=lfs diff=lfs merge=lfs -text
52
  examples/SQA/Spoken-Squad-v1/sample_1.wav filter=lfs diff=lfs merge=lfs -text
53
  examples/SQA/Spoken-Squad-v1/sample_2.wav filter=lfs diff=lfs merge=lfs -text
54
+ examples/SQA/DREAM-TTS-MCQ-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
examples/AC/AudioCaps-Test/dataset_info.json CHANGED
@@ -118,6 +118,24 @@
118
  "_type": "Value"
119
  }
120
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  "qwen_audio_chat": {
122
  "answer": {
123
  "dtype": "string",
 
118
  "_type": "Value"
119
  }
120
  },
121
+ "mowe_audio": {
122
+ "answer": {
123
+ "dtype": "string",
124
+ "_type": "Value"
125
+ },
126
+ "model_prediction": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "task_type": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ },
134
+ "text": {
135
+ "dtype": "string",
136
+ "_type": "Value"
137
+ }
138
+ },
139
  "qwen_audio_chat": {
140
  "answer": {
141
  "dtype": "string",
examples/AC/AudioCaps-Test/sample_0.wav CHANGED
Binary files a/examples/AC/AudioCaps-Test/sample_0.wav and b/examples/AC/AudioCaps-Test/sample_0.wav differ
 
examples/AC/AudioCaps-Test/sample_1.wav CHANGED
Binary files a/examples/AC/AudioCaps-Test/sample_1.wav and b/examples/AC/AudioCaps-Test/sample_1.wav differ
 
examples/AC/AudioCaps-Test/sample_2.wav CHANGED
Binary files a/examples/AC/AudioCaps-Test/sample_2.wav and b/examples/AC/AudioCaps-Test/sample_2.wav differ
 
examples/AC/AudioCaps-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "0e301916c3676d35",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "e654a4081bc1365b",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/AC/WavCaps-Test/dataset_info.json CHANGED
@@ -114,6 +114,24 @@
114
  "_type": "Value"
115
  }
116
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "qwen_audio_chat": {
118
  "answer": {
119
  "dtype": "string",
 
114
  "_type": "Value"
115
  }
116
  },
117
+ "mowe_audio": {
118
+ "answer": {
119
+ "dtype": "string",
120
+ "_type": "Value"
121
+ },
122
+ "model_prediction": {
123
+ "dtype": "string",
124
+ "_type": "Value"
125
+ },
126
+ "task_type": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "text": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ }
134
+ },
135
  "qwen_audio_chat": {
136
  "answer": {
137
  "dtype": "string",
examples/AC/WavCaps-Test/sample_0.wav CHANGED
Binary files a/examples/AC/WavCaps-Test/sample_0.wav and b/examples/AC/WavCaps-Test/sample_0.wav differ
 
examples/AC/WavCaps-Test/sample_1.wav CHANGED
Binary files a/examples/AC/WavCaps-Test/sample_1.wav and b/examples/AC/WavCaps-Test/sample_1.wav differ
 
examples/AC/WavCaps-Test/sample_2.wav CHANGED
Binary files a/examples/AC/WavCaps-Test/sample_2.wav and b/examples/AC/WavCaps-Test/sample_2.wav differ
 
examples/AC/WavCaps-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "1e570096603c2a32",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "ce408e4cfa3eec8a",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/Common-Voice-15-En-Test/dataset_info.json CHANGED
@@ -34,11 +34,11 @@
34
  },
35
  "other_attributes": {
36
  "accents": {
37
- "dtype": "null",
38
  "_type": "Value"
39
  },
40
  "age": {
41
- "dtype": "null",
42
  "_type": "Value"
43
  },
44
  "client_id": {
@@ -50,7 +50,7 @@
50
  "_type": "Value"
51
  },
52
  "gender": {
53
- "dtype": "null",
54
  "_type": "Value"
55
  },
56
  "language": {
@@ -146,6 +146,24 @@
146
  "_type": "Value"
147
  }
148
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  "qwen_audio_chat": {
150
  "answer": {
151
  "dtype": "string",
 
34
  },
35
  "other_attributes": {
36
  "accents": {
37
+ "dtype": "string",
38
  "_type": "Value"
39
  },
40
  "age": {
41
+ "dtype": "string",
42
  "_type": "Value"
43
  },
44
  "client_id": {
 
50
  "_type": "Value"
51
  },
52
  "gender": {
53
+ "dtype": "string",
54
  "_type": "Value"
55
  },
56
  "language": {
 
146
  "_type": "Value"
147
  }
148
  },
149
+ "mowe_audio": {
150
+ "answer": {
151
+ "dtype": "string",
152
+ "_type": "Value"
153
+ },
154
+ "model_prediction": {
155
+ "dtype": "string",
156
+ "_type": "Value"
157
+ },
158
+ "task_type": {
159
+ "dtype": "string",
160
+ "_type": "Value"
161
+ },
162
+ "text": {
163
+ "dtype": "string",
164
+ "_type": "Value"
165
+ }
166
+ },
167
  "qwen_audio_chat": {
168
  "answer": {
169
  "dtype": "string",
examples/ASR/Common-Voice-15-En-Test/sample_0.wav CHANGED
Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_0.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_0.wav differ
 
examples/ASR/Common-Voice-15-En-Test/sample_1.wav CHANGED
Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_1.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_1.wav differ
 
examples/ASR/Common-Voice-15-En-Test/sample_2.wav CHANGED
Binary files a/examples/ASR/Common-Voice-15-En-Test/sample_2.wav and b/examples/ASR/Common-Voice-15-En-Test/sample_2.wav differ
 
examples/ASR/Common-Voice-15-En-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "30218d56801da2e8",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "6342d438049fbc7e",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/Earnings21-Test/dataset_info.json CHANGED
@@ -110,6 +110,24 @@
110
  "_type": "Value"
111
  }
112
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  "qwen_audio_chat": {
114
  "answer": {
115
  "dtype": "string",
 
110
  "_type": "Value"
111
  }
112
  },
113
+ "mowe_audio": {
114
+ "answer": {
115
+ "dtype": "string",
116
+ "_type": "Value"
117
+ },
118
+ "model_prediction": {
119
+ "dtype": "string",
120
+ "_type": "Value"
121
+ },
122
+ "task_type": {
123
+ "dtype": "string",
124
+ "_type": "Value"
125
+ },
126
+ "text": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ }
130
+ },
131
  "qwen_audio_chat": {
132
  "answer": {
133
  "dtype": "string",
examples/ASR/Earnings21-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "2e3dea299b387757",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "d0ad1703cbc51418",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/Earnings22-Test/dataset_info.json CHANGED
@@ -110,6 +110,24 @@
110
  "_type": "Value"
111
  }
112
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  "qwen_audio_chat": {
114
  "answer": {
115
  "dtype": "string",
 
110
  "_type": "Value"
111
  }
112
  },
113
+ "mowe_audio": {
114
+ "answer": {
115
+ "dtype": "string",
116
+ "_type": "Value"
117
+ },
118
+ "model_prediction": {
119
+ "dtype": "string",
120
+ "_type": "Value"
121
+ },
122
+ "task_type": {
123
+ "dtype": "string",
124
+ "_type": "Value"
125
+ },
126
+ "text": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ }
130
+ },
131
  "qwen_audio_chat": {
132
  "answer": {
133
  "dtype": "string",
examples/ASR/Earnings22-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "c2ddf91e8ccb230c",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "fb047ff90ed3a443",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/GigaSpeech-Test/dataset_info.json CHANGED
@@ -138,6 +138,24 @@
138
  "_type": "Value"
139
  }
140
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  "qwen_audio_chat": {
142
  "answer": {
143
  "dtype": "string",
 
138
  "_type": "Value"
139
  }
140
  },
141
+ "mowe_audio": {
142
+ "answer": {
143
+ "dtype": "string",
144
+ "_type": "Value"
145
+ },
146
+ "model_prediction": {
147
+ "dtype": "string",
148
+ "_type": "Value"
149
+ },
150
+ "task_type": {
151
+ "dtype": "string",
152
+ "_type": "Value"
153
+ },
154
+ "text": {
155
+ "dtype": "string",
156
+ "_type": "Value"
157
+ }
158
+ },
159
  "qwen_audio_chat": {
160
  "answer": {
161
  "dtype": "string",
examples/ASR/GigaSpeech-Test/sample_0.wav CHANGED
Binary files a/examples/ASR/GigaSpeech-Test/sample_0.wav and b/examples/ASR/GigaSpeech-Test/sample_0.wav differ
 
examples/ASR/GigaSpeech-Test/sample_1.wav CHANGED
Binary files a/examples/ASR/GigaSpeech-Test/sample_1.wav and b/examples/ASR/GigaSpeech-Test/sample_1.wav differ
 
examples/ASR/GigaSpeech-Test/sample_2.wav CHANGED
Binary files a/examples/ASR/GigaSpeech-Test/sample_2.wav and b/examples/ASR/GigaSpeech-Test/sample_2.wav differ
 
examples/ASR/GigaSpeech-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "0032f92a85e94025",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "84a02614da440215",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav CHANGED
Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_0.wav differ
 
examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav CHANGED
Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_1.wav differ
 
examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav CHANGED
Binary files a/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part1-ASR-Test/sample_2.wav differ
 
examples/ASR/IMDA-Part1-ASR-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "9b9c1437475afa9d",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "1514e693988caee7",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav CHANGED
Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_0.wav differ
 
examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav CHANGED
Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_1.wav differ
 
examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav CHANGED
Binary files a/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav and b/examples/ASR/IMDA-Part2-ASR-Test/sample_2.wav differ
 
examples/ASR/IMDA-Part2-ASR-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "1b048a2e54d0c002",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "8a8e117080f24a8b",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
examples/ASR/LibriSpeech-Test-Clean/dataset_info.json CHANGED
@@ -122,6 +122,24 @@
122
  "_type": "Value"
123
  }
124
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  "qwen_audio_chat": {
126
  "answer": {
127
  "dtype": "string",
 
122
  "_type": "Value"
123
  }
124
  },
125
+ "mowe_audio": {
126
+ "answer": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "model_prediction": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ },
134
+ "task_type": {
135
+ "dtype": "string",
136
+ "_type": "Value"
137
+ },
138
+ "text": {
139
+ "dtype": "string",
140
+ "_type": "Value"
141
+ }
142
+ },
143
  "qwen_audio_chat": {
144
  "answer": {
145
  "dtype": "string",
examples/ASR/LibriSpeech-Test-Clean/sample_0.wav CHANGED
Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_0.wav differ
 
examples/ASR/LibriSpeech-Test-Clean/sample_1.wav CHANGED
Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_1.wav differ
 
examples/ASR/LibriSpeech-Test-Clean/sample_2.wav CHANGED
Binary files a/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Clean/sample_2.wav differ
 
examples/ASR/LibriSpeech-Test-Clean/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "c56d3af03a1dc565",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "cb0a09e53f0cc5db",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/LibriSpeech-Test-Other/dataset_info.json CHANGED
@@ -122,6 +122,24 @@
122
  "_type": "Value"
123
  }
124
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  "qwen_audio_chat": {
126
  "answer": {
127
  "dtype": "string",
 
122
  "_type": "Value"
123
  }
124
  },
125
+ "mowe_audio": {
126
+ "answer": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "model_prediction": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ },
134
+ "task_type": {
135
+ "dtype": "string",
136
+ "_type": "Value"
137
+ },
138
+ "text": {
139
+ "dtype": "string",
140
+ "_type": "Value"
141
+ }
142
+ },
143
  "qwen_audio_chat": {
144
  "answer": {
145
  "dtype": "string",
examples/ASR/LibriSpeech-Test-Other/sample_0.wav CHANGED
Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_0.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_0.wav differ
 
examples/ASR/LibriSpeech-Test-Other/sample_1.wav CHANGED
Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_1.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_1.wav differ
 
examples/ASR/LibriSpeech-Test-Other/sample_2.wav CHANGED
Binary files a/examples/ASR/LibriSpeech-Test-Other/sample_2.wav and b/examples/ASR/LibriSpeech-Test-Other/sample_2.wav differ
 
examples/ASR/LibriSpeech-Test-Other/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "5fc28a37097fe19f",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "0ed80d8ca27350ce",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/Peoples-Speech-Test/dataset_info.json CHANGED
@@ -114,6 +114,24 @@
114
  "_type": "Value"
115
  }
116
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "qwen_audio_chat": {
118
  "answer": {
119
  "dtype": "string",
 
114
  "_type": "Value"
115
  }
116
  },
117
+ "mowe_audio": {
118
+ "answer": {
119
+ "dtype": "string",
120
+ "_type": "Value"
121
+ },
122
+ "model_prediction": {
123
+ "dtype": "string",
124
+ "_type": "Value"
125
+ },
126
+ "task_type": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "text": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ }
134
+ },
135
  "qwen_audio_chat": {
136
  "answer": {
137
  "dtype": "string",
examples/ASR/Peoples-Speech-Test/sample_0.wav CHANGED
Binary files a/examples/ASR/Peoples-Speech-Test/sample_0.wav and b/examples/ASR/Peoples-Speech-Test/sample_0.wav differ
 
examples/ASR/Peoples-Speech-Test/sample_1.wav CHANGED
Binary files a/examples/ASR/Peoples-Speech-Test/sample_1.wav and b/examples/ASR/Peoples-Speech-Test/sample_1.wav differ
 
examples/ASR/Peoples-Speech-Test/sample_2.wav CHANGED
Binary files a/examples/ASR/Peoples-Speech-Test/sample_2.wav and b/examples/ASR/Peoples-Speech-Test/sample_2.wav differ
 
examples/ASR/Peoples-Speech-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "6203edc47e9a3c56",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "bf71989dac1baa0c",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},
examples/ASR/Tedlium3-Longform-Test/dataset_info.json CHANGED
@@ -122,6 +122,24 @@
122
  "_type": "Value"
123
  }
124
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  "qwen_audio_chat": {
126
  "answer": {
127
  "dtype": "string",
 
122
  "_type": "Value"
123
  }
124
  },
125
+ "mowe_audio": {
126
+ "answer": {
127
+ "dtype": "string",
128
+ "_type": "Value"
129
+ },
130
+ "model_prediction": {
131
+ "dtype": "string",
132
+ "_type": "Value"
133
+ },
134
+ "task_type": {
135
+ "dtype": "string",
136
+ "_type": "Value"
137
+ },
138
+ "text": {
139
+ "dtype": "string",
140
+ "_type": "Value"
141
+ }
142
+ },
143
  "qwen_audio_chat": {
144
  "answer": {
145
  "dtype": "string",
examples/ASR/Tedlium3-Longform-Test/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "f89ae31db7413bf4",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
@@ -14,6 +14,7 @@
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
 
17
  "qwen_audio_chat"
18
  ],
19
  "_format_kwargs": {},
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "16ff45685ee4694b",
8
  "_format_columns": [
9
  "context",
10
  "instruction",
 
14
  "wavllm_fairseq",
15
  "Qwen2-Audio-7B-Instruct",
16
  "whisper_large_v3_with_llama_3_8b_instruct",
17
+ "mowe_audio",
18
  "qwen_audio_chat"
19
  ],
20
  "_format_kwargs": {},