Commit: Update ultravox_processing.py
File changed: ultravox_processing.py (+6 −3)
@@ -163,11 +163,11 @@ class UltravoxProcessor(transformers.ProcessorMixin):
         chunked_audio_values: List[torch.Tensor] = []
         chunked_audio_lens: List[int] = []
         is_continuation_list: List[bool] = []
-        [removed line — content not captured in this extraction]
+        num_chunks: List[int] = []
         context_size = self.audio_context_size or audio_values.shape[-1]

         for i in range(audio_values.shape[0]):  # iterate over the batch
-            [removed line — content not captured in this extraction]
+            num_chunks.append(int(np.ceil(audio_lens[i] / context_size)))
             for offset in range(0, audio_lens[i], context_size):
                 is_continuation = offset > 0
                 chunk = audio_values[i, :, offset : offset + context_size]
@@ -193,7 +193,10 @@ class UltravoxProcessor(transformers.ProcessorMixin):
                     is_continuation_list, dtype=torch.bool, device=audio_values.device
                 ),
                 "audio_batch_size": torch.tensor(
-                    [removed line — content not captured in this extraction]
+                    [len(chunked_audio_values)], device=audio_values.device
+                ),
+                "audio_num_chunks": torch.tensor(
+                    num_chunks, dtype=torch.int64, device=audio_values.device
                 ),
             }

[Note: the three `-` lines above were rendered empty in the captured diff view; their original
content could not be recovered and is marked as such rather than reconstructed.]