terry-li-hm
committed on
Commit
·
458cf48
1
Parent(s):
b0de40b
Update
Browse files
sv.py
CHANGED
@@ -194,7 +194,7 @@ def format_time(seconds, use_short_format=True, always_use_seconds=False):
|
|
194 |
|
195 |
if always_use_seconds or (use_short_format and hours == 0 and minutes == 0):
|
196 |
return f"{seconds:06.3f}s"
|
197 |
-
elif use_short_format
|
198 |
return f"{minutes:02d}:{seconds:06.3f}"
|
199 |
else:
|
200 |
return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
|
@@ -275,7 +275,7 @@ def process_audio(audio_path, language="yue", fs=16000):
|
|
275 |
|
276 |
# Determine if the audio is less than one minute
|
277 |
total_duration = sum(duration for _, _, duration, _ in diarization_segments)
|
278 |
-
|
279 |
|
280 |
# Process the audio in chunks based on diarization segments
|
281 |
results = []
|
@@ -321,9 +321,19 @@ def process_audio(audio_path, language="yue", fs=16000):
|
|
321 |
# Format the results
|
322 |
formatted_text = ""
|
323 |
for speaker, start, end, duration, text in results:
|
324 |
-
start_str =
|
325 |
-
|
326 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
327 |
speaker_num = "1" if speaker == "SPEAKER_00" else "2"
|
328 |
line = f"{start_str} - {end_str} ({duration_str}) Speaker {speaker_num}: {text}"
|
329 |
formatted_text += line + "\n"
|
|
|
194 |
|
195 |
if always_use_seconds or (use_short_format and hours == 0 and minutes == 0):
|
196 |
return f"{seconds:06.3f}s"
|
197 |
+
elif use_short_format and hours == 0:
|
198 |
return f"{minutes:02d}:{seconds:06.3f}"
|
199 |
else:
|
200 |
return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
|
|
|
275 |
|
276 |
# Determine if the audio is less than one minute
|
277 |
total_duration = sum(duration for _, _, duration, _ in diarization_segments)
|
278 |
+
use_long_format = total_duration >= 60
|
279 |
|
280 |
# Process the audio in chunks based on diarization segments
|
281 |
results = []
|
|
|
321 |
# Format the results
|
322 |
formatted_text = ""
|
323 |
for speaker, start, end, duration, text in results:
|
324 |
+
start_str = (
|
325 |
+
format_time(start, use_short_format=False)
|
326 |
+
if use_long_format
|
327 |
+
else format_time(start, use_short_format=True)
|
328 |
+
)
|
329 |
+
end_str = (
|
330 |
+
format_time(end, use_short_format=False)
|
331 |
+
if use_long_format
|
332 |
+
else format_time(end, use_short_format=True)
|
333 |
+
)
|
334 |
+
duration_str = format_time(
|
335 |
+
duration, use_short_format=True
|
336 |
+
) # Always use short format for duration
|
337 |
speaker_num = "1" if speaker == "SPEAKER_00" else "2"
|
338 |
line = f"{start_str} - {end_str} ({duration_str}) Speaker {speaker_num}: {text}"
|
339 |
formatted_text += line + "\n"
|