Update whisper.py (#10)
- Update whisper.py (34001bf3f30739e701e1b13b203137d300f43686)
Co-authored-by: Sarah Solito <[email protected]>
- whisper.py +3 -4
whisper.py
CHANGED
@@ -107,7 +107,9 @@ def transcribe_pipeline(audio, task):
     return text

 def generate(audio_path, use_v2):
-
+    task = "transcribe"
+    temp_mono_path = None
+
     if use_v2:
         split_stereo_channels(audio_path)

@@ -126,7 +128,6 @@ def generate(audio_path, use_v2):
         right_segs = [(seg["timestamp"][0], seg["timestamp"][1], "Speaker 2", post_process_transcription(seg["text"])) for seg in right_result["chunks"]]

         merged_transcript = sorted(left_segs + right_segs, key=lambda x: x[0])
-        merged_text = " ".join([seg[3] for seg in merged_transcript])

         output = ""
         for start, end, speaker, text in merged_transcript:
@@ -134,14 +135,12 @@ def generate(audio_path, use_v2):

     else:
         audio = AudioSegment.from_wav(audio_path)
-        temp_mono_path = None

         if audio.channels != 1: #stereo2mono
             audio = audio.set_channels(1)
             temp_mono_path = "temp_mono.wav"
             audio.export(temp_mono_path, format="wav")
             audio_path = temp_mono_path
-        task = "transcribe"
         output = transcribe_pipeline(format_audio(audio_path), task)

     clean_output = post_process_transcription(output, max_repeats=1) #check
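Net effect of the patch, sketched below: task and temp_mono_path are now initialised once at the top of generate(), so the dual-channel (use_v2) branch and the mono fallback see the same defaults instead of the mono branch defining them partway through. This is only a reconstruction from the hunks above, not the full file: AudioSegment and the helpers split_stereo_channels, transcribe_pipeline, format_audio and post_process_transcription come from the diff itself and are assumed to exist in whisper.py; everything the diff does not show is elided.

from pydub import AudioSegment  # provides from_wav / set_channels / export, as used in the diff

def generate(audio_path, use_v2):
    # Moved up by this commit: shared defaults for both branches.
    task = "transcribe"
    temp_mono_path = None

    if use_v2:
        split_stereo_channels(audio_path)
        # ... per-channel transcription and speaker merging (unchanged, omitted here);
        # it ends by building the speaker-labelled text into `output`.
        output = ""
    else:
        audio = AudioSegment.from_wav(audio_path)
        if audio.channels != 1:  # stereo2mono
            audio = audio.set_channels(1)
            temp_mono_path = "temp_mono.wav"
            audio.export(temp_mono_path, format="wav")
            audio_path = temp_mono_path
        output = transcribe_pipeline(format_audio(audio_path), task)

    clean_output = post_process_transcription(output, max_repeats=1)
    # ... (the rest of the function lies outside the hunks shown above)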