Spaces:
Running
on
Zero
Running
on
Zero
Update whisper_cs.py (#40)
Browse files- Update whisper_cs.py (ee5993709babb4400395b6bccd3d2cf4d3152d9e)
- whisper_cs.py +19 -2
whisper_cs.py
CHANGED
@@ -11,6 +11,7 @@ from faster_whisper import WhisperModel
|
|
11 |
device = 0 if torch.cuda.is_available() else "cpu"
|
12 |
torch_dtype = torch.float32
|
13 |
|
|
|
14 |
MODEL_PATH_V2 = "langtech-veu/whisper-timestamped-cs"
|
15 |
MODEL_PATH_V2_FAST = "langtech-veu/faster-whisper-timestamped-cs"
|
16 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -101,10 +102,18 @@ def post_merge_consecutive_segments_from_text(transcription_text: str) -> str:
|
|
101 |
return merged_transcription.strip()
|
102 |
|
103 |
def cleanup_temp_files(*file_paths):
|
|
|
|
|
|
|
|
|
|
|
104 |
for path in file_paths:
|
105 |
if path and os.path.exists(path):
|
|
|
106 |
os.remove(path)
|
107 |
|
|
|
|
|
108 |
'''
|
109 |
try:
|
110 |
faster_model = WhisperModel(
|
@@ -173,6 +182,9 @@ def transcribe_audio(model, audio_path: str) -> Dict:
|
|
173 |
|
174 |
def generate(audio_path, use_v2_fast):
|
175 |
|
|
|
|
|
|
|
176 |
if use_v2_fast:
|
177 |
split_stereo_channels(audio_path)
|
178 |
left_channel_path = "temp_mono_speaker2.wav"
|
@@ -206,12 +218,13 @@ def generate(audio_path, use_v2_fast):
|
|
206 |
|
207 |
clean_output = ""
|
208 |
for start, end, speaker, text in merged_transcript:
|
209 |
-
clean_output += f"[{speaker}]: {text}\n"
|
210 |
-
print('clean_output',clean_output)
|
211 |
|
212 |
# FIX Seems that post_merge_consecutive_segments_from_text returns an empty string
|
213 |
#clean_output = post_merge_consecutive_segments_from_text(clean_output)
|
214 |
#print('clean_output',clean_output)
|
|
|
|
|
215 |
|
216 |
else:
|
217 |
model = load_whisper_model(MODEL_PATH_V2)
|
@@ -248,9 +261,13 @@ def generate(audio_path, use_v2_fast):
|
|
248 |
|
249 |
clean_output = output.strip()
|
250 |
|
|
|
|
|
251 |
cleanup_temp_files(
|
252 |
"temp_mono_speaker1.wav",
|
253 |
"temp_mono_speaker2.wav"
|
254 |
)
|
255 |
|
|
|
|
|
256 |
return clean_output
|
|
|
11 |
device = 0 if torch.cuda.is_available() else "cpu"
|
12 |
torch_dtype = torch.float32
|
13 |
|
14 |
+
DEBUG_MODE = True
|
15 |
MODEL_PATH_V2 = "langtech-veu/whisper-timestamped-cs"
|
16 |
MODEL_PATH_V2_FAST = "langtech-veu/faster-whisper-timestamped-cs"
|
17 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
102 |
return merged_transcription.strip()
|
103 |
|
104 |
def cleanup_temp_files(*file_paths):
|
105 |
+
|
106 |
+
if DEBUG_MODE: print(f"Entered cleanup_temp_files function...")
|
107 |
+
|
108 |
+
if DEBUG_MODE: print(f"file_paths: {file_paths}")
|
109 |
+
|
110 |
for path in file_paths:
|
111 |
if path and os.path.exists(path):
|
112 |
+
if DEBUG_MODE: print(f"Removing path: {path}")
|
113 |
os.remove(path)
|
114 |
|
115 |
+
if DEBUG_MODE: print(f"Exited cleanup_temp_files function.")
|
116 |
+
|
117 |
'''
|
118 |
try:
|
119 |
faster_model = WhisperModel(
|
|
|
182 |
|
183 |
def generate(audio_path, use_v2_fast):
|
184 |
|
185 |
+
if DEBUG_MODE: print(f"Entering generate function...")
|
186 |
+
if DEBUG_MODE: print(f"use_v2_fast: {use_v2_fast}")
|
187 |
+
|
188 |
if use_v2_fast:
|
189 |
split_stereo_channels(audio_path)
|
190 |
left_channel_path = "temp_mono_speaker2.wav"
|
|
|
218 |
|
219 |
clean_output = ""
|
220 |
for start, end, speaker, text in merged_transcript:
|
221 |
+
clean_output += f"[{speaker}]: {text}\n"
|
|
|
222 |
|
223 |
# FIX Seems that post_merge_consecutive_segments_from_text returns an empty string
|
224 |
#clean_output = post_merge_consecutive_segments_from_text(clean_output)
|
225 |
#print('clean_output',clean_output)
|
226 |
+
|
227 |
+
if DEBUG_MODE: print(f"clean_output: {clean_output}")
|
228 |
|
229 |
else:
|
230 |
model = load_whisper_model(MODEL_PATH_V2)
|
|
|
261 |
|
262 |
clean_output = output.strip()
|
263 |
|
264 |
+
if DEBUG_MODE: print(f"Clean output generated.")
|
265 |
+
|
266 |
cleanup_temp_files(
|
267 |
"temp_mono_speaker1.wav",
|
268 |
"temp_mono_speaker2.wav"
|
269 |
)
|
270 |
|
271 |
+
if DEBUG_MODE: print(f"Exiting generate function...")
|
272 |
+
|
273 |
return clean_output
|