federicocosta1989 committed on
Commit
6fccdc4
·
verified ·
1 Parent(s): 99577ba

Update whisper_cs.py (#40)

Browse files

- Update whisper_cs.py (ee5993709babb4400395b6bccd3d2cf4d3152d9e)

Files changed (1) hide show
  1. whisper_cs.py +19 -2
whisper_cs.py CHANGED
@@ -11,6 +11,7 @@ from faster_whisper import WhisperModel
11
  device = 0 if torch.cuda.is_available() else "cpu"
12
  torch_dtype = torch.float32
13
 
 
14
  MODEL_PATH_V2 = "langtech-veu/whisper-timestamped-cs"
15
  MODEL_PATH_V2_FAST = "langtech-veu/faster-whisper-timestamped-cs"
16
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
@@ -101,10 +102,18 @@ def post_merge_consecutive_segments_from_text(transcription_text: str) -> str:
101
  return merged_transcription.strip()
102
 
103
  def cleanup_temp_files(*file_paths):
 
 
 
 
 
104
  for path in file_paths:
105
  if path and os.path.exists(path):
 
106
  os.remove(path)
107
 
 
 
108
  '''
109
  try:
110
  faster_model = WhisperModel(
@@ -173,6 +182,9 @@ def transcribe_audio(model, audio_path: str) -> Dict:
173
 
174
  def generate(audio_path, use_v2_fast):
175
 
 
 
 
176
  if use_v2_fast:
177
  split_stereo_channels(audio_path)
178
  left_channel_path = "temp_mono_speaker2.wav"
@@ -206,12 +218,13 @@ def generate(audio_path, use_v2_fast):
206
 
207
  clean_output = ""
208
  for start, end, speaker, text in merged_transcript:
209
- clean_output += f"[{speaker}]: {text}\n"
210
- print('clean_output',clean_output)
211
 
212
  # FIX Seems that post_merge_consecutive_segments_from_text returns an empty string
213
  #clean_output = post_merge_consecutive_segments_from_text(clean_output)
214
  #print('clean_output',clean_output)
 
 
215
 
216
  else:
217
  model = load_whisper_model(MODEL_PATH_V2)
@@ -248,9 +261,13 @@ def generate(audio_path, use_v2_fast):
248
 
249
  clean_output = output.strip()
250
 
 
 
251
  cleanup_temp_files(
252
  "temp_mono_speaker1.wav",
253
  "temp_mono_speaker2.wav"
254
  )
255
 
 
 
256
  return clean_output
 
11
  device = 0 if torch.cuda.is_available() else "cpu"
12
  torch_dtype = torch.float32
13
 
14
+ DEBUG_MODE = True
15
  MODEL_PATH_V2 = "langtech-veu/whisper-timestamped-cs"
16
  MODEL_PATH_V2_FAST = "langtech-veu/faster-whisper-timestamped-cs"
17
  DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 
102
  return merged_transcription.strip()
103
 
104
  def cleanup_temp_files(*file_paths):
105
+
106
+ if DEBUG_MODE: print(f"Entered cleanup_temp_files function...")
107
+
108
+ if DEBUG_MODE: print(f"file_paths: {file_paths}")
109
+
110
  for path in file_paths:
111
  if path and os.path.exists(path):
112
+ if DEBUG_MODE: print(f"Removing path: {path}")
113
  os.remove(path)
114
 
115
+ if DEBUG_MODE: print(f"Exited cleanup_temp_files function.")
116
+
117
  '''
118
  try:
119
  faster_model = WhisperModel(
 
182
 
183
  def generate(audio_path, use_v2_fast):
184
 
185
+ if DEBUG_MODE: print(f"Entering generate function...")
186
+ if DEBUG_MODE: print(f"use_v2_fast: {use_v2_fast}")
187
+
188
  if use_v2_fast:
189
  split_stereo_channels(audio_path)
190
  left_channel_path = "temp_mono_speaker2.wav"
 
218
 
219
  clean_output = ""
220
  for start, end, speaker, text in merged_transcript:
221
+ clean_output += f"[{speaker}]: {text}\n"
 
222
 
223
  # FIX Seems that post_merge_consecutive_segments_from_text returns an empty string
224
  #clean_output = post_merge_consecutive_segments_from_text(clean_output)
225
  #print('clean_output',clean_output)
226
+
227
+ if DEBUG_MODE: print(f"clean_output: {clean_output}")
228
 
229
  else:
230
  model = load_whisper_model(MODEL_PATH_V2)
 
261
 
262
  clean_output = output.strip()
263
 
264
+ if DEBUG_MODE: print(f"Clean output generated.")
265
+
266
  cleanup_temp_files(
267
  "temp_mono_speaker1.wav",
268
  "temp_mono_speaker2.wav"
269
  )
270
 
271
+ if DEBUG_MODE: print(f"Exiting generate function...")
272
+
273
  return clean_output