LAP-DEV committed on
Commit
af50baa
·
verified ·
1 Parent(s): fe7eaa5

Update modules/whisper/whisper_base.py

Browse files
Files changed (1) hide show
  1. modules/whisper/whisper_base.py +88 -88
modules/whisper/whisper_base.py CHANGED
@@ -208,97 +208,97 @@ class WhisperBase(ABC):
208
  result_file_path:
209
  Output file path to return to gr.Files()
210
  """
211
- #try:
212
- if input_folder_path:
213
- files = get_media_files(input_folder_path)
214
- if isinstance(files, str):
215
- files = [files]
216
- if files and isinstance(files[0], gr.utils.NamedString):
217
- files = [file.name for file in files]
218
-
219
- ## Load model to detect language
220
- model = whisper.load_model("base")
221
-
222
- files_info = {}
223
- files_to_download = {}
224
- time_start = datetime.now()
225
-
226
- for file in files:
227
-
228
- ## Detect language
229
- #params = WhisperParameters.as_value(*whisper_params)
230
- #model = whisper.load_model(params.model_size)
231
- mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
232
- _, probs = model.detect_language(mel)
233
- file_language = ""
234
- file_lang_probs = ""
235
- for key,value in whisper.tokenizer.LANGUAGES.items():
236
- if key == str(max(probs, key=probs.get)):
237
- file_language = value.capitalize()
238
- max_k, max_v = max(probs.items(), key=lambda x: x[1])
239
- file_lang_probs = str(round(max_v,2))
240
- break
241
-
242
- transcribed_segments, time_for_task = self.run(
243
- file,
244
- progress,
245
- add_timestamp,
246
- *whisper_params,
247
- )
248
 
249
- file_name, file_ext = os.path.splitext(os.path.basename(file))
250
- subtitle, file_path = self.generate_and_write_file(
251
- file_name=file_name,
252
- transcribed_segments=transcribed_segments,
253
- add_timestamp=add_timestamp,
254
- file_format=file_format,
255
- output_dir=self.output_dir
256
- )
257
- files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
258
-
259
- ## Add output file as txt
260
- file_name, file_ext = os.path.splitext(os.path.basename(file))
261
- subtitle, file_path = self.generate_and_write_file(
262
- file_name=file_name,
263
- transcribed_segments=transcribed_segments,
264
- add_timestamp=add_timestamp,
265
- file_format="txt",
266
- output_dir=self.output_dir
267
- )
268
- files_to_download[file_name+"_txt"] = {"path": file_path}
269
 
270
- ## Add output file as srt
271
- file_name, file_ext = os.path.splitext(os.path.basename(file))
272
- subtitle, file_path = self.generate_and_write_file(
273
- file_name=file_name,
274
- transcribed_segments=transcribed_segments,
275
- add_timestamp=add_timestamp,
276
- file_format="srt",
277
- output_dir=self.output_dir
278
- )
279
- files_to_download[file_name+"_srt"] = {"path": file_path}
280
-
281
- total_result = ''
282
- total_info = ''
283
- total_time = 0
284
- for file_name, info in files_info.items():
285
- total_result += f'{info["subtitle"]}'
286
- total_time += info["time_for_task"]
287
- total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
288
-
289
- #total_info += f"\nTranscription duration: {self.format_time(total_time)}"
290
- time_end = datetime.now()
291
- total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
292
-
293
- result_str = total_result
294
- result_file_path = [info['path'] for info in files_to_download.values()]
295
-
296
- return [result_str,result_file_path,total_info]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
 
298
- #except Exception as e:
299
- # print(f"Error transcribing file: {e}")
300
- #finally:
301
- # self.release_cuda_memory()
302
 
303
  def transcribe_mic(self,
304
  mic_audio: str,
 
208
  result_file_path:
209
  Output file path to return to gr.Files()
210
  """
211
+ try:
212
+ if input_folder_path:
213
+ files = get_media_files(input_folder_path)
214
+ if isinstance(files, str):
215
+ files = [files]
216
+ if files and isinstance(files[0], gr.utils.NamedString):
217
+ files = [file.name for file in files]
218
+
219
+ ## Load model to detect language
220
+ model = whisper.load_model("base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
 
222
+ files_info = {}
223
+ files_to_download = {}
224
+ time_start = datetime.now()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
+ for file in files:
227
+
228
+ ## Detect language
229
+ #params = WhisperParameters.as_value(*whisper_params)
230
+ #model = whisper.load_model(params.model_size)
231
+ mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
232
+ _, probs = model.detect_language(mel)
233
+ file_language = ""
234
+ file_lang_probs = ""
235
+ for key,value in whisper.tokenizer.LANGUAGES.items():
236
+ if key == str(max(probs, key=probs.get)):
237
+ file_language = value.capitalize()
238
+ max_k, max_v = max(probs.items(), key=lambda x: x[1])
239
+ file_lang_probs = str(round(max_v,2))
240
+ break
241
+
242
+ transcribed_segments, time_for_task = self.run(
243
+ file,
244
+ progress,
245
+ add_timestamp,
246
+ *whisper_params,
247
+ )
248
+
249
+ file_name, file_ext = os.path.splitext(os.path.basename(file))
250
+ subtitle, file_path = self.generate_and_write_file(
251
+ file_name=file_name,
252
+ transcribed_segments=transcribed_segments,
253
+ add_timestamp=add_timestamp,
254
+ file_format=file_format,
255
+ output_dir=self.output_dir
256
+ )
257
+ files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
258
+
259
+ ## Add output file as txt
260
+ file_name, file_ext = os.path.splitext(os.path.basename(file))
261
+ subtitle, file_path = self.generate_and_write_file(
262
+ file_name=file_name,
263
+ transcribed_segments=transcribed_segments,
264
+ add_timestamp=add_timestamp,
265
+ file_format="txt",
266
+ output_dir=self.output_dir
267
+ )
268
+ files_to_download[file_name+"_txt"] = {"path": file_path}
269
+
270
+ ## Add output file as srt
271
+ file_name, file_ext = os.path.splitext(os.path.basename(file))
272
+ subtitle, file_path = self.generate_and_write_file(
273
+ file_name=file_name,
274
+ transcribed_segments=transcribed_segments,
275
+ add_timestamp=add_timestamp,
276
+ file_format="srt",
277
+ output_dir=self.output_dir
278
+ )
279
+ files_to_download[file_name+"_srt"] = {"path": file_path}
280
+
281
+ total_result = ''
282
+ total_info = ''
283
+ total_time = 0
284
+ for file_name, info in files_info.items():
285
+ total_result += f'{info["subtitle"]}'
286
+ total_time += info["time_for_task"]
287
+ total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
288
+
289
+ #total_info += f"\nTranscription duration: {self.format_time(total_time)}"
290
+ time_end = datetime.now()
291
+ total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
292
+
293
+ result_str = total_result
294
+ result_file_path = [info['path'] for info in files_to_download.values()]
295
+
296
+ return [result_str,result_file_path,total_info]
297
 
298
+ except Exception as e:
299
+ print(f"Error transcribing file: {e}")
300
+ finally:
301
+ self.release_cuda_memory()
302
 
303
  def transcribe_mic(self,
304
  mic_audio: str,