Update modules/whisper/whisper_base.py
Browse files- modules/whisper/whisper_base.py +87 -87
modules/whisper/whisper_base.py
CHANGED
@@ -208,97 +208,97 @@ class WhisperBase(ABC):
|
|
208 |
result_file_path:
|
209 |
Output file path to return to gr.Files()
|
210 |
"""
|
211 |
-
try:
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
250 |
-
subtitle, file_path = self.generate_and_write_file(
|
251 |
-
file_name=file_name,
|
252 |
-
transcribed_segments=transcribed_segments,
|
253 |
-
add_timestamp=add_timestamp,
|
254 |
-
file_format=file_format,
|
255 |
-
output_dir=self.output_dir
|
256 |
-
)
|
257 |
-
files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
|
258 |
-
|
259 |
-
## Add output file as txt
|
260 |
-
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
261 |
-
subtitle, file_path = self.generate_and_write_file(
|
262 |
-
file_name=file_name,
|
263 |
-
transcribed_segments=transcribed_segments,
|
264 |
-
add_timestamp=add_timestamp,
|
265 |
-
file_format="txt",
|
266 |
-
output_dir=self.output_dir
|
267 |
-
)
|
268 |
-
files_to_download[file_name+"_txt"] = {"path": file_path}
|
269 |
-
|
270 |
-
## Add output file as srt
|
271 |
-
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
272 |
-
subtitle, file_path = self.generate_and_write_file(
|
273 |
-
file_name=file_name,
|
274 |
-
transcribed_segments=transcribed_segments,
|
275 |
-
add_timestamp=add_timestamp,
|
276 |
-
file_format="srt",
|
277 |
-
output_dir=self.output_dir
|
278 |
-
)
|
279 |
-
files_to_download[file_name+"_srt"] = {"path": file_path}
|
280 |
-
|
281 |
-
total_result = ''
|
282 |
-
total_info = ''
|
283 |
-
total_time = 0
|
284 |
-
for file_name, info in files_info.items():
|
285 |
-
total_result += f'{info["subtitle"]}'
|
286 |
-
total_time += info["time_for_task"]
|
287 |
-
total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
|
288 |
|
289 |
-
|
290 |
-
|
291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
292 |
|
293 |
-
|
294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
-
|
297 |
|
298 |
-
except Exception as e:
|
299 |
-
|
300 |
-
finally:
|
301 |
-
|
302 |
|
303 |
def transcribe_mic(self,
|
304 |
mic_audio: str,
|
|
|
208 |
result_file_path:
|
209 |
Output file path to return to gr.Files()
|
210 |
"""
|
211 |
+
#try:
|
212 |
+
if input_folder_path:
|
213 |
+
files = get_media_files(input_folder_path)
|
214 |
+
if isinstance(files, str):
|
215 |
+
files = [files]
|
216 |
+
if files and isinstance(files[0], gr.utils.NamedString):
|
217 |
+
files = [file.name for file in files]
|
218 |
+
|
219 |
+
## Load model to detect language
|
220 |
+
model = whisper.load_model("base")
|
221 |
+
|
222 |
+
files_info = {}
|
223 |
+
files_to_download = {}
|
224 |
+
time_start = datetime.now()
|
225 |
+
|
226 |
+
for file in files:
|
227 |
+
|
228 |
+
## Detect language
|
229 |
+
#params = WhisperParameters.as_value(*whisper_params)
|
230 |
+
#model = whisper.load_model(params.model_size)
|
231 |
+
mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
|
232 |
+
_, probs = model.detect_language(mel)
|
233 |
+
file_language = ""
|
234 |
+
file_lang_probs = ""
|
235 |
+
for key,value in whisper.tokenizer.LANGUAGES.items():
|
236 |
+
if key == str(max(probs, key=probs.get)):
|
237 |
+
file_language = value.capitalize()
|
238 |
+
max_k, max_v = max(probs.items(), key=lambda x: x[1])
|
239 |
+
file_lang_probs = str(round(max_v,2))
|
240 |
+
break
|
241 |
+
|
242 |
+
transcribed_segments, time_for_task = self.run(
|
243 |
+
file,
|
244 |
+
progress,
|
245 |
+
add_timestamp,
|
246 |
+
*whisper_params,
|
247 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
248 |
|
249 |
+
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
250 |
+
subtitle, file_path = self.generate_and_write_file(
|
251 |
+
file_name=file_name,
|
252 |
+
transcribed_segments=transcribed_segments,
|
253 |
+
add_timestamp=add_timestamp,
|
254 |
+
file_format=file_format,
|
255 |
+
output_dir=self.output_dir
|
256 |
+
)
|
257 |
+
files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
|
258 |
+
|
259 |
+
## Add output file as txt
|
260 |
+
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
261 |
+
subtitle, file_path = self.generate_and_write_file(
|
262 |
+
file_name=file_name,
|
263 |
+
transcribed_segments=transcribed_segments,
|
264 |
+
add_timestamp=add_timestamp,
|
265 |
+
file_format="txt",
|
266 |
+
output_dir=self.output_dir
|
267 |
+
)
|
268 |
+
files_to_download[file_name+"_txt"] = {"path": file_path}
|
269 |
|
270 |
+
## Add output file as srt
|
271 |
+
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
272 |
+
subtitle, file_path = self.generate_and_write_file(
|
273 |
+
file_name=file_name,
|
274 |
+
transcribed_segments=transcribed_segments,
|
275 |
+
add_timestamp=add_timestamp,
|
276 |
+
file_format="srt",
|
277 |
+
output_dir=self.output_dir
|
278 |
+
)
|
279 |
+
files_to_download[file_name+"_srt"] = {"path": file_path}
|
280 |
+
|
281 |
+
total_result = ''
|
282 |
+
total_info = ''
|
283 |
+
total_time = 0
|
284 |
+
for file_name, info in files_info.items():
|
285 |
+
total_result += f'{info["subtitle"]}'
|
286 |
+
total_time += info["time_for_task"]
|
287 |
+
total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
|
288 |
+
|
289 |
+
#total_info += f"\nTranscription duration: {self.format_time(total_time)}"
|
290 |
+
time_end = datetime.now()
|
291 |
+
total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
|
292 |
+
|
293 |
+
result_str = total_result
|
294 |
+
result_file_path = [info['path'] for info in files_to_download.values()]
|
295 |
|
296 |
+
return [result_str,result_file_path,total_info]
|
297 |
|
298 |
+
#except Exception as e:
|
299 |
+
# print(f"Error transcribing file: {e}")
|
300 |
+
#finally:
|
301 |
+
# self.release_cuda_memory()
|
302 |
|
303 |
def transcribe_mic(self,
|
304 |
mic_audio: str,
|