Update modules/whisper/whisper_base.py
Browse files- modules/whisper/whisper_base.py +87 -87
modules/whisper/whisper_base.py
CHANGED
|
@@ -208,97 +208,97 @@ class WhisperBase(ABC):
|
|
| 208 |
result_file_path:
|
| 209 |
Output file path to return to gr.Files()
|
| 210 |
"""
|
| 211 |
-
try:
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
| 250 |
-
subtitle, file_path = self.generate_and_write_file(
|
| 251 |
-
file_name=file_name,
|
| 252 |
-
transcribed_segments=transcribed_segments,
|
| 253 |
-
add_timestamp=add_timestamp,
|
| 254 |
-
file_format=file_format,
|
| 255 |
-
output_dir=self.output_dir
|
| 256 |
-
)
|
| 257 |
-
files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
|
| 258 |
-
|
| 259 |
-
## Add output file as txt
|
| 260 |
-
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
| 261 |
-
subtitle, file_path = self.generate_and_write_file(
|
| 262 |
-
file_name=file_name,
|
| 263 |
-
transcribed_segments=transcribed_segments,
|
| 264 |
-
add_timestamp=add_timestamp,
|
| 265 |
-
file_format="txt",
|
| 266 |
-
output_dir=self.output_dir
|
| 267 |
-
)
|
| 268 |
-
files_to_download[file_name+"_txt"] = {"path": file_path}
|
| 269 |
-
|
| 270 |
-
## Add output file as srt
|
| 271 |
-
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
| 272 |
-
subtitle, file_path = self.generate_and_write_file(
|
| 273 |
-
file_name=file_name,
|
| 274 |
-
transcribed_segments=transcribed_segments,
|
| 275 |
-
add_timestamp=add_timestamp,
|
| 276 |
-
file_format="srt",
|
| 277 |
-
output_dir=self.output_dir
|
| 278 |
-
)
|
| 279 |
-
files_to_download[file_name+"_srt"] = {"path": file_path}
|
| 280 |
-
|
| 281 |
-
total_result = ''
|
| 282 |
-
total_info = ''
|
| 283 |
-
total_time = 0
|
| 284 |
-
for file_name, info in files_info.items():
|
| 285 |
-
total_result += f'{info["subtitle"]}'
|
| 286 |
-
total_time += info["time_for_task"]
|
| 287 |
-
total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
|
| 288 |
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 292 |
|
| 293 |
-
|
| 294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
-
|
| 297 |
|
| 298 |
-
except Exception as e:
|
| 299 |
-
|
| 300 |
-
finally:
|
| 301 |
-
|
| 302 |
|
| 303 |
def transcribe_mic(self,
|
| 304 |
mic_audio: str,
|
|
|
|
| 208 |
result_file_path:
|
| 209 |
Output file path to return to gr.Files()
|
| 210 |
"""
|
| 211 |
+
#try:
|
| 212 |
+
if input_folder_path:
|
| 213 |
+
files = get_media_files(input_folder_path)
|
| 214 |
+
if isinstance(files, str):
|
| 215 |
+
files = [files]
|
| 216 |
+
if files and isinstance(files[0], gr.utils.NamedString):
|
| 217 |
+
files = [file.name for file in files]
|
| 218 |
+
|
| 219 |
+
## Load model to detect language
|
| 220 |
+
model = whisper.load_model("base")
|
| 221 |
+
|
| 222 |
+
files_info = {}
|
| 223 |
+
files_to_download = {}
|
| 224 |
+
time_start = datetime.now()
|
| 225 |
+
|
| 226 |
+
for file in files:
|
| 227 |
+
|
| 228 |
+
## Detect language
|
| 229 |
+
#params = WhisperParameters.as_value(*whisper_params)
|
| 230 |
+
#model = whisper.load_model(params.model_size)
|
| 231 |
+
mel = whisper.log_mel_spectrogram(whisper.pad_or_trim(whisper.load_audio(file))).to(model.device)
|
| 232 |
+
_, probs = model.detect_language(mel)
|
| 233 |
+
file_language = ""
|
| 234 |
+
file_lang_probs = ""
|
| 235 |
+
for key,value in whisper.tokenizer.LANGUAGES.items():
|
| 236 |
+
if key == str(max(probs, key=probs.get)):
|
| 237 |
+
file_language = value.capitalize()
|
| 238 |
+
max_k, max_v = max(probs.items(), key=lambda x: x[1])
|
| 239 |
+
file_lang_probs = str(round(max_v,2))
|
| 240 |
+
break
|
| 241 |
+
|
| 242 |
+
transcribed_segments, time_for_task = self.run(
|
| 243 |
+
file,
|
| 244 |
+
progress,
|
| 245 |
+
add_timestamp,
|
| 246 |
+
*whisper_params,
|
| 247 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
|
| 249 |
+
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
| 250 |
+
subtitle, file_path = self.generate_and_write_file(
|
| 251 |
+
file_name=file_name,
|
| 252 |
+
transcribed_segments=transcribed_segments,
|
| 253 |
+
add_timestamp=add_timestamp,
|
| 254 |
+
file_format=file_format,
|
| 255 |
+
output_dir=self.output_dir
|
| 256 |
+
)
|
| 257 |
+
files_info[file_name] = {"subtitle": subtitle, "time_for_task": time_for_task, "path": file_path, "lang": file_language, "lang_prob": file_lang_probs, "input_source_file": (file_name+file_ext)}
|
| 258 |
+
|
| 259 |
+
## Add output file as txt
|
| 260 |
+
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
| 261 |
+
subtitle, file_path = self.generate_and_write_file(
|
| 262 |
+
file_name=file_name,
|
| 263 |
+
transcribed_segments=transcribed_segments,
|
| 264 |
+
add_timestamp=add_timestamp,
|
| 265 |
+
file_format="txt",
|
| 266 |
+
output_dir=self.output_dir
|
| 267 |
+
)
|
| 268 |
+
files_to_download[file_name+"_txt"] = {"path": file_path}
|
| 269 |
|
| 270 |
+
## Add output file as srt
|
| 271 |
+
file_name, file_ext = os.path.splitext(os.path.basename(file))
|
| 272 |
+
subtitle, file_path = self.generate_and_write_file(
|
| 273 |
+
file_name=file_name,
|
| 274 |
+
transcribed_segments=transcribed_segments,
|
| 275 |
+
add_timestamp=add_timestamp,
|
| 276 |
+
file_format="srt",
|
| 277 |
+
output_dir=self.output_dir
|
| 278 |
+
)
|
| 279 |
+
files_to_download[file_name+"_srt"] = {"path": file_path}
|
| 280 |
+
|
| 281 |
+
total_result = ''
|
| 282 |
+
total_info = ''
|
| 283 |
+
total_time = 0
|
| 284 |
+
for file_name, info in files_info.items():
|
| 285 |
+
total_result += f'{info["subtitle"]}'
|
| 286 |
+
total_time += info["time_for_task"]
|
| 287 |
+
total_info += f'Input file: {info["input_source_file"]}\nLanguage prediction: {info["lang"]} with probability {info["lang_prob"]}\n'
|
| 288 |
+
|
| 289 |
+
#total_info += f"\nTranscription duration: {self.format_time(total_time)}"
|
| 290 |
+
time_end = datetime.now()
|
| 291 |
+
total_info += f"\nTranscription duration: {self.format_time((time_end-time_start).total_seconds())}"
|
| 292 |
+
|
| 293 |
+
result_str = total_result
|
| 294 |
+
result_file_path = [info['path'] for info in files_to_download.values()]
|
| 295 |
|
| 296 |
+
return [result_str,result_file_path,total_info]
|
| 297 |
|
| 298 |
+
#except Exception as e:
|
| 299 |
+
# print(f"Error transcribing file: {e}")
|
| 300 |
+
#finally:
|
| 301 |
+
# self.release_cuda_memory()
|
| 302 |
|
| 303 |
def transcribe_mic(self,
|
| 304 |
mic_audio: str,
|