whisper_transcribe / set_up.py
chompionsawelo's picture
big fixes
9002374
raw
history blame
2.33 kB
from ui import *
from file_name import *
from diarization import start_diarization
from transcribe import start_transcribe
from video_tool import convert_video_to_audio, add_subtitle_to_video
import gradio as gr
import re
import os
import utils
def prepare_input(input_file, start_time, end_time, lang, model_size, progress=gr.Progress()):
    """Validate the inputs, then run the audio/diarization/transcription pipeline.

    After validation the steps run in this order: convert the video to
    audio, run diarization on the audio, transcribe, and add the subtitle
    to the video. ``progress`` is called before each step.

    Args:
        input_file: Path to the input file; must not be None and must exist.
        start_time: Start time string; must pass ``validate_time_format``.
        end_time: End time string; must pass ``validate_time_format``.
        lang: Language selection; None is rejected with a warning.
        model_size: Model size selection; None is rejected with a warning.
        progress: Callable invoked as ``progress(fraction, desc=...)``.

    Returns:
        ``[video_subtitle_file, transcribe_txt, [transcribe_file, subtitle_file]]``
        on success, or ``[None, None, [None, None]]`` when any input check
        fails (a Gradio warning is raised first).
    """
    gr.Info(ui_lang["progress_starting_process"])

    # Select the warning for the first failing check. The elif chain keeps
    # the checks lazy, so later validators never see inputs that an earlier
    # check has already rejected.
    # NOTE: no check that start_time precedes end_time is performed here.
    if input_file is None or not os.path.exists(input_file):
        warning_key = "input_video_warning"
    elif not validate_time_format(start_time):
        warning_key = "start_time_warning"
    elif not validate_time_format(end_time):
        warning_key = "end_time_warning"
    elif lang is None:
        warning_key = "lang_radio_warning"
    elif model_size is None:
        warning_key = "model_dropdown_warning"
    else:
        warning_key = None

    if warning_key is not None:
        gr.Warning(ui_lang[warning_key])
        return [None, None, [None, None]]

    print(f"SOURCE: {input_file}")
    print(f"AUDIO FILE: {audio_file}")

    # Step 1: convert the video to audio.
    progress(0.2, desc=ui_lang["progress_preparing_video"])
    convert_video_to_audio(input_file, audio_file, start_time, end_time)

    # Step 2: diarization on the converted audio.
    progress(0.4, desc=ui_lang["progress_acquiring_diarization"])
    start_diarization(audio_file)

    # Step 3: transcription.
    progress(0.6, desc=ui_lang["progress_transcribing_audio"])
    start_transcribe(input_file, lang, model_size, progress)

    # Step 4: add the subtitle to the video.
    progress(0.8, desc=ui_lang["progress_add_subtitle"])
    add_subtitle_to_video(
        input_file,
        base_subtitle_file,
        video_subtitle_file,
        start_time,
        end_time,
    )

    # Result: subtitled video file, transcript text, and the two output files.
    transcript_lines, _ = utils.read_transcribe_subtitle_file(input_file, False)
    transcribe_txt = "\n".join(transcript_lines)
    output_files = [transcribe_file, subtitle_file]
    return [video_subtitle_file, transcribe_txt, output_files]
# Compiled once at import time. The explicit [0-9] class restricts matches to
# ASCII digits (plain \d would also accept other Unicode decimal digits).
_TIME_FORMAT_PATTERN = re.compile(r"[0-9]{2}:[0-9]{2}:[0-9]{2}")


def validate_time_format(input_string):
    """Return True when *input_string* is exactly of the form ``DD:DD:DD``.

    Each ``D`` is an ASCII digit, e.g. ``"00:01:30"``. Only the shape is
    checked; the numeric ranges of the three fields are not validated.

    Args:
        input_string: The value to check. Non-string values (including
            None) are treated as invalid instead of raising ``TypeError``.

    Returns:
        bool: True if the whole string matches the format, else False.
    """
    if not isinstance(input_string, str):
        return False
    # fullmatch, unlike match() with a trailing "$", does not tolerate a
    # trailing newline after the last field.
    return _TIME_FORMAT_PATTERN.fullmatch(input_string) is not None