Spaces:
Runtime error
Runtime error
File size: 2,169 Bytes
e2d8d82 e698260 931df81 bef4887 e698260 931df81 e698260 8f3ea36 931df81 581b947 e698260 cb85517 32e4ded e698260 bef4887 931df81 3130060 e698260 3130060 32e4ded e698260 9002374 73cf1fe f8597f4 9002374 e698260 931df81 e698260 931df81 e698260 9002374 e698260 581b947 e698260 9002374 e698260 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from faster_whisper import WhisperModel
from utils import load_groups_json
import torch
import gc
from ui import *
import utils
# Import-time cleanup: run a garbage-collection pass and ask PyTorch to release
# its cached, currently unused CUDA memory before any model is loaded.
gc.collect()
torch.cuda.empty_cache()
# Language codes selectable by index (see `lang_choice` in start_transcribe).
# NOTE(review): the None entry is passed straight to model.transcribe();
# presumably this means "auto-detect the language" — confirm against faster-whisper.
model_lang_list = ['en', 'id', None]
# Whisper model size names selectable by index (see `model_size_choice`).
model_size = ["tiny", "base", "small", "medium", "large-v2"]
def start_transcribe(input_file, lang_choice: int, model_size_choice: int, progress):
    """Transcribe every speaker group's audio and write the combined output.

    For each key in the speaker groups returned by ``load_groups_json()``,
    the file ``<speaker>.wav`` is transcribed with a faster-whisper model.
    Each transcribed segment becomes one numbered subtitle entry, and each
    speaker's segments are joined into one transcript paragraph. Both lists
    are handed to ``utils.write_transcribe_subtitle_file``.

    Args:
        input_file: Passed through unchanged to
            ``utils.write_transcribe_subtitle_file``.
        lang_choice: Index into the module-level ``model_lang_list``.
        model_size_choice: Index into the module-level ``model_size``.
        progress: Object exposing ``tqdm(iterable, desc=...)``; presumably a
            Gradio progress tracker — confirm against the caller.

    Returns:
        None. Results are delivered through the write helper.
    """
    print(
        f"Starting transcribing with model size {model_size[model_size_choice]} for language {model_lang_list[lang_choice]}")

    # The model is always created on CUDA with int8/float16 mixed compute.
    whisper = WhisperModel(
        model_size[model_size_choice],
        device="cuda",
        compute_type="int8_float16",
    )
    language = model_lang_list[lang_choice]
    _unused, speaker_groups = load_groups_json()

    subtitle_txt_list = []
    transcript_blocks = []

    # `ui_lang` (and `time_str` below) are not defined in this module;
    # presumably they arrive via `from ui import *` — verify.
    tracker = progress.tqdm(
        speaker_groups, desc=ui_lang["progress_transcribing_audio"])

    # The speaker key is taken from `speaker_groups` itself; the progress
    # wrapper is zipped alongside only so that it advances once per speaker.
    for speaker, _tick in zip(speaker_groups, tracker):
        audiof = f"{speaker}.wav"
        print(f"Loading {audiof}")

        segments, _info = whisper.transcribe(
            audio=audiof, language=language, word_timestamps=True)
        # Materialise the segments before continuing with this speaker.
        transcribed = list(segments)

        # Offset added to every segment time of this speaker. The log line
        # below reports it in seconds; NOTE(review): presumably the group's
        # start time plus one second — confirm against load_groups_json().
        shift = speaker_groups[speaker][0] + 1
        print(
            f"Current starting point: {shift}s or {time_str_subtitle(shift)}")

        # Label used in the output, limited to the first ten characters.
        name = str(speaker)[:10]

        spoken_parts = []
        for segment in transcribed:
            start = time_str_subtitle(segment.start + shift)
            end = time_str_subtitle(segment.end + shift)
            segment_txt = segment.text
            spoken_parts.append(segment_txt)
            # Subtitle entries are numbered continuously across all speakers.
            subtitle_txt_list.append(
                f"{len(subtitle_txt_list) + 1}\n{start} --> {end}\n[{name}] {segment_txt}\n\n")

        speaker_txt = " ".join(spoken_parts)
        transcript_blocks.append(
            f"[{time_str(shift)}]\n[{name}] {speaker_txt}\n")

    # Write once, after every speaker has been processed.
    utils.write_transcribe_subtitle_file(
        input_file, transcript_blocks, subtitle_txt_list, False)
def time_str_subtitle(t):
    """Format a time offset in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds first and only then split
    into fields, so a rounding carry propagates into seconds, minutes and
    hours. For example, 59.9996 becomes ``00:01:00.000`` instead of the
    invalid ``00:00:60.000`` that per-field rounding would produce.

    Args:
        t: Offset in seconds (int or float). Expected to be non-negative.

    Returns:
        The timestamp string with zero-padded hours, minutes and seconds
        and a three-digit millisecond fraction.
    """
    total_ms = int(round(t * 1000))
    hours, rest_ms = divmod(total_ms, 3_600_000)
    minutes, rest_ms = divmod(rest_ms, 60_000)
    seconds, millis = divmod(rest_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
|