import gc

import torch
from faster_whisper import WhisperModel

import tool.text_file_tool as text_file_tool
from tool.json_tool import load_groups_json
from ui.ui_component import *

# Release leftover references and cached GPU memory before loading a model.
gc.collect()
torch.cuda.empty_cache()

# Supported transcription languages (None lets faster-whisper auto-detect)
# and the Whisper model sizes exposed to the user.
model_lang_list = ['en', 'id', None]
model_size = ["tiny", "base", "small", "medium", "large-v2"]
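

# Note: lang_choice and model_size_choice below index into the two lists
# above; presumably they are selected via the UI dropdowns in ui.ui_component.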
def start_transcribe(lang_choice: int, model_size_choice: int, progress):
    """Transcribe each speaker group's audio and write transcript and subtitle files."""
    print(
        f"Starting transcribing with model size {model_size[model_size_choice]} "
        f"for language {model_lang_list[lang_choice]}")
    model = WhisperModel(model_size[model_size_choice])
    _, speaker_groups = load_groups_json()

    # Accumulators for the three output flavors
    subtitle_txt_list = []
    transcribe_txt_list = []
    simple_transcribe_txt_list = []
    # One pass per speaker group; progress.tqdm drives the UI progress bar.
    for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc=current_ui_lang["progress_transcribing_audio"])):
        # Transcribe the previously saved per-speaker wav chunk
        audiof = f"{speaker}.wav"
        segments, _ = model.transcribe(
            audio=audiof, language=model_lang_list[lang_choice], word_timestamps=True)
        segments_list = list(segments)
        speaker_txt_list = []
        # Offset of this chunk within the full recording, in seconds
        shift = speaker_groups[speaker][0] + 1
        print(f"Starting point: {shift}s or {time_str_subtitle(shift)}")
        # Short speaker label used in the subtitle and transcript lines
        name = str(speaker)[:10]
        for segment in segments_list:
            start = time_str_subtitle(segment.start + shift)
            end = time_str_subtitle(segment.end + shift)
            segment_txt = segment.text
            speaker_txt_list.append(segment_txt)
            subtitle = f"{len(subtitle_txt_list) + 1}\n{start} --> {end}\n[{name}] {segment_txt}\n\n"
            subtitle_txt_list.append(subtitle)
        speaker_txt = " ".join(speaker_txt_list)
        transcribe_txt_list.append(
            f"[{time_str(shift)}]\n[{name}] {speaker_txt}\n")
        simple_transcribe_txt_list.append(f"{speaker_txt}\n")

    # Write the plain transcript, used later for summarization
    text_file_tool.write_simple_transcribe_file(
        simple_transcribe_txt_list)
    # Write to base as the main output
    text_file_tool.write_transcribe_subtitle_file(
        transcribe_txt_list, subtitle_txt_list, False)
    # Write to adjusted as the fallback
    text_file_tool.write_transcribe_subtitle_file(
        transcribe_txt_list, subtitle_txt_list, True)


def time_str_subtitle(t):
    """Format seconds as HH:MM:SS.mmm for subtitle timestamps."""
    return '{0:02d}:{1:02d}:{2:06.3f}'.format(int(t // 3600),
                                              int(t % 3600 // 60),
                                              t % 60)


def time_str(t):
    """Format seconds as HH:MM:SS for transcript headers."""
    return '{0:02d}:{1:02d}:{2:02d}'.format(int(t // 3600),
                                            int(t % 3600 // 60),
                                            int(t % 60))  # floor, so seconds never display as 60
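

# --- Usage sketch (not part of the original file; names below are assumptions) ---
# start_transcribe is expected to be driven by a Gradio event handler that
# injects a progress tracker, roughly like:
#
#   import gradio as gr
#
#   def on_transcribe(lang_choice, model_size_choice, progress=gr.Progress()):
#       start_transcribe(lang_choice, model_size_choice, progress)
#
# Quick check of the timestamp helpers:
#   time_str_subtitle(3725.5) -> "01:02:05.500"
#   time_str(3725.5)          -> "01:02:05"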