File size: 2,059 Bytes
e2d8d82
e698260
931df81
 
e698260
 
931df81
 
 
 
e698260
 
931df81
 
581b947
e698260
 
 
 
 
 
cb85517
32e4ded
e698260
 
a442a66
931df81
 
 
3130060
e698260
3130060
32e4ded
e698260
 
 
73cf1fe
f8597f4
e698260
 
 
 
 
931df81
e698260
 
931df81
e698260
 
 
 
581b947
e698260
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
from faster_whisper import WhisperModel
from utils import load_groups_json
import torch
import gc
import file_name
import utils

# Import-time cleanup: run a garbage-collection pass, then ask PyTorch to
# release cached CUDA memory it is not currently using, before any model
# is loaded by this module.
gc.collect()
torch.cuda.empty_cache()

# Language codes selectable by index (see ``lang_choice`` in start_transcribe).
# The chosen entry is passed as ``language=`` to ``model.transcribe``.
# NOTE(review): ``None`` presumably requests automatic language detection -
# confirm against the faster-whisper documentation.
model_lang_list = ['en', 'id', None]
# Whisper model size names selectable by index (see ``model_size_choice``).
model_size = ["tiny", "base", "small", "medium", "large"]


def start_transcribe(input_file, lang_choice: int, model_size_choice: int, progress):
    """Transcribe each speaker group's audio and write transcript/subtitle output.

    For every key in the speaker groups returned by ``load_groups_json()``,
    the file ``"<speaker>.wav"`` is transcribed with a faster-whisper model.
    Each resulting segment becomes one numbered subtitle cue, and the
    segment texts of a speaker are joined into a single transcript line.
    Both lists are then handed to ``utils.write_transcribe_subtitle_file``.

    Args:
        input_file: Passed through unchanged to
            ``utils.write_transcribe_subtitle_file``.
        lang_choice: Index into the module-level ``model_lang_list``.
        model_size_choice: Index into the module-level ``model_size``.
        progress: Object exposing ``tqdm(iterable, desc=...)`` that yields
            the items of ``iterable`` while reporting progress
            (presumably a ``gradio.Progress`` - confirm against the caller).

    Raises:
        IndexError: If ``lang_choice`` or ``model_size_choice`` is out of
            range for its list.
    """
    language = model_lang_list[lang_choice]
    size_name = model_size[model_size_choice]
    print(
        f"Starting transcribing with model size {size_name} for language {language}")

    model = WhisperModel(size_name, device="cuda", compute_type="int8_float16")
    # Only the second element of the returned pair is used here.
    _, speaker_groups = load_groups_json()

    subtitle_txt_list = []
    transcribe_txt_list = []
    # Iterate the progress wrapper itself so it is advanced once per speaker
    # and is allowed to run to completion when the loop finishes.
    for speaker in progress.tqdm(speaker_groups, desc="Transcribing"):
        # Transcribe this speaker's audio file
        audiof = f"{speaker}.wav"
        print(f"Loading {audiof}")
        segments, _ = model.transcribe(
            audio=audiof, language=language, word_timestamps=True)
        # Materialise the segments before they are used twice below
        # (once for the subtitle cues, once for the joined transcript).
        segments_list = list(segments)

        # Offset added to every segment timestamp of this speaker.
        # NOTE(review): assumes speaker_groups[speaker][0] is the group's
        # start time in seconds; the reason for the extra +1 is not visible
        # here - confirm against the code that produces the groups.
        shift = speaker_groups[speaker][0] + 1
        print(f"Current starting point: {shift}s or {time_str(shift)}")
        # Speaker label is truncated to at most 10 characters.
        name = str(speaker)[:10]
        for segment in segments_list:
            start = time_str(segment.start + shift)
            end = time_str(segment.end + shift)

            # Cue numbers are global across all speakers, starting at 1.
            subtitle = f"{len(subtitle_txt_list) + 1}\n{start} --> {end}\n[{name}] {segment.text}\n\n"
            subtitle_txt_list.append(subtitle)

        speaker_txt = " ".join(segment.text for segment in segments_list)
        transcribe_txt_list.append(f"[{name}] {speaker_txt}\n")

    # NOTE(review): the meaning of the trailing ``False`` flag is defined in
    # utils.write_transcribe_subtitle_file and is not visible from here.
    utils.write_transcribe_subtitle_file(
        input_file, transcribe_txt_list, subtitle_txt_list, False)


def time_str(t):
    """Format a duration in seconds as ``HH:MM:SS.mmm``.

    The value is rounded to whole milliseconds first and only then split
    into hours, minutes and seconds, so a value just below a boundary
    carries over correctly (``59.9996`` -> ``00:01:00.000`` rather than
    ``00:00:60.000``).

    Args:
        t: Time in seconds (int or float).

    Returns:
        The formatted timestamp string; hours are zero-padded to at least
        two digits.
    """
    total_ms = int(round(t * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"