File size: 2,714 Bytes
e2d8d82
59e1d08
 
931df81
 
59e1d08
931df81
 
 
 
e698260
8f3ea36
931df81
 
59e1d08
e698260
 
 
59e1d08
cb85517
32e4ded
e698260
 
59e1d08
 
931df81
 
59e1d08
3130060
e698260
3130060
32e4ded
e698260
 
59e1d08
 
73cf1fe
f8597f4
9002374
 
e698260
 
 
931df81
e698260
 
931df81
e698260
9002374
 
59e1d08
e698260
59e1d08
 
 
 
 
 
 
 
 
e698260
 
9002374
e698260
 
 
59e1d08
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from faster_whisper import WhisperModel
from tool.json_tool import load_groups_json
from ui.ui_component import *
import torch
import gc
import tool.text_file_tool as text_file_tool

# Free leftover Python objects and any cached CUDA allocations from a previous
# run before a new Whisper model is loaded (empty_cache is a no-op without CUDA).
gc.collect()
torch.cuda.empty_cache()

# UI choice tables: index positions must match the dropdowns that produce
# lang_choice / model_size_choice. None = let Whisper auto-detect the language.
model_lang_list = ['en', 'id', None]
model_size = ["tiny", "base", "small", "medium", "large-v2"]


def start_transcribe(lang_choice: int, model_size_choice: int, progress) -> None:
    """Transcribe each diarized speaker group's audio and write transcript,
    simple-transcript, and subtitle files via ``text_file_tool``.

    Args:
        lang_choice: index into ``model_lang_list`` ('en', 'id', or None for
            Whisper auto-detect).
        model_size_choice: index into ``model_size`` (Whisper checkpoint name).
        progress: progress reporter exposing a ``tqdm`` wrapper — presumably a
            gradio ``Progress`` object; confirm at the call site.

    Side effects: loads a faster-whisper model, reads one ``<speaker>.wav``
    file per group, prints status lines, and writes output files through
    ``text_file_tool``.
    """
    print(
        f"Starting transcribing with model size {model_size[model_size_choice]} for language {model_lang_list[lang_choice]}")

    model = WhisperModel(model_size[model_size_choice])
    _, speaker_groups = load_groups_json()

    subtitle_txt_list = []
    transcribe_txt_list = []
    simple_transcribe_txt_list = []
    # zip with progress.tqdm over the same mapping drives the UI progress bar
    # while iterating the speaker keys once.
    for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc=current_ui_lang["progress_transcribing_audio"])):
        # Transcribe and save temp file
        # Each group is expected to have been exported as "<speaker>.wav"
        # earlier in the pipeline — TODO confirm against the diarization step.
        audiof = f"{speaker}.wav"

        segments, _ = model.transcribe(
            audio=audiof, language=model_lang_list[lang_choice], word_timestamps=True)
        # Materialize the generator so segments can be counted/re-used.
        segments_list = list(segments)

        speaker_txt_list = []
        # Offset all segment timestamps by this group's start within the full
        # recording; the +1 second fudge's purpose is not evident here — confirm.
        shift = speaker_groups[speaker][0] + 1
        print(f"Starting point: {shift}s or {time_str_subtitle(shift)}")

        # Speaker label is truncated to 10 chars for the subtitle tag.
        name = str(speaker)[:10]
        for segment in segments_list:
            start = time_str_subtitle(segment.start + shift)
            end = time_str_subtitle(segment.end + shift)

            segment_txt = segment.text
            speaker_txt_list.append(segment_txt)

            # SRT-style cue: running index, time range, "[speaker] text".
            subtitle = f"{len(subtitle_txt_list) + 1}\n{start} --> {end}\n[{name}] {segment_txt}\n\n"
            subtitle_txt_list.append(subtitle)

        speaker_txt = " ".join(speaker_txt_list)
        transcribe_txt_list.append(
            f"[{time_str(shift)}]\n[{name}] {speaker_txt}\n")
        simple_transcribe_txt_list.append(f"{speaker_txt}\n")

    # Write simple transcribe for summary later
    text_file_tool.write_simple_transcribe_file(
        simple_transcribe_txt_list)
    # Write to base as main
    text_file_tool.write_transcribe_subtitle_file(
        transcribe_txt_list, subtitle_txt_list, False)
    # Write to adjusted as fallout
    text_file_tool.write_transcribe_subtitle_file(
        transcribe_txt_list, subtitle_txt_list, True)


def time_str_subtitle(t):
    """Format *t* seconds as an ``HH:MM:SS.mmm`` subtitle timestamp.

    Fix over the previous version: formatting ``t % 60`` with ``{:06.3f}``
    could round up to ``"60.000"`` without carrying into the minutes field
    (e.g. t=59.9996 -> "00:00:60.000"). Rounding to whole milliseconds first
    and deriving h/m/s with divmod makes every field carry correctly.
    """
    total_ms = round(t * 1000)
    hours, rem_ms = divmod(total_ms, 3_600_000)
    minutes, rem_ms = divmod(rem_ms, 60_000)
    return '{0:02d}:{1:02d}:{2:06.3f}'.format(hours, minutes, rem_ms / 1000)


def time_str(t):
    """Format *t* seconds as an ``HH:MM:SS`` timestamp.

    Fix over the previous version: ``round(t % 60)`` could yield 60 without
    carrying into the minutes field (e.g. t=59.6 -> "00:00:60"). Rounding the
    total once and deriving fields with divmod carries correctly.
    """
    total_s = round(t)
    hours, rem_s = divmod(total_s, 3600)
    minutes, seconds = divmod(rem_s, 60)
    return '{0:02d}:{1:02d}:{2:02d}'.format(hours, minutes, seconds)