from faster_whisper import WhisperModel
import torch
import gc
import json

# Free cached GPU memory before loading the model
gc.collect()
torch.cuda.empty_cache()

# Quantized Whisper "medium" model; int8_float16 reduces VRAM usage on CUDA
model = WhisperModel("medium", device="cuda", compute_type="int8_float16")


def start_transcribe(progress):
    _, speaker_groups = load_groups_json()
    subtitle_txt = []
    for speaker, _ in zip(speaker_groups,
                          progress.tqdm(speaker_groups, desc="Transcribing")):
        # Transcribe the per-speaker audio clip
        audiof = f"{speaker}.wav"
        print(f"Loading {audiof}")
        segments, _ = model.transcribe(
            audio=audiof, language='id', word_timestamps=True)
        segments_list = list(segments)

        # Define the speaker label once, so it is available even if a clip
        # yields no segments
        name = str(speaker)[:10]
        text_list_to_print = []
        for segment in segments_list:
            start = timeStr(segment.start)
            end = timeStr(segment.end)
            text = segment.text
            # Collect one numbered SRT block per segment
            subtitle_txt.append(
                f"{len(subtitle_txt) + 1}\n{start} --> {end}\n[{name}] {text}\n\n")
            text_list_to_print.append(text)

        # Print the full text for this speaker turn
        text = "\n".join(text_list_to_print)
        print(text)

        # Append this speaker's text to the complete transcript file
        with open("transcribe.txt", "a") as file:
            file.write(f"[{name}] {text}\n")

    # Write the accumulated subtitle blocks once all speakers are processed
    with open("subtitle.srt", "w") as file:
        file.writelines(subtitle_txt)
    return ["transcribe.txt", "subtitle.srt"]


def timeStr(t):
    # Format seconds as an SRT timestamp (HH:MM:SS,mmm; SRT uses a comma
    # as the decimal separator)
    hours = int(t // 3600)
    minutes = int(t % 3600 // 60)
    seconds = t % 60
    return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}".replace('.', ',')


def load_groups_json():
    with open("sample_groups.json", "r") as json_file_sample:
        sample_groups_list: list = json.load(json_file_sample)
    with open("speaker_groups.json", "r") as json_file_speaker:
        speaker_groups_dict: dict = json.load(json_file_speaker)
    return sample_groups_list, speaker_groups_dict
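

# --- Usage sketch (an assumption, not part of the original script) ---
# start_transcribe expects an object exposing a tqdm() wrapper, as
# gradio.Progress() does. The stand-in below ("PlainProgress") is hypothetical
# and simply forwards to the plain tqdm progress bar so the function can be
# exercised outside a Gradio app, assuming the per-speaker .wav files and the
# two JSON files already exist in the working directory.
if __name__ == "__main__":
    from tqdm import tqdm

    class PlainProgress:
        # Minimal shim mirroring gradio.Progress().tqdm(iterable, desc=...)
        def tqdm(self, iterable, desc=None):
            return tqdm(iterable, desc=desc)

    transcript_file, subtitle_file = start_transcribe(PlainProgress())
    print(f"Wrote {transcript_file} and {subtitle_file}")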