from faster_whisper import WhisperModel
import torch
import gc
import json

# Release any cached GPU memory before loading the (large) model.
gc.collect()
torch.cuda.empty_cache()

# int8_float16 quantization keeps VRAM usage low on CUDA.
model = WhisperModel("medium", device="cuda", compute_type="int8_float16")


def start_transcribe(progress):
    """Transcribe each speaker's WAV file and dump the result to JSON.

    For every speaker key in speaker_groups.json, transcribes
    "<speaker>.wav" (Indonesian, with word timestamps) and writes a
    JSON file "<speaker>.json" containing the text, detected language,
    and per-segment/per-word timings.

    Args:
        progress: a progress object exposing ``tqdm`` (e.g. gradio.Progress).

    Returns:
        str: the full transcribed text of the LAST speaker processed
        (empty string if there are no speakers).
    """
    _, speaker_groups = load_groups_json()
    text = ""  # safe return value when speaker_groups is empty
    # NOTE(review): original zipped speaker_groups with tqdm(speaker_groups)
    # and discarded the second value — a single tqdm iteration is equivalent.
    for speaker in progress.tqdm(speaker_groups, desc="Processing diarization"):
        # Transcribe and save temp file
        audiof = f"{speaker}.wav"
        print(f"Loading {audiof}")
        # BUG FIX: faster_whisper's transcribe() returns a
        # (segments_generator, info) tuple, not an openai-whisper dict.
        # The original json.dump(result, ...) and result['text'] would
        # both raise. Materialize a JSON-serializable dict instead.
        segments, info = model.transcribe(
            audio=audiof, language='id', word_timestamps=True)
        segment_dicts = [
            {
                "start": seg.start,
                "end": seg.end,
                "text": seg.text,
                "words": [
                    {"start": w.start, "end": w.end, "word": w.word}
                    for w in (seg.words or [])
                ],
            }
            for seg in segments  # consuming the generator runs the decode
        ]
        text = "".join(s["text"] for s in segment_dicts)
        result = {
            "text": text,
            "language": info.language,
            "segments": segment_dicts,
        }
        with open(f"{speaker}.json", "w") as text_file:
            json.dump(result, text_file, indent=4)
    return text


def load_groups_json():
    """Load the diarization group files produced by an earlier step.

    Returns:
        tuple[list, dict]: the sample-group list from
        "sample_groups.json" and the speaker-group dict from
        "speaker_groups.json".
    """
    with open("sample_groups.json", "r") as json_file_sample:
        sample_groups_list: list = json.load(json_file_sample)
    with open("speaker_groups.json", "r") as json_file_speaker:
        speaker_groups_dict: dict = json.load(json_file_speaker)
    return sample_groups_list, speaker_groups_dict