Spaces:
Runtime error
Runtime error
File size: 2,192 Bytes
e2d8d82 931df81 e2d8d82 931df81 cb85517 32e4ded cb85517 931df81 3130060 931df81 3130060 32e4ded 931df81 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
from faster_whisper import WhisperModel
import torch
import gc
import json
# Drop unreachable Python objects, then ask PyTorch to release any cached,
# unoccupied GPU memory before the Whisper model is loaded.
gc.collect()
torch.cuda.empty_cache()

# Fall back to CPU when no CUDA device is visible instead of crashing at import
# time. torch's CUDA check is used as a proxy for GPU availability here;
# "int8_float16" is a GPU compute type, so plain "int8" is used on CPU.
_device = "cuda" if torch.cuda.is_available() else "cpu"
_compute_type = "int8_float16" if _device == "cuda" else "int8"

# Shared model instance used by start_transcribe().
model = WhisperModel("medium", device=_device, compute_type=_compute_type)
def _to_jsonable(value):
    """Recursively convert faster-whisper result objects into JSON-friendly data.

    Segment/Word objects are not dicts (they are NamedTuples or dataclasses,
    depending on the installed faster-whisper version), so they are turned
    into plain dicts keyed by field name; lists and tuples become lists.
    """
    import dataclasses

    if hasattr(value, "_asdict"):
        # NamedTuple instance.
        value = value._asdict()
    elif dataclasses.is_dataclass(value) and not isinstance(value, type):
        value = {
            field.name: getattr(value, field.name)
            for field in dataclasses.fields(value)
        }

    if isinstance(value, dict):
        return {key: _to_jsonable(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_to_jsonable(item) for item in value]
    return value


def start_transcribe(progress):
    """Transcribe every speaker's audio file and write subtitle/transcript files.

    For each speaker key returned by ``load_groups_json()`` the file
    ``<speaker>.wav`` is transcribed with the module-level ``model``
    (Indonesian, word timestamps enabled). The results are written to:

    * ``subtitle.srt``   - one numbered cue per segment (appended),
    * ``<speaker>.json`` - the speaker's segments as JSON (overwritten),
    * ``transcribe.txt`` - one ``[name] text`` line per speaker (appended).

    Args:
        progress: Object exposing ``tqdm(iterable, desc=...)``; presumably a
            ``gradio.Progress`` instance - confirm against the caller.

    Returns:
        ``["subtitle.srt", "transcribe.txt"]`` - the paths of the two
        aggregate output files.

    Note:
        The aggregate files are opened in append mode, so output from earlier
        calls is kept; cue numbering restarts at 1 on every call.
    """
    _, speaker_groups = load_groups_json()

    # Running cue number shared by all speakers within this call.
    subtitle_index = 0

    # Speaker names are taken from speaker_groups itself; the tqdm wrapper is
    # iterated alongside only to advance the progress display.
    for speaker, _ in zip(speaker_groups, progress.tqdm(speaker_groups, desc="Processing diarization")):
        # Transcribe this speaker's audio file.
        audiof = f"{speaker}.wav"
        print(f"Loading {audiof}")
        segments, _ = model.transcribe(
            audio=audiof, language='id', word_timestamps=True)
        # `segments` is lazy; materialising it runs the actual transcription.
        segments_list = list(segments)

        # Computed before the segment loop so it is defined even when the
        # speaker produced no segments.
        name = str(speaker)[:10]

        subtitle_entries = []
        text_list_to_print = []
        for segment in segments_list:
            subtitle_index += 1
            # Segment fields are attributes, not dictionary keys.
            start = timeStr(segment.start)
            end = timeStr(segment.end)
            text = segment.text
            subtitle_entries.append(
                f"{subtitle_index}\n{start} --> {end}\n[{name}] {text}\n\n")
            text_list_to_print.append(text)

        # Append all cues of this speaker in one write.
        with open("subtitle.srt", "a", encoding="utf-8") as file:
            file.writelines(subtitle_entries)

        # Print full text for each speaker turn.
        text = "\n".join(text_list_to_print)
        print(text)

        # Create the per-speaker transcription file.
        with open(f"{speaker}.json", "w", encoding="utf-8") as text_file:
            json.dump(_to_jsonable(segments_list), text_file, indent=4)

        # Append to the complete transcription file.
        with open("transcribe.txt", "a", encoding="utf-8") as file:
            file.write(f"[{name}] {text}\n")

    return ["subtitle.srt", "transcribe.txt"]
def timeStr(t):
    """Format a time offset in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split with
    integer arithmetic, so a carry (e.g. 59.9996 s) rolls over into the
    minutes field instead of producing ``60`` seconds.

    Args:
        t: Offset in seconds (int or float). Negative values are clamped to 0.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    total_ms = max(0, round(float(t) * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
def load_groups_json():
    """Read the two diarization group files from the working directory.

    Returns:
        A ``(sample_groups, speaker_groups)`` tuple holding the parsed JSON
        content of ``sample_groups.json`` and ``speaker_groups.json``, in
        that order.
    """
    parsed = []
    # Same read order as the return order: samples first, then speakers.
    for filename in ("sample_groups.json", "speaker_groups.json"):
        with open(filename, "r") as handle:
            parsed.append(json.load(handle))
    sample_groups, speaker_groups = parsed
    return sample_groups, speaker_groups
|