File size: 341 Bytes
429df62
 
bdaf5fc
429df62
 
bdaf5fc
4f38470
bdaf5fc
 
 
429df62
1
2
3
4
5
6
7
8
9
10
11
12
import soundfile as sf
import torch
from kotoba_whisper import SpeakerDiarization


# Build the diarization pipeline on the CPU device.
pipeline = SpeakerDiarization(device=torch.device("cpu"))

# Read the sample recording: `waveform` is the decoded audio array and
# `sample_rate` is the rate reported by soundfile.
waveform, sample_rate = sf.read("sample_diarization_japanese.mp3")

# The array is transposed before being handed to the pipeline.
# NOTE(review): presumably this moves channels to the first axis, since
# soundfile returns multi-channel audio as (frames, channels) — confirm
# against the layout SpeakerDiarization expects.
diarization = pipeline(waveform.T, sampling_rate=sample_rate)

# Flatten the result into {label: [[start, end], ...]}, one entry per label
# reported by the pipeline output.
# NOTE(review): start/end are presumably in seconds — confirm.
output = {}
for speaker in diarization.labels():
    output[speaker] = [
        [segment.start, segment.end]
        for segment in diarization.label_timeline(speaker)
    ]
print(output)