---
tags:
- pyannote
- pyannote-audio
- pyannote-audio-pipeline
---
# load pretrained pipeline
from pyannote.audio import Pipeline
pipeline = Pipeline.from_pretrained('hbredin/utter-project-diarization')
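# note: some pretrained pipelines are gated; from_pretrained then needs a
# Hugging Face token (parameter name may vary across pyannote.audio versions):
# pipeline = Pipeline.from_pretrained(
#     'hbredin/utter-project-diarization',
#     use_auth_token='YOUR_HF_TOKEN')  # placeholder, not a real token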
# send it to MPS device (on Apple Silicon)
import torch
mps = torch.device('mps')
pipeline.to(mps)
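# a portable alternative (a sketch, not part of the original snippet):
# pick the best available device at runtime instead of hardcoding MPS
device = torch.device(
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu')
pipeline.to(device)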
# apply it to the bundled sample file
from pyannote.audio.sample import SAMPLE_FILE
diarization = pipeline(SAMPLE_FILE)
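# the pipeline also accepts a plain path to an audio file,
# e.g. (hypothetical filename, shown for illustration only):
# diarization = pipeline('audio.wav')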
# print output
print(diarization)
# [ 00:00:06.730 --> 00:00:06.747] A speaker90
# [ 00:00:06.747 --> 00:00:07.169] B speaker91
# [ 00:00:07.169 --> 00:00:07.185] C speaker90
# [ 00:00:07.590 --> 00:00:07.624] D speaker90
# [ 00:00:07.624 --> 00:00:08.029] E speaker91
# [ 00:00:08.029 --> 00:00:09.970] F speaker90
# [ 00:00:09.970 --> 00:00:10.982] G speaker91
# [ 00:00:10.459 --> 00:00:14.729] H speaker90
# [ 00:00:14.307 --> 00:00:17.884] I speaker91
# [ 00:00:18.019 --> 00:00:21.512] J 2
# [ 00:00:18.188 --> 00:00:18.407] K speaker91
# [ 00:00:21.765 --> 00:00:28.499] L speaker91
# [ 00:00:27.824 --> 00:00:29.967] M 2
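# the result can also be consumed programmatically; itertracks and
# write_rttm are standard pyannote.core.Annotation methods
for turn, _, speaker in diarization.itertracks(yield_label=True):
    print(f'{speaker} speaks from {turn.start:.3f}s to {turn.end:.3f}s')
# save the result in RTTM format ('sample.rttm' is an arbitrary filename)
with open('sample.rttm', 'w') as rttm:
    diarization.write_rttm(rttm)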
# compute diarization error rate
from pyannote.metrics.diarization import DiarizationErrorRate
metric = DiarizationErrorRate()
details = metric(SAMPLE_FILE['annotation'], diarization, detailed=True)
print(details)
# {'confusion': 6.2540312500000015,
# 'missed detection': 0.5480625000000003,
# 'correct': 17.547906249999997,
# 'false alarm': 0.4811874999999999,
# 'total': 24.349999999999998,
# 'diarization error rate': 0.2991080595482547}
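# sanity check: the diarization error rate is the sum of the three error
# components divided by the total reference speech duration
der = (details['confusion'] + details['missed detection'] +
       details['false alarm']) / details['total']
print(f'{100 * der:.1f}% DER')  # 29.9% DER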