---
tags:
  - pyannote
  - pyannote-audio
  - pyannote-audio-pipeline
---

```python

# load pretrained pipeline
from pyannote.audio import Pipeline
pipeline = Pipeline.from_pretrained('hbredin/utter-project-diarization')

# send it to MPS device (on Apple Silicon)
import torch
mps = torch.device('mps')
pipeline.to(mps)

# apply it to the sample file bundled with pyannote.audio
from pyannote.audio.sample import SAMPLE_FILE
diarization = pipeline(SAMPLE_FILE)

# print output
print(diarization)
# [ 00:00:06.730 -->  00:00:07.185] A 1
# [ 00:00:07.590 -->  00:00:08.316] B 1
# [ 00:00:08.316 -->  00:00:09.852] C speaker90
# [ 00:00:09.852 -->  00:00:09.902] D 1
# [ 00:00:09.902 -->  00:00:10.982] E speaker91
# [ 00:00:10.459 -->  00:00:10.527] F 1
# [ 00:00:10.527 -->  00:00:14.729] G speaker90
# [ 00:00:14.307 -->  00:00:17.884] H speaker91
# [ 00:00:18.019 -->  00:00:21.495] I 3
# [ 00:00:18.239 -->  00:00:18.374] J speaker91
# [ 00:00:21.765 -->  00:00:28.515] K speaker91
# [ 00:00:27.824 -->  00:00:29.984] L speaker90

# compute diarization error rate
from pyannote.metrics.diarization import DiarizationErrorRate
metric = DiarizationErrorRate()
print(metric(SAMPLE_FILE['annotation'], diarization, detailed=True))
# {'missed detection': 0.6146562499999995,
#  'correct': 19.108875,
#  'false alarm': 0.48028125,
#  'confusion': 4.626468749999998,
#  'total': 24.349999999999998,
#  'diarization error rate': 0.23496534907597527}
```
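For reference, a minimal sketch of the same pipeline with a device fallback (CPU when MPS is unavailable) and an RTTM export of the result; the output path `sample.rttm` is just an illustrative name, not part of the original snippet:

```python
import torch
from pyannote.audio import Pipeline
from pyannote.audio.sample import SAMPLE_FILE

# load the same pretrained pipeline as above
pipeline = Pipeline.from_pretrained('hbredin/utter-project-diarization')

# use MPS on Apple Silicon if available, otherwise fall back to CPU
device = torch.device('mps') if torch.backends.mps.is_available() else torch.device('cpu')
pipeline.to(device)

# run diarization on the sample file
diarization = pipeline(SAMPLE_FILE)

# save the hypothesis to disk in RTTM format (hypothetical output path)
with open('sample.rttm', 'w') as rttm:
    diarization.write_rttm(rttm)
```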